| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9987642455032266, |
| "eval_steps": 500, |
| "global_step": 2730, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0010984484415762735, |
| "grad_norm": 0.13173329830169678, |
| "learning_rate": 1.0989010989010988e-06, |
| "loss": 0.8751, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.002196896883152547, |
| "grad_norm": 0.19401921331882477, |
| "learning_rate": 2.1978021978021976e-06, |
| "loss": 1.3488, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0032953453247288205, |
| "grad_norm": 0.142131969332695, |
| "learning_rate": 3.2967032967032968e-06, |
| "loss": 0.8371, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.004393793766305094, |
| "grad_norm": 0.1124999076128006, |
| "learning_rate": 4.395604395604395e-06, |
| "loss": 1.0039, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.005492242207881368, |
| "grad_norm": 0.20683947205543518, |
| "learning_rate": 5.494505494505494e-06, |
| "loss": 1.4423, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.006590690649457641, |
| "grad_norm": 0.2007640153169632, |
| "learning_rate": 6.5934065934065935e-06, |
| "loss": 0.9797, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.007689139091033915, |
| "grad_norm": 0.1362670361995697, |
| "learning_rate": 7.692307692307692e-06, |
| "loss": 1.0443, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.008787587532610188, |
| "grad_norm": 0.21512511372566223, |
| "learning_rate": 8.79120879120879e-06, |
| "loss": 1.2888, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.009886035974186462, |
| "grad_norm": 0.13403186202049255, |
| "learning_rate": 9.89010989010989e-06, |
| "loss": 0.9637, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.010984484415762736, |
| "grad_norm": 0.16911157965660095, |
| "learning_rate": 1.0989010989010989e-05, |
| "loss": 0.8824, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012082932857339008, |
| "grad_norm": 0.19280359148979187, |
| "learning_rate": 1.2087912087912087e-05, |
| "loss": 0.9843, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.013181381298915282, |
| "grad_norm": 0.15720519423484802, |
| "learning_rate": 1.3186813186813187e-05, |
| "loss": 0.9769, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.014279829740491556, |
| "grad_norm": 0.18622402846813202, |
| "learning_rate": 1.4285714285714284e-05, |
| "loss": 0.903, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01537827818206783, |
| "grad_norm": 0.1491895169019699, |
| "learning_rate": 1.5384615384615384e-05, |
| "loss": 1.065, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.016476726623644102, |
| "grad_norm": 0.16883142292499542, |
| "learning_rate": 1.6483516483516482e-05, |
| "loss": 0.9916, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.017575175065220376, |
| "grad_norm": 0.155453160405159, |
| "learning_rate": 1.758241758241758e-05, |
| "loss": 1.1048, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.01867362350679665, |
| "grad_norm": 0.12869666516780853, |
| "learning_rate": 1.868131868131868e-05, |
| "loss": 0.9355, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.019772071948372924, |
| "grad_norm": 0.18860433995723724, |
| "learning_rate": 1.978021978021978e-05, |
| "loss": 1.1779, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.020870520389949198, |
| "grad_norm": 0.30738529562950134, |
| "learning_rate": 2.087912087912088e-05, |
| "loss": 0.905, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.021968968831525472, |
| "grad_norm": 0.30248674750328064, |
| "learning_rate": 2.1978021978021977e-05, |
| "loss": 1.0749, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.023067417273101742, |
| "grad_norm": 0.17005079984664917, |
| "learning_rate": 2.3076923076923076e-05, |
| "loss": 1.0141, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.024165865714678016, |
| "grad_norm": 0.5497377514839172, |
| "learning_rate": 2.4175824175824174e-05, |
| "loss": 0.804, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.02526431415625429, |
| "grad_norm": 0.23464925587177277, |
| "learning_rate": 2.5274725274725276e-05, |
| "loss": 1.0592, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.026362762597830564, |
| "grad_norm": 0.2906591594219208, |
| "learning_rate": 2.6373626373626374e-05, |
| "loss": 1.4096, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.027461211039406838, |
| "grad_norm": 0.14552968740463257, |
| "learning_rate": 2.747252747252747e-05, |
| "loss": 0.8827, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.028559659480983112, |
| "grad_norm": 0.26139914989471436, |
| "learning_rate": 2.8571428571428567e-05, |
| "loss": 1.1081, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.029658107922559386, |
| "grad_norm": 0.16122505068778992, |
| "learning_rate": 2.9670329670329666e-05, |
| "loss": 0.8967, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03075655636413566, |
| "grad_norm": 0.19174647331237793, |
| "learning_rate": 3.076923076923077e-05, |
| "loss": 0.7527, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.031855004805711934, |
| "grad_norm": 0.24506032466888428, |
| "learning_rate": 3.1868131868131866e-05, |
| "loss": 1.0981, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.032953453247288204, |
| "grad_norm": 0.18928349018096924, |
| "learning_rate": 3.2967032967032964e-05, |
| "loss": 1.2955, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03405190168886448, |
| "grad_norm": 0.20482106506824493, |
| "learning_rate": 3.406593406593406e-05, |
| "loss": 0.886, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03515035013044075, |
| "grad_norm": 0.17304010689258575, |
| "learning_rate": 3.516483516483516e-05, |
| "loss": 1.0062, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.03624879857201702, |
| "grad_norm": 0.17006444931030273, |
| "learning_rate": 3.626373626373626e-05, |
| "loss": 0.76, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0373472470135933, |
| "grad_norm": 0.16570955514907837, |
| "learning_rate": 3.736263736263736e-05, |
| "loss": 0.7512, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.03844569545516957, |
| "grad_norm": 0.4470347464084625, |
| "learning_rate": 3.8461538461538456e-05, |
| "loss": 1.051, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.03954414389674585, |
| "grad_norm": 0.3013080060482025, |
| "learning_rate": 3.956043956043956e-05, |
| "loss": 1.1269, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04064259233832212, |
| "grad_norm": 0.33114469051361084, |
| "learning_rate": 4.065934065934065e-05, |
| "loss": 1.046, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.041741040779898396, |
| "grad_norm": 0.3496829867362976, |
| "learning_rate": 4.175824175824176e-05, |
| "loss": 0.9139, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.042839489221474666, |
| "grad_norm": 0.36173877120018005, |
| "learning_rate": 4.285714285714285e-05, |
| "loss": 1.16, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.043937937663050944, |
| "grad_norm": 0.23047995567321777, |
| "learning_rate": 4.3956043956043955e-05, |
| "loss": 0.8623, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.045036386104627214, |
| "grad_norm": 0.33733946084976196, |
| "learning_rate": 4.5054945054945046e-05, |
| "loss": 0.873, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.046134834546203485, |
| "grad_norm": 0.43975624442100525, |
| "learning_rate": 4.615384615384615e-05, |
| "loss": 0.9374, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.04723328298777976, |
| "grad_norm": 0.5429202318191528, |
| "learning_rate": 4.725274725274725e-05, |
| "loss": 1.0699, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.04833173142935603, |
| "grad_norm": 0.39317595958709717, |
| "learning_rate": 4.835164835164835e-05, |
| "loss": 0.7719, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.04943017987093231, |
| "grad_norm": 0.41328710317611694, |
| "learning_rate": 4.9450549450549446e-05, |
| "loss": 1.112, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.05052862831250858, |
| "grad_norm": 0.5977774858474731, |
| "learning_rate": 5.054945054945055e-05, |
| "loss": 0.9408, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05162707675408486, |
| "grad_norm": 0.6984797716140747, |
| "learning_rate": 5.164835164835164e-05, |
| "loss": 0.9766, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.05272552519566113, |
| "grad_norm": 0.5161548256874084, |
| "learning_rate": 5.274725274725275e-05, |
| "loss": 1.3705, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0538239736372374, |
| "grad_norm": 0.5750108361244202, |
| "learning_rate": 5.384615384615384e-05, |
| "loss": 0.9492, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.054922422078813676, |
| "grad_norm": 0.7861920595169067, |
| "learning_rate": 5.494505494505494e-05, |
| "loss": 1.1495, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05602087052038995, |
| "grad_norm": 0.5992287993431091, |
| "learning_rate": 5.6043956043956037e-05, |
| "loss": 1.2818, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.057119318961966224, |
| "grad_norm": 0.5470016598701477, |
| "learning_rate": 5.7142857142857135e-05, |
| "loss": 1.0385, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.058217767403542495, |
| "grad_norm": 0.7035269141197205, |
| "learning_rate": 5.824175824175824e-05, |
| "loss": 0.785, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.05931621584511877, |
| "grad_norm": 0.5253639817237854, |
| "learning_rate": 5.934065934065933e-05, |
| "loss": 0.6092, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.06041466428669504, |
| "grad_norm": 0.5233064293861389, |
| "learning_rate": 6.043956043956044e-05, |
| "loss": 0.7853, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06151311272827132, |
| "grad_norm": 0.4508589804172516, |
| "learning_rate": 6.153846153846154e-05, |
| "loss": 0.5737, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06261156116984759, |
| "grad_norm": 1.0521594285964966, |
| "learning_rate": 6.263736263736263e-05, |
| "loss": 1.0132, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.06371000961142387, |
| "grad_norm": 0.3572557866573334, |
| "learning_rate": 6.373626373626373e-05, |
| "loss": 0.655, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06480845805300013, |
| "grad_norm": 0.600371241569519, |
| "learning_rate": 6.483516483516483e-05, |
| "loss": 0.8897, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.06590690649457641, |
| "grad_norm": 0.6430579423904419, |
| "learning_rate": 6.593406593406593e-05, |
| "loss": 0.8058, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06700535493615269, |
| "grad_norm": 0.5309410095214844, |
| "learning_rate": 6.703296703296703e-05, |
| "loss": 0.7312, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.06810380337772896, |
| "grad_norm": 0.46225860714912415, |
| "learning_rate": 6.813186813186813e-05, |
| "loss": 0.8607, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.06920225181930523, |
| "grad_norm": 0.8889493346214294, |
| "learning_rate": 6.923076923076922e-05, |
| "loss": 0.7791, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0703007002608815, |
| "grad_norm": 0.5721575617790222, |
| "learning_rate": 7.032967032967032e-05, |
| "loss": 0.9426, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07139914870245778, |
| "grad_norm": 0.8355056047439575, |
| "learning_rate": 7.142857142857142e-05, |
| "loss": 0.621, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.07249759714403405, |
| "grad_norm": 1.3048707246780396, |
| "learning_rate": 7.252747252747252e-05, |
| "loss": 0.8869, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.07359604558561032, |
| "grad_norm": 0.5817797183990479, |
| "learning_rate": 7.362637362637362e-05, |
| "loss": 0.8385, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.0746944940271866, |
| "grad_norm": 1.2051454782485962, |
| "learning_rate": 7.472527472527472e-05, |
| "loss": 0.7566, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.07579294246876288, |
| "grad_norm": 0.8565987944602966, |
| "learning_rate": 7.582417582417581e-05, |
| "loss": 0.8374, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.07689139091033914, |
| "grad_norm": 0.7503894567489624, |
| "learning_rate": 7.692307692307691e-05, |
| "loss": 0.6749, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07798983935191542, |
| "grad_norm": 0.6298589706420898, |
| "learning_rate": 7.802197802197802e-05, |
| "loss": 0.9096, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0790882877934917, |
| "grad_norm": 0.8327789306640625, |
| "learning_rate": 7.912087912087912e-05, |
| "loss": 0.9836, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.08018673623506796, |
| "grad_norm": 1.0001461505889893, |
| "learning_rate": 8.021978021978021e-05, |
| "loss": 0.6917, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.08128518467664424, |
| "grad_norm": 0.8373435735702515, |
| "learning_rate": 8.13186813186813e-05, |
| "loss": 0.7703, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.08238363311822051, |
| "grad_norm": 0.9785758256912231, |
| "learning_rate": 8.241758241758242e-05, |
| "loss": 0.8004, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08348208155979679, |
| "grad_norm": 0.8900540471076965, |
| "learning_rate": 8.351648351648352e-05, |
| "loss": 0.8238, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.08458053000137306, |
| "grad_norm": 0.7411159873008728, |
| "learning_rate": 8.46153846153846e-05, |
| "loss": 1.0364, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.08567897844294933, |
| "grad_norm": 0.4975040555000305, |
| "learning_rate": 8.57142857142857e-05, |
| "loss": 0.4814, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.08677742688452561, |
| "grad_norm": 0.6698398590087891, |
| "learning_rate": 8.681318681318681e-05, |
| "loss": 0.6828, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.08787587532610189, |
| "grad_norm": 0.5883696675300598, |
| "learning_rate": 8.791208791208791e-05, |
| "loss": 0.92, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08897432376767815, |
| "grad_norm": 0.9050906896591187, |
| "learning_rate": 8.901098901098901e-05, |
| "loss": 0.7229, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.09007277220925443, |
| "grad_norm": 0.5996706485748291, |
| "learning_rate": 9.010989010989009e-05, |
| "loss": 0.699, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0911712206508307, |
| "grad_norm": 2.0782630443573, |
| "learning_rate": 9.120879120879119e-05, |
| "loss": 1.2118, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.09226966909240697, |
| "grad_norm": 0.759730875492096, |
| "learning_rate": 9.23076923076923e-05, |
| "loss": 0.6397, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09336811753398325, |
| "grad_norm": 1.1138097047805786, |
| "learning_rate": 9.34065934065934e-05, |
| "loss": 0.8973, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.09446656597555952, |
| "grad_norm": 0.9852680563926697, |
| "learning_rate": 9.45054945054945e-05, |
| "loss": 1.0733, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0955650144171358, |
| "grad_norm": 0.8435002565383911, |
| "learning_rate": 9.560439560439558e-05, |
| "loss": 0.8977, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.09666346285871207, |
| "grad_norm": 1.3031998872756958, |
| "learning_rate": 9.67032967032967e-05, |
| "loss": 0.9852, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.09776191130028834, |
| "grad_norm": 0.6343463063240051, |
| "learning_rate": 9.78021978021978e-05, |
| "loss": 0.6147, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.09886035974186462, |
| "grad_norm": 0.7061794996261597, |
| "learning_rate": 9.890109890109889e-05, |
| "loss": 0.7437, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.09995880818344088, |
| "grad_norm": 1.2231422662734985, |
| "learning_rate": 9.999999999999999e-05, |
| "loss": 0.7944, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.10105725662501716, |
| "grad_norm": 0.7199704647064209, |
| "learning_rate": 0.0001010989010989011, |
| "loss": 0.7355, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.10215570506659344, |
| "grad_norm": 1.2740516662597656, |
| "learning_rate": 0.00010219780219780219, |
| "loss": 0.7622, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.10325415350816972, |
| "grad_norm": 0.7762659788131714, |
| "learning_rate": 0.00010329670329670329, |
| "loss": 0.7074, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.10435260194974598, |
| "grad_norm": 0.6618936061859131, |
| "learning_rate": 0.00010439560439560438, |
| "loss": 0.7667, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.10545105039132226, |
| "grad_norm": 0.7244533896446228, |
| "learning_rate": 0.0001054945054945055, |
| "loss": 0.6451, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.10654949883289853, |
| "grad_norm": 0.6391953229904175, |
| "learning_rate": 0.0001065934065934066, |
| "loss": 0.5637, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.1076479472744748, |
| "grad_norm": 0.6992442607879639, |
| "learning_rate": 0.00010769230769230768, |
| "loss": 0.7112, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.10874639571605108, |
| "grad_norm": 1.0820791721343994, |
| "learning_rate": 0.00010879120879120878, |
| "loss": 0.9199, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.10984484415762735, |
| "grad_norm": 0.6012185215950012, |
| "learning_rate": 0.00010989010989010988, |
| "loss": 0.5574, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11094329259920363, |
| "grad_norm": 0.822455644607544, |
| "learning_rate": 0.00011098901098901099, |
| "loss": 0.5185, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.1120417410407799, |
| "grad_norm": 0.9417555332183838, |
| "learning_rate": 0.00011208791208791207, |
| "loss": 0.6883, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.11314018948235617, |
| "grad_norm": 1.0258208513259888, |
| "learning_rate": 0.00011318681318681317, |
| "loss": 0.7588, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.11423863792393245, |
| "grad_norm": 1.904179573059082, |
| "learning_rate": 0.00011428571428571427, |
| "loss": 0.7425, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.11533708636550873, |
| "grad_norm": 1.5453238487243652, |
| "learning_rate": 0.00011538461538461538, |
| "loss": 0.658, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.11643553480708499, |
| "grad_norm": 0.8801619410514832, |
| "learning_rate": 0.00011648351648351648, |
| "loss": 0.8432, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.11753398324866127, |
| "grad_norm": 0.8567579388618469, |
| "learning_rate": 0.00011758241758241756, |
| "loss": 0.5904, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.11863243169023754, |
| "grad_norm": 0.9351131319999695, |
| "learning_rate": 0.00011868131868131866, |
| "loss": 0.7228, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.11973088013181381, |
| "grad_norm": 0.8817545175552368, |
| "learning_rate": 0.00011978021978021978, |
| "loss": 0.7853, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.12082932857339009, |
| "grad_norm": 1.0484094619750977, |
| "learning_rate": 0.00012087912087912087, |
| "loss": 0.7049, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.12192777701496636, |
| "grad_norm": 1.80658757686615, |
| "learning_rate": 0.00012197802197802197, |
| "loss": 0.669, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.12302622545654264, |
| "grad_norm": 1.5311473608016968, |
| "learning_rate": 0.00012307692307692307, |
| "loss": 0.8342, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1241246738981189, |
| "grad_norm": 0.8968105912208557, |
| "learning_rate": 0.00012417582417582416, |
| "loss": 0.7199, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.12522312233969518, |
| "grad_norm": 0.6149659156799316, |
| "learning_rate": 0.00012527472527472527, |
| "loss": 0.4961, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.12632157078127146, |
| "grad_norm": 8.04592227935791, |
| "learning_rate": 0.00012637362637362635, |
| "loss": 0.7515, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.12742001922284774, |
| "grad_norm": 0.7797659039497375, |
| "learning_rate": 0.00012747252747252746, |
| "loss": 0.7281, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.128518467664424, |
| "grad_norm": 0.6414046883583069, |
| "learning_rate": 0.00012857142857142855, |
| "loss": 0.6655, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.12961691610600026, |
| "grad_norm": 4.678529262542725, |
| "learning_rate": 0.00012967032967032966, |
| "loss": 0.9165, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.13071536454757654, |
| "grad_norm": 0.8540724515914917, |
| "learning_rate": 0.00013076923076923077, |
| "loss": 0.7064, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.13181381298915282, |
| "grad_norm": 1.057844638824463, |
| "learning_rate": 0.00013186813186813186, |
| "loss": 0.6617, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1329122614307291, |
| "grad_norm": 0.8429140448570251, |
| "learning_rate": 0.00013296703296703294, |
| "loss": 0.8156, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.13401070987230537, |
| "grad_norm": 0.9944230914115906, |
| "learning_rate": 0.00013406593406593405, |
| "loss": 0.5851, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.13510915831388165, |
| "grad_norm": 0.6582810878753662, |
| "learning_rate": 0.00013516483516483517, |
| "loss": 0.5819, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.13620760675545793, |
| "grad_norm": 1.3106951713562012, |
| "learning_rate": 0.00013626373626373625, |
| "loss": 0.7598, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.13730605519703418, |
| "grad_norm": 1.0464080572128296, |
| "learning_rate": 0.00013736263736263734, |
| "loss": 0.7241, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.13840450363861045, |
| "grad_norm": 0.8519262075424194, |
| "learning_rate": 0.00013846153846153845, |
| "loss": 0.7001, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.13950295208018673, |
| "grad_norm": 1.2764228582382202, |
| "learning_rate": 0.00013956043956043956, |
| "loss": 0.7152, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.140601400521763, |
| "grad_norm": 1.157472014427185, |
| "learning_rate": 0.00014065934065934064, |
| "loss": 0.697, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1416998489633393, |
| "grad_norm": 0.7153847813606262, |
| "learning_rate": 0.00014175824175824173, |
| "loss": 0.6897, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.14279829740491556, |
| "grad_norm": 0.7254152297973633, |
| "learning_rate": 0.00014285714285714284, |
| "loss": 0.5263, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14389674584649184, |
| "grad_norm": 1.3370522260665894, |
| "learning_rate": 0.00014395604395604395, |
| "loss": 0.7587, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.1449951942880681, |
| "grad_norm": 1.092029333114624, |
| "learning_rate": 0.00014505494505494504, |
| "loss": 0.8674, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.14609364272964437, |
| "grad_norm": 0.6123655438423157, |
| "learning_rate": 0.00014615384615384615, |
| "loss": 0.7163, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.14719209117122065, |
| "grad_norm": 0.8476639986038208, |
| "learning_rate": 0.00014725274725274723, |
| "loss": 0.7241, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.14829053961279692, |
| "grad_norm": 0.9986979961395264, |
| "learning_rate": 0.00014835164835164835, |
| "loss": 0.6229, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.1493889880543732, |
| "grad_norm": 0.8208728432655334, |
| "learning_rate": 0.00014945054945054943, |
| "loss": 0.5441, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.15048743649594948, |
| "grad_norm": 0.742091953754425, |
| "learning_rate": 0.00015054945054945054, |
| "loss": 0.6047, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.15158588493752576, |
| "grad_norm": 1.6566306352615356, |
| "learning_rate": 0.00015164835164835163, |
| "loss": 0.6381, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.152684333379102, |
| "grad_norm": 0.7735741138458252, |
| "learning_rate": 0.0001527472527472527, |
| "loss": 0.5842, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.15378278182067828, |
| "grad_norm": 0.7116795778274536, |
| "learning_rate": 0.00015384615384615382, |
| "loss": 0.7117, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.15488123026225456, |
| "grad_norm": 0.6912885904312134, |
| "learning_rate": 0.00015494505494505494, |
| "loss": 0.763, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.15597967870383084, |
| "grad_norm": 1.0789505243301392, |
| "learning_rate": 0.00015604395604395605, |
| "loss": 0.5534, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.15707812714540711, |
| "grad_norm": 1.0304033756256104, |
| "learning_rate": 0.00015714285714285713, |
| "loss": 0.4961, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.1581765755869834, |
| "grad_norm": 1.0216940641403198, |
| "learning_rate": 0.00015824175824175824, |
| "loss": 0.8167, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.15927502402855967, |
| "grad_norm": 0.7767283916473389, |
| "learning_rate": 0.00015934065934065933, |
| "loss": 0.649, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.16037347247013592, |
| "grad_norm": 0.6125204563140869, |
| "learning_rate": 0.00016043956043956041, |
| "loss": 0.6596, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.1614719209117122, |
| "grad_norm": 2.113314390182495, |
| "learning_rate": 0.00016153846153846153, |
| "loss": 0.6825, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.16257036935328847, |
| "grad_norm": 1.3892889022827148, |
| "learning_rate": 0.0001626373626373626, |
| "loss": 0.5162, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.16366881779486475, |
| "grad_norm": 1.2544710636138916, |
| "learning_rate": 0.0001637362637362637, |
| "loss": 0.5992, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.16476726623644103, |
| "grad_norm": 1.2952786684036255, |
| "learning_rate": 0.00016483516483516484, |
| "loss": 0.5968, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1658657146780173, |
| "grad_norm": 0.9910382628440857, |
| "learning_rate": 0.00016593406593406592, |
| "loss": 0.6138, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.16696416311959358, |
| "grad_norm": 0.7291635870933533, |
| "learning_rate": 0.00016703296703296703, |
| "loss": 0.8957, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.16806261156116986, |
| "grad_norm": 0.7290105819702148, |
| "learning_rate": 0.00016813186813186812, |
| "loss": 0.4864, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.1691610600027461, |
| "grad_norm": 1.1888444423675537, |
| "learning_rate": 0.0001692307692307692, |
| "loss": 0.913, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.1702595084443224, |
| "grad_norm": 0.8183659315109253, |
| "learning_rate": 0.0001703296703296703, |
| "loss": 0.6405, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.17135795688589867, |
| "grad_norm": 0.8549530506134033, |
| "learning_rate": 0.0001714285714285714, |
| "loss": 0.7019, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.17245640532747494, |
| "grad_norm": 0.5960697531700134, |
| "learning_rate": 0.0001725274725274725, |
| "loss": 0.6728, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.17355485376905122, |
| "grad_norm": 0.6802973747253418, |
| "learning_rate": 0.00017362637362637362, |
| "loss": 0.6462, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1746533022106275, |
| "grad_norm": 0.5056049823760986, |
| "learning_rate": 0.00017472527472527473, |
| "loss": 0.5155, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.17575175065220378, |
| "grad_norm": 0.8181887865066528, |
| "learning_rate": 0.00017582417582417582, |
| "loss": 0.6631, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.17685019909378003, |
| "grad_norm": 0.5748574137687683, |
| "learning_rate": 0.0001769230769230769, |
| "loss": 0.5807, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.1779486475353563, |
| "grad_norm": 0.8585043549537659, |
| "learning_rate": 0.00017802197802197802, |
| "loss": 0.5412, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.17904709597693258, |
| "grad_norm": 0.8763203620910645, |
| "learning_rate": 0.0001791208791208791, |
| "loss": 1.0859, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.18014554441850886, |
| "grad_norm": 0.7327267527580261, |
| "learning_rate": 0.00018021978021978018, |
| "loss": 0.8034, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.18124399286008513, |
| "grad_norm": 0.6813991665840149, |
| "learning_rate": 0.0001813186813186813, |
| "loss": 0.9236, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.1823424413016614, |
| "grad_norm": 2.9234185218811035, |
| "learning_rate": 0.00018241758241758238, |
| "loss": 0.9148, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1834408897432377, |
| "grad_norm": 0.8117207884788513, |
| "learning_rate": 0.00018351648351648352, |
| "loss": 1.0514, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.18453933818481394, |
| "grad_norm": 0.6485300064086914, |
| "learning_rate": 0.0001846153846153846, |
| "loss": 0.4764, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.18563778662639022, |
| "grad_norm": 0.43059054017066956, |
| "learning_rate": 0.00018571428571428572, |
| "loss": 0.6289, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.1867362350679665, |
| "grad_norm": 1.007095456123352, |
| "learning_rate": 0.0001868131868131868, |
| "loss": 0.5889, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.18783468350954277, |
| "grad_norm": 1.6733218431472778, |
| "learning_rate": 0.0001879120879120879, |
| "loss": 0.8036, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.18893313195111905, |
| "grad_norm": 0.7533760666847229, |
| "learning_rate": 0.000189010989010989, |
| "loss": 0.7282, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.19003158039269533, |
| "grad_norm": 0.45892444252967834, |
| "learning_rate": 0.00019010989010989008, |
| "loss": 0.6273, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.1911300288342716, |
| "grad_norm": 0.54690021276474, |
| "learning_rate": 0.00019120879120879117, |
| "loss": 0.669, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.19222847727584785, |
| "grad_norm": 0.7361836433410645, |
| "learning_rate": 0.0001923076923076923, |
| "loss": 0.8945, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.19332692571742413, |
| "grad_norm": 0.5876324772834778, |
| "learning_rate": 0.0001934065934065934, |
| "loss": 0.7557, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.1944253741590004, |
| "grad_norm": 0.7753897309303284, |
| "learning_rate": 0.0001945054945054945, |
| "loss": 0.7904, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.19552382260057669, |
| "grad_norm": 0.6244968771934509, |
| "learning_rate": 0.0001956043956043956, |
| "loss": 0.7617, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.19662227104215296, |
| "grad_norm": 0.6300948262214661, |
| "learning_rate": 0.00019670329670329667, |
| "loss": 0.5884, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.19772071948372924, |
| "grad_norm": 0.5845354795455933, |
| "learning_rate": 0.00019780219780219779, |
| "loss": 0.8034, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.19881916792530552, |
| "grad_norm": 0.5231277942657471, |
| "learning_rate": 0.00019890109890109887, |
| "loss": 0.5302, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.19991761636688177, |
| "grad_norm": 0.8393481969833374, |
| "learning_rate": 0.00019999999999999998, |
| "loss": 0.6376, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.20101606480845804, |
| "grad_norm": 0.5777038335800171, |
| "learning_rate": 0.00020109890109890107, |
| "loss": 0.5777, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.20211451325003432, |
| "grad_norm": 0.7751956582069397, |
| "learning_rate": 0.0002021978021978022, |
| "loss": 0.8368, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.2032129616916106, |
| "grad_norm": 1.5582187175750732, |
| "learning_rate": 0.0002032967032967033, |
| "loss": 0.5087, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.20431141013318688, |
| "grad_norm": 0.8304231762886047, |
| "learning_rate": 0.00020439560439560438, |
| "loss": 0.5512, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.20540985857476315, |
| "grad_norm": 0.8545000553131104, |
| "learning_rate": 0.0002054945054945055, |
| "loss": 1.2533, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.20650830701633943, |
| "grad_norm": 0.4891647696495056, |
| "learning_rate": 0.00020659340659340657, |
| "loss": 0.5738, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.20760675545791568, |
| "grad_norm": 0.7159665822982788, |
| "learning_rate": 0.00020769230769230766, |
| "loss": 0.9266, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.20870520389949196, |
| "grad_norm": 0.5053237080574036, |
| "learning_rate": 0.00020879120879120877, |
| "loss": 0.4574, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.20980365234106824, |
| "grad_norm": 0.728336751461029, |
| "learning_rate": 0.00020989010989010985, |
| "loss": 0.6871, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.2109021007826445, |
| "grad_norm": 0.8593311309814453, |
| "learning_rate": 0.000210989010989011, |
| "loss": 0.6788, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.2120005492242208, |
| "grad_norm": 1.247111201286316, |
| "learning_rate": 0.00021208791208791208, |
| "loss": 0.5428, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.21309899766579707, |
| "grad_norm": 0.6636946201324463, |
| "learning_rate": 0.0002131868131868132, |
| "loss": 0.7935, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.21419744610737335, |
| "grad_norm": 0.5811622738838196, |
| "learning_rate": 0.00021428571428571427, |
| "loss": 0.4322, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.2152958945489496, |
| "grad_norm": 0.5329126715660095, |
| "learning_rate": 0.00021538461538461536, |
| "loss": 0.7037, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.21639434299052587, |
| "grad_norm": 1.730969786643982, |
| "learning_rate": 0.00021648351648351647, |
| "loss": 1.0315, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.21749279143210215, |
| "grad_norm": 0.5242175459861755, |
| "learning_rate": 0.00021758241758241756, |
| "loss": 0.9285, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.21859123987367843, |
| "grad_norm": 0.4745596945285797, |
| "learning_rate": 0.00021868131868131864, |
| "loss": 0.5414, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.2196896883152547, |
| "grad_norm": 0.8693228363990784, |
| "learning_rate": 0.00021978021978021975, |
| "loss": 0.4576, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.22078813675683098, |
| "grad_norm": 0.7073357105255127, |
| "learning_rate": 0.00022087912087912086, |
| "loss": 0.778, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.22188658519840726, |
| "grad_norm": 0.535009503364563, |
| "learning_rate": 0.00022197802197802198, |
| "loss": 0.7734, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.2229850336399835, |
| "grad_norm": 0.5862578749656677, |
| "learning_rate": 0.00022307692307692306, |
| "loss": 0.8612, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.2240834820815598, |
| "grad_norm": 0.5167233943939209, |
| "learning_rate": 0.00022417582417582415, |
| "loss": 0.6122, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.22518193052313606, |
| "grad_norm": 0.8982027769088745, |
| "learning_rate": 0.00022527472527472526, |
| "loss": 0.8905, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.22628037896471234, |
| "grad_norm": 0.7311340570449829, |
| "learning_rate": 0.00022637362637362634, |
| "loss": 1.0151, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.22737882740628862, |
| "grad_norm": 0.45674124360084534, |
| "learning_rate": 0.00022747252747252745, |
| "loss": 0.7056, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.2284772758478649, |
| "grad_norm": 0.6916844844818115, |
| "learning_rate": 0.00022857142857142854, |
| "loss": 0.5977, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.22957572428944117, |
| "grad_norm": 0.6632958650588989, |
| "learning_rate": 0.00022967032967032962, |
| "loss": 0.8228, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.23067417273101745, |
| "grad_norm": 0.3243491053581238, |
| "learning_rate": 0.00023076923076923076, |
| "loss": 0.4823, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2317726211725937, |
| "grad_norm": 0.45630499720573425, |
| "learning_rate": 0.00023186813186813185, |
| "loss": 0.7206, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.23287106961416998, |
| "grad_norm": 0.6726184487342834, |
| "learning_rate": 0.00023296703296703296, |
| "loss": 0.8211, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.23396951805574626, |
| "grad_norm": 0.45092982053756714, |
| "learning_rate": 0.00023406593406593405, |
| "loss": 0.6812, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.23506796649732253, |
| "grad_norm": 0.5624651312828064, |
| "learning_rate": 0.00023516483516483513, |
| "loss": 0.726, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.2361664149388988, |
| "grad_norm": 1.1685765981674194, |
| "learning_rate": 0.00023626373626373624, |
| "loss": 0.7906, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.2372648633804751, |
| "grad_norm": 0.581599771976471, |
| "learning_rate": 0.00023736263736263733, |
| "loss": 0.7049, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.23836331182205137, |
| "grad_norm": 0.7660847902297974, |
| "learning_rate": 0.00023846153846153844, |
| "loss": 0.6105, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.23946176026362762, |
| "grad_norm": 0.5126472115516663, |
| "learning_rate": 0.00023956043956043955, |
| "loss": 0.7134, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2405602087052039, |
| "grad_norm": 0.48460498452186584, |
| "learning_rate": 0.00024065934065934066, |
| "loss": 0.5578, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.24165865714678017, |
| "grad_norm": 0.41463029384613037, |
| "learning_rate": 0.00024175824175824175, |
| "loss": 0.5589, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.24275710558835645, |
| "grad_norm": 2.0703623294830322, |
| "learning_rate": 0.00024285714285714283, |
| "loss": 0.7128, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.24385555402993273, |
| "grad_norm": 1.5641820430755615, |
| "learning_rate": 0.00024395604395604394, |
| "loss": 0.4439, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.244954002471509, |
| "grad_norm": 0.34634652733802795, |
| "learning_rate": 0.00024505494505494503, |
| "loss": 0.5389, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.24605245091308528, |
| "grad_norm": 0.5669183135032654, |
| "learning_rate": 0.00024615384615384614, |
| "loss": 0.5699, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.24715089935466153, |
| "grad_norm": 0.6459633111953735, |
| "learning_rate": 0.0002472527472527472, |
| "loss": 0.7904, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2482493477962378, |
| "grad_norm": 0.9719502925872803, |
| "learning_rate": 0.0002483516483516483, |
| "loss": 0.7354, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.24934779623781408, |
| "grad_norm": 0.7433357834815979, |
| "learning_rate": 0.0002494505494505494, |
| "loss": 0.5772, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.25044624467939036, |
| "grad_norm": 0.42272481322288513, |
| "learning_rate": 0.00025054945054945053, |
| "loss": 0.5609, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.2515446931209666, |
| "grad_norm": 1.2868828773498535, |
| "learning_rate": 0.00025164835164835165, |
| "loss": 0.5775, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.2526431415625429, |
| "grad_norm": 0.40398430824279785, |
| "learning_rate": 0.0002527472527472527, |
| "loss": 0.742, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.25374159000411917, |
| "grad_norm": 0.46501678228378296, |
| "learning_rate": 0.0002538461538461538, |
| "loss": 0.69, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.25484003844569547, |
| "grad_norm": 0.46631869673728943, |
| "learning_rate": 0.00025494505494505493, |
| "loss": 0.7712, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.2559384868872717, |
| "grad_norm": 0.6761367321014404, |
| "learning_rate": 0.000256043956043956, |
| "loss": 0.64, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.257036935328848, |
| "grad_norm": 0.6253519654273987, |
| "learning_rate": 0.0002571428571428571, |
| "loss": 0.5499, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.2581353837704243, |
| "grad_norm": 1.0556268692016602, |
| "learning_rate": 0.0002582417582417582, |
| "loss": 0.869, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.2592338322120005, |
| "grad_norm": 0.4816044867038727, |
| "learning_rate": 0.0002593406593406593, |
| "loss": 0.6061, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.26033228065357683, |
| "grad_norm": 1.1049383878707886, |
| "learning_rate": 0.00026043956043956043, |
| "loss": 0.7695, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.2614307290951531, |
| "grad_norm": 0.44643181562423706, |
| "learning_rate": 0.00026153846153846154, |
| "loss": 0.7849, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.2625291775367294, |
| "grad_norm": 0.5231640338897705, |
| "learning_rate": 0.0002626373626373626, |
| "loss": 0.8033, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.26362762597830564, |
| "grad_norm": 0.5537316799163818, |
| "learning_rate": 0.0002637362637362637, |
| "loss": 0.7317, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.26472607441988194, |
| "grad_norm": 0.42069998383522034, |
| "learning_rate": 0.0002648351648351648, |
| "loss": 0.6325, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.2658245228614582, |
| "grad_norm": 0.8009732365608215, |
| "learning_rate": 0.0002659340659340659, |
| "loss": 0.6589, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.26692297130303444, |
| "grad_norm": 1.2626444101333618, |
| "learning_rate": 0.000267032967032967, |
| "loss": 0.5845, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.26802141974461074, |
| "grad_norm": 0.4783913195133209, |
| "learning_rate": 0.0002681318681318681, |
| "loss": 0.8844, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.269119868186187, |
| "grad_norm": 1.098160982131958, |
| "learning_rate": 0.0002692307692307692, |
| "loss": 0.6134, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.2702183166277633, |
| "grad_norm": 1.0397273302078247, |
| "learning_rate": 0.00027032967032967033, |
| "loss": 0.7861, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.27131676506933955, |
| "grad_norm": 0.9729229807853699, |
| "learning_rate": 0.0002714285714285714, |
| "loss": 0.7691, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.27241521351091585, |
| "grad_norm": 0.44837963581085205, |
| "learning_rate": 0.0002725274725274725, |
| "loss": 0.9414, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.2735136619524921, |
| "grad_norm": 1.4863499402999878, |
| "learning_rate": 0.0002736263736263736, |
| "loss": 0.5825, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.27461211039406835, |
| "grad_norm": 0.5948237180709839, |
| "learning_rate": 0.00027472527472527467, |
| "loss": 0.4934, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.27571055883564466, |
| "grad_norm": 0.5448721051216125, |
| "learning_rate": 0.0002758241758241758, |
| "loss": 0.6295, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.2768090072772209, |
| "grad_norm": 0.4309394657611847, |
| "learning_rate": 0.0002769230769230769, |
| "loss": 0.6561, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.2779074557187972, |
| "grad_norm": 0.7659335136413574, |
| "learning_rate": 0.000278021978021978, |
| "loss": 0.7588, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.27900590416037346, |
| "grad_norm": 0.45655715465545654, |
| "learning_rate": 0.0002791208791208791, |
| "loss": 0.5257, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.28010435260194977, |
| "grad_norm": 0.5390630960464478, |
| "learning_rate": 0.0002802197802197802, |
| "loss": 0.7051, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.281202801043526, |
| "grad_norm": 0.39703306555747986, |
| "learning_rate": 0.0002813186813186813, |
| "loss": 0.6137, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.28230124948510227, |
| "grad_norm": 0.4662924110889435, |
| "learning_rate": 0.0002824175824175824, |
| "loss": 0.4897, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.2833996979266786, |
| "grad_norm": 0.39399877190589905, |
| "learning_rate": 0.00028351648351648346, |
| "loss": 0.6235, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.2844981463682548, |
| "grad_norm": 0.497549444437027, |
| "learning_rate": 0.00028461538461538457, |
| "loss": 0.5134, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.28559659480983113, |
| "grad_norm": 0.6597803235054016, |
| "learning_rate": 0.0002857142857142857, |
| "loss": 0.7955, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2866950432514074, |
| "grad_norm": 0.5545711517333984, |
| "learning_rate": 0.0002868131868131868, |
| "loss": 0.833, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.2877934916929837, |
| "grad_norm": 1.0227786302566528, |
| "learning_rate": 0.0002879120879120879, |
| "loss": 0.5249, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.28889194013455993, |
| "grad_norm": 0.5727143883705139, |
| "learning_rate": 0.000289010989010989, |
| "loss": 0.6319, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.2899903885761362, |
| "grad_norm": 0.39322397112846375, |
| "learning_rate": 0.0002901098901098901, |
| "loss": 0.7003, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.2910888370177125, |
| "grad_norm": 0.5657737851142883, |
| "learning_rate": 0.0002912087912087912, |
| "loss": 0.7085, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.29218728545928874, |
| "grad_norm": 0.4305976927280426, |
| "learning_rate": 0.0002923076923076923, |
| "loss": 0.5931, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.29328573390086504, |
| "grad_norm": 0.5300284624099731, |
| "learning_rate": 0.00029340659340659336, |
| "loss": 0.7881, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.2943841823424413, |
| "grad_norm": 0.5922349095344543, |
| "learning_rate": 0.00029450549450549447, |
| "loss": 0.8688, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.2954826307840176, |
| "grad_norm": 0.5700828433036804, |
| "learning_rate": 0.0002956043956043956, |
| "loss": 1.1328, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.29658107922559385, |
| "grad_norm": 0.6773694753646851, |
| "learning_rate": 0.0002967032967032967, |
| "loss": 0.7821, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2976795276671701, |
| "grad_norm": 0.5200739502906799, |
| "learning_rate": 0.0002978021978021978, |
| "loss": 0.8775, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.2987779761087464, |
| "grad_norm": 0.9860020875930786, |
| "learning_rate": 0.00029890109890109886, |
| "loss": 0.9141, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.29987642455032265, |
| "grad_norm": 0.7012956142425537, |
| "learning_rate": 0.0003, |
| "loss": 0.7672, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.30097487299189896, |
| "grad_norm": 0.4128098785877228, |
| "learning_rate": 0.0002998778998778999, |
| "loss": 0.3969, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.3020733214334752, |
| "grad_norm": 0.366597980260849, |
| "learning_rate": 0.00029975579975579974, |
| "loss": 0.639, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.3031717698750515, |
| "grad_norm": 0.5208033919334412, |
| "learning_rate": 0.0002996336996336996, |
| "loss": 0.664, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.30427021831662776, |
| "grad_norm": 0.45519202947616577, |
| "learning_rate": 0.0002995115995115995, |
| "loss": 0.8495, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.305368666758204, |
| "grad_norm": 0.6617010831832886, |
| "learning_rate": 0.0002993894993894994, |
| "loss": 1.0204, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.3064671151997803, |
| "grad_norm": 1.4151723384857178, |
| "learning_rate": 0.00029926739926739923, |
| "loss": 0.8289, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.30756556364135657, |
| "grad_norm": 0.6531035900115967, |
| "learning_rate": 0.00029914529914529915, |
| "loss": 0.7571, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.30866401208293287, |
| "grad_norm": 0.8595600724220276, |
| "learning_rate": 0.000299023199023199, |
| "loss": 0.9668, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.3097624605245091, |
| "grad_norm": 0.50210040807724, |
| "learning_rate": 0.00029890109890109886, |
| "loss": 0.6662, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.3108609089660854, |
| "grad_norm": 0.6004669666290283, |
| "learning_rate": 0.0002987789987789988, |
| "loss": 0.7127, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.3119593574076617, |
| "grad_norm": 0.8085057139396667, |
| "learning_rate": 0.00029865689865689863, |
| "loss": 0.9266, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.3130578058492379, |
| "grad_norm": 0.44965627789497375, |
| "learning_rate": 0.0002985347985347985, |
| "loss": 0.7118, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.31415625429081423, |
| "grad_norm": 0.5758265852928162, |
| "learning_rate": 0.00029841269841269835, |
| "loss": 0.6915, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.3152547027323905, |
| "grad_norm": 0.5623393058776855, |
| "learning_rate": 0.00029829059829059826, |
| "loss": 0.6962, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.3163531511739668, |
| "grad_norm": 0.857796311378479, |
| "learning_rate": 0.0002981684981684982, |
| "loss": 0.676, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.31745159961554303, |
| "grad_norm": 0.36431241035461426, |
| "learning_rate": 0.000298046398046398, |
| "loss": 0.5475, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.31855004805711934, |
| "grad_norm": 0.4778802692890167, |
| "learning_rate": 0.0002979242979242979, |
| "loss": 0.7198, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3196484964986956, |
| "grad_norm": 0.4887610673904419, |
| "learning_rate": 0.0002978021978021978, |
| "loss": 0.5559, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.32074694494027184, |
| "grad_norm": 0.745379626750946, |
| "learning_rate": 0.00029768009768009766, |
| "loss": 1.0509, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.32184539338184814, |
| "grad_norm": 0.40081167221069336, |
| "learning_rate": 0.0002975579975579975, |
| "loss": 0.6564, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.3229438418234244, |
| "grad_norm": 0.5133034586906433, |
| "learning_rate": 0.00029743589743589743, |
| "loss": 0.6765, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.3240422902650007, |
| "grad_norm": 0.5123881697654724, |
| "learning_rate": 0.0002973137973137973, |
| "loss": 0.8001, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.32514073870657695, |
| "grad_norm": 0.3771597743034363, |
| "learning_rate": 0.00029719169719169715, |
| "loss": 0.785, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.32623918714815325, |
| "grad_norm": 0.38929086923599243, |
| "learning_rate": 0.00029706959706959706, |
| "loss": 0.7273, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.3273376355897295, |
| "grad_norm": 0.47761446237564087, |
| "learning_rate": 0.0002969474969474969, |
| "loss": 0.6997, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.3284360840313058, |
| "grad_norm": 0.4798452854156494, |
| "learning_rate": 0.0002968253968253968, |
| "loss": 0.7171, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.32953453247288206, |
| "grad_norm": 0.5864073038101196, |
| "learning_rate": 0.0002967032967032967, |
| "loss": 0.7075, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3306329809144583, |
| "grad_norm": 0.6298258900642395, |
| "learning_rate": 0.00029658119658119655, |
| "loss": 0.8659, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.3317314293560346, |
| "grad_norm": 0.9764651656150818, |
| "learning_rate": 0.0002964590964590964, |
| "loss": 0.7451, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.33282987779761086, |
| "grad_norm": 0.7084535360336304, |
| "learning_rate": 0.0002963369963369963, |
| "loss": 0.7896, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.33392832623918717, |
| "grad_norm": 0.3226016163825989, |
| "learning_rate": 0.0002962148962148962, |
| "loss": 0.5614, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.3350267746807634, |
| "grad_norm": 0.5515668988227844, |
| "learning_rate": 0.0002960927960927961, |
| "loss": 0.6981, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.3361252231223397, |
| "grad_norm": 0.42776307463645935, |
| "learning_rate": 0.00029597069597069595, |
| "loss": 0.5911, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.33722367156391597, |
| "grad_norm": 0.36645814776420593, |
| "learning_rate": 0.0002958485958485958, |
| "loss": 0.5584, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.3383221200054922, |
| "grad_norm": 0.4089672565460205, |
| "learning_rate": 0.0002957264957264957, |
| "loss": 0.6814, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3394205684470685, |
| "grad_norm": 0.4406324326992035, |
| "learning_rate": 0.0002956043956043956, |
| "loss": 0.5426, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.3405190168886448, |
| "grad_norm": 0.4138193726539612, |
| "learning_rate": 0.00029548229548229544, |
| "loss": 0.7554, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3416174653302211, |
| "grad_norm": 0.45647338032722473, |
| "learning_rate": 0.00029536019536019535, |
| "loss": 0.4871, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.34271591377179733, |
| "grad_norm": 0.44362974166870117, |
| "learning_rate": 0.0002952380952380952, |
| "loss": 0.7254, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.34381436221337364, |
| "grad_norm": 0.5832559466362, |
| "learning_rate": 0.00029511599511599507, |
| "loss": 0.64, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.3449128106549499, |
| "grad_norm": 0.6754651665687561, |
| "learning_rate": 0.000294993894993895, |
| "loss": 0.7046, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.34601125909652614, |
| "grad_norm": 0.6487123370170593, |
| "learning_rate": 0.00029487179487179484, |
| "loss": 0.5934, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.34710970753810244, |
| "grad_norm": 0.24118930101394653, |
| "learning_rate": 0.0002947496947496947, |
| "loss": 0.5241, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.3482081559796787, |
| "grad_norm": 0.4580494165420532, |
| "learning_rate": 0.0002946275946275946, |
| "loss": 0.6733, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.349306604421255, |
| "grad_norm": 0.4770609736442566, |
| "learning_rate": 0.00029450549450549447, |
| "loss": 0.5758, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.35040505286283125, |
| "grad_norm": 0.40334221720695496, |
| "learning_rate": 0.0002943833943833944, |
| "loss": 0.5365, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.35150350130440755, |
| "grad_norm": 0.5605480074882507, |
| "learning_rate": 0.00029426129426129424, |
| "loss": 0.5967, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3526019497459838, |
| "grad_norm": 0.6031836271286011, |
| "learning_rate": 0.0002941391941391941, |
| "loss": 0.6397, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.35370039818756005, |
| "grad_norm": 0.5602075457572937, |
| "learning_rate": 0.000294017094017094, |
| "loss": 0.7253, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.35479884662913636, |
| "grad_norm": 1.5055879354476929, |
| "learning_rate": 0.00029389499389499387, |
| "loss": 0.6066, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.3558972950707126, |
| "grad_norm": 1.969072699546814, |
| "learning_rate": 0.0002937728937728937, |
| "loss": 0.9263, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.3569957435122889, |
| "grad_norm": 0.43139147758483887, |
| "learning_rate": 0.00029365079365079364, |
| "loss": 0.6462, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.35809419195386516, |
| "grad_norm": 0.40423595905303955, |
| "learning_rate": 0.0002935286935286935, |
| "loss": 0.4278, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.35919264039544146, |
| "grad_norm": 0.41983166337013245, |
| "learning_rate": 0.00029340659340659336, |
| "loss": 0.7527, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.3602910888370177, |
| "grad_norm": 0.6624807715415955, |
| "learning_rate": 0.00029328449328449327, |
| "loss": 0.7381, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.36138953727859396, |
| "grad_norm": 0.6173990964889526, |
| "learning_rate": 0.00029316239316239313, |
| "loss": 0.6838, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.36248798572017027, |
| "grad_norm": 1.1278433799743652, |
| "learning_rate": 0.000293040293040293, |
| "loss": 0.8439, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3635864341617465, |
| "grad_norm": 0.3453993797302246, |
| "learning_rate": 0.0002929181929181929, |
| "loss": 0.5324, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.3646848826033228, |
| "grad_norm": 0.4151187241077423, |
| "learning_rate": 0.0002927960927960928, |
| "loss": 0.7019, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.3657833310448991, |
| "grad_norm": 0.4247313439846039, |
| "learning_rate": 0.0002926739926739926, |
| "loss": 0.6362, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.3668817794864754, |
| "grad_norm": 1.5250136852264404, |
| "learning_rate": 0.00029255189255189253, |
| "loss": 0.5885, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.36798022792805163, |
| "grad_norm": 0.43669968843460083, |
| "learning_rate": 0.00029242979242979244, |
| "loss": 0.9191, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.3690786763696279, |
| "grad_norm": 0.8063925504684448, |
| "learning_rate": 0.0002923076923076923, |
| "loss": 0.6813, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.3701771248112042, |
| "grad_norm": 0.6002399325370789, |
| "learning_rate": 0.00029218559218559216, |
| "loss": 0.5859, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.37127557325278043, |
| "grad_norm": 0.9405462145805359, |
| "learning_rate": 0.000292063492063492, |
| "loss": 0.7476, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.37237402169435674, |
| "grad_norm": 0.5050615072250366, |
| "learning_rate": 0.00029194139194139193, |
| "loss": 0.5172, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.373472470135933, |
| "grad_norm": 0.4593801200389862, |
| "learning_rate": 0.0002918192918192918, |
| "loss": 0.5405, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3745709185775093, |
| "grad_norm": 0.5275060534477234, |
| "learning_rate": 0.00029169719169719164, |
| "loss": 0.4537, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.37566936701908554, |
| "grad_norm": 0.8907522559165955, |
| "learning_rate": 0.00029157509157509156, |
| "loss": 0.6826, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.3767678154606618, |
| "grad_norm": 0.7229670882225037, |
| "learning_rate": 0.0002914529914529914, |
| "loss": 0.6072, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.3778662639022381, |
| "grad_norm": 1.7154827117919922, |
| "learning_rate": 0.0002913308913308913, |
| "loss": 0.6956, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.37896471234381435, |
| "grad_norm": 1.012902021408081, |
| "learning_rate": 0.0002912087912087912, |
| "loss": 0.5337, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.38006316078539065, |
| "grad_norm": 0.6467313170433044, |
| "learning_rate": 0.00029108669108669105, |
| "loss": 0.7652, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.3811616092269669, |
| "grad_norm": 0.5594947338104248, |
| "learning_rate": 0.0002909645909645909, |
| "loss": 0.578, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.3822600576685432, |
| "grad_norm": 0.5808854699134827, |
| "learning_rate": 0.0002908424908424908, |
| "loss": 0.6142, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.38335850611011946, |
| "grad_norm": 0.6067795157432556, |
| "learning_rate": 0.00029072039072039073, |
| "loss": 0.7682, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.3844569545516957, |
| "grad_norm": 0.392993301153183, |
| "learning_rate": 0.0002905982905982906, |
| "loss": 0.6599, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.385555402993272, |
| "grad_norm": 0.3963404893875122, |
| "learning_rate": 0.00029047619047619045, |
| "loss": 0.7079, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.38665385143484826, |
| "grad_norm": 0.3471222221851349, |
| "learning_rate": 0.00029035409035409036, |
| "loss": 0.463, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.38775229987642457, |
| "grad_norm": 0.5496531128883362, |
| "learning_rate": 0.0002902319902319902, |
| "loss": 0.7639, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3888507483180008, |
| "grad_norm": 0.5482885241508484, |
| "learning_rate": 0.0002901098901098901, |
| "loss": 0.4198, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.3899491967595771, |
| "grad_norm": 0.7329181432723999, |
| "learning_rate": 0.00028998778998779, |
| "loss": 0.6057, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.39104764520115337, |
| "grad_norm": 0.41850918531417847, |
| "learning_rate": 0.00028986568986568985, |
| "loss": 0.605, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.3921460936427296, |
| "grad_norm": 0.4463609457015991, |
| "learning_rate": 0.0002897435897435897, |
| "loss": 0.7381, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.3932445420843059, |
| "grad_norm": 0.7207491397857666, |
| "learning_rate": 0.0002896214896214896, |
| "loss": 0.6892, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.3943429905258822, |
| "grad_norm": 0.3715958595275879, |
| "learning_rate": 0.0002894993894993895, |
| "loss": 0.5426, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.3954414389674585, |
| "grad_norm": 0.7077822685241699, |
| "learning_rate": 0.00028937728937728933, |
| "loss": 0.5923, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.39653988740903473, |
| "grad_norm": 0.5109585523605347, |
| "learning_rate": 0.00028925518925518925, |
| "loss": 0.5939, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.39763833585061104, |
| "grad_norm": 0.6105355024337769, |
| "learning_rate": 0.0002891330891330891, |
| "loss": 1.0345, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.3987367842921873, |
| "grad_norm": 0.479732871055603, |
| "learning_rate": 0.000289010989010989, |
| "loss": 0.71, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.39983523273376353, |
| "grad_norm": 0.8600007891654968, |
| "learning_rate": 0.0002888888888888888, |
| "loss": 0.7406, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.40093368117533984, |
| "grad_norm": 0.6584550738334656, |
| "learning_rate": 0.00028876678876678873, |
| "loss": 0.6658, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.4020321296169161, |
| "grad_norm": 0.7251041531562805, |
| "learning_rate": 0.00028864468864468865, |
| "loss": 0.8425, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.4031305780584924, |
| "grad_norm": 0.5729238390922546, |
| "learning_rate": 0.0002885225885225885, |
| "loss": 0.9054, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.40422902650006864, |
| "grad_norm": 1.1829932928085327, |
| "learning_rate": 0.00028840048840048836, |
| "loss": 0.9232, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.40532747494164495, |
| "grad_norm": 0.37746721506118774, |
| "learning_rate": 0.0002882783882783883, |
| "loss": 0.9619, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.4064259233832212, |
| "grad_norm": 0.5653749108314514, |
| "learning_rate": 0.00028815628815628813, |
| "loss": 0.7182, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.40752437182479745, |
| "grad_norm": 0.6024563312530518, |
| "learning_rate": 0.000288034188034188, |
| "loss": 0.6881, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.40862282026637375, |
| "grad_norm": 0.485350102186203, |
| "learning_rate": 0.0002879120879120879, |
| "loss": 0.6451, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.40972126870795, |
| "grad_norm": 0.5762611627578735, |
| "learning_rate": 0.00028778998778998776, |
| "loss": 0.7818, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.4108197171495263, |
| "grad_norm": 0.7961844801902771, |
| "learning_rate": 0.0002876678876678876, |
| "loss": 0.6682, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.41191816559110256, |
| "grad_norm": 0.4630587697029114, |
| "learning_rate": 0.00028754578754578753, |
| "loss": 0.9015, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.41301661403267886, |
| "grad_norm": 0.6592808961868286, |
| "learning_rate": 0.0002874236874236874, |
| "loss": 0.5738, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4141150624742551, |
| "grad_norm": 0.4788278639316559, |
| "learning_rate": 0.00028730158730158725, |
| "loss": 0.7022, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.41521351091583136, |
| "grad_norm": 0.5041861534118652, |
| "learning_rate": 0.00028717948717948716, |
| "loss": 0.6137, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.41631195935740767, |
| "grad_norm": 0.5436013340950012, |
| "learning_rate": 0.000287057387057387, |
| "loss": 0.6621, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.4174104077989839, |
| "grad_norm": 0.5102400183677673, |
| "learning_rate": 0.00028693528693528694, |
| "loss": 0.6627, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4185088562405602, |
| "grad_norm": 0.43655040860176086, |
| "learning_rate": 0.0002868131868131868, |
| "loss": 0.6475, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.4196073046821365, |
| "grad_norm": 0.3989826738834381, |
| "learning_rate": 0.00028669108669108665, |
| "loss": 0.5483, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.4207057531237128, |
| "grad_norm": 0.7781158685684204, |
| "learning_rate": 0.00028656898656898656, |
| "loss": 0.6475, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.421804201565289, |
| "grad_norm": 0.8119930624961853, |
| "learning_rate": 0.0002864468864468864, |
| "loss": 0.8122, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.4229026500068653, |
| "grad_norm": 0.7233585119247437, |
| "learning_rate": 0.0002863247863247863, |
| "loss": 0.7837, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.4240010984484416, |
| "grad_norm": 0.41249507665634155, |
| "learning_rate": 0.0002862026862026862, |
| "loss": 0.6916, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.42509954689001783, |
| "grad_norm": 0.4865298867225647, |
| "learning_rate": 0.00028608058608058605, |
| "loss": 0.595, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.42619799533159414, |
| "grad_norm": 0.6057963371276855, |
| "learning_rate": 0.0002859584859584859, |
| "loss": 0.7214, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.4272964437731704, |
| "grad_norm": 0.5390968918800354, |
| "learning_rate": 0.0002858363858363858, |
| "loss": 0.805, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.4283948922147467, |
| "grad_norm": 0.5944109559059143, |
| "learning_rate": 0.0002857142857142857, |
| "loss": 0.9953, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.42949334065632294, |
| "grad_norm": 0.5480278134346008, |
| "learning_rate": 0.00028559218559218554, |
| "loss": 0.8406, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.4305917890978992, |
| "grad_norm": 0.5168552994728088, |
| "learning_rate": 0.00028547008547008545, |
| "loss": 0.9715, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.4316902375394755, |
| "grad_norm": 0.4859452247619629, |
| "learning_rate": 0.0002853479853479853, |
| "loss": 0.7368, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.43278868598105175, |
| "grad_norm": 0.4697234034538269, |
| "learning_rate": 0.0002852258852258852, |
| "loss": 0.4801, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.43388713442262805, |
| "grad_norm": 0.6198891401290894, |
| "learning_rate": 0.0002851037851037851, |
| "loss": 0.5184, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.4349855828642043, |
| "grad_norm": 0.531563401222229, |
| "learning_rate": 0.00028498168498168494, |
| "loss": 0.8047, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.4360840313057806, |
| "grad_norm": 0.4610724449157715, |
| "learning_rate": 0.00028485958485958485, |
| "loss": 0.4583, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.43718247974735686, |
| "grad_norm": 0.5609697699546814, |
| "learning_rate": 0.0002847374847374847, |
| "loss": 0.7362, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.4382809281889331, |
| "grad_norm": 0.5257968306541443, |
| "learning_rate": 0.00028461538461538457, |
| "loss": 0.8173, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.4393793766305094, |
| "grad_norm": 0.8307009339332581, |
| "learning_rate": 0.0002844932844932845, |
| "loss": 0.5507, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.44047782507208566, |
| "grad_norm": 0.36615508794784546, |
| "learning_rate": 0.00028437118437118434, |
| "loss": 0.6605, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.44157627351366197, |
| "grad_norm": 0.35138362646102905, |
| "learning_rate": 0.0002842490842490842, |
| "loss": 0.6614, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4426747219552382, |
| "grad_norm": 0.5054494738578796, |
| "learning_rate": 0.0002841269841269841, |
| "loss": 0.799, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.4437731703968145, |
| "grad_norm": 0.4711816608905792, |
| "learning_rate": 0.00028400488400488397, |
| "loss": 0.8892, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.44487161883839077, |
| "grad_norm": 0.5073884725570679, |
| "learning_rate": 0.00028388278388278383, |
| "loss": 0.8156, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.445970067279967, |
| "grad_norm": 0.29938632249832153, |
| "learning_rate": 0.00028376068376068374, |
| "loss": 0.7598, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.4470685157215433, |
| "grad_norm": 1.745937466621399, |
| "learning_rate": 0.00028363858363858365, |
| "loss": 0.7829, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.4481669641631196, |
| "grad_norm": 0.46887943148612976, |
| "learning_rate": 0.00028351648351648346, |
| "loss": 0.7798, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.4492654126046959, |
| "grad_norm": 0.4274987280368805, |
| "learning_rate": 0.00028339438339438337, |
| "loss": 0.8407, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.45036386104627213, |
| "grad_norm": 0.4445902109146118, |
| "learning_rate": 0.0002832722832722833, |
| "loss": 0.7394, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.45146230948784843, |
| "grad_norm": 0.3842466175556183, |
| "learning_rate": 0.00028315018315018314, |
| "loss": 0.7781, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.4525607579294247, |
| "grad_norm": 0.5660600066184998, |
| "learning_rate": 0.000283028083028083, |
| "loss": 0.8058, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.45365920637100093, |
| "grad_norm": 0.442911297082901, |
| "learning_rate": 0.0002829059829059829, |
| "loss": 0.808, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.45475765481257724, |
| "grad_norm": 0.9051260352134705, |
| "learning_rate": 0.00028278388278388277, |
| "loss": 0.9427, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.4558561032541535, |
| "grad_norm": 0.8027593493461609, |
| "learning_rate": 0.00028266178266178263, |
| "loss": 0.531, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.4569545516957298, |
| "grad_norm": 0.36242446303367615, |
| "learning_rate": 0.0002825396825396825, |
| "loss": 0.5609, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.45805300013730604, |
| "grad_norm": 0.6095871925354004, |
| "learning_rate": 0.0002824175824175824, |
| "loss": 0.7424, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.45915144857888235, |
| "grad_norm": 0.5102814435958862, |
| "learning_rate": 0.00028229548229548226, |
| "loss": 0.8861, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.4602498970204586, |
| "grad_norm": 0.375265896320343, |
| "learning_rate": 0.0002821733821733821, |
| "loss": 0.6235, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4613483454620349, |
| "grad_norm": 0.4506315588951111, |
| "learning_rate": 0.00028205128205128203, |
| "loss": 0.6059, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.46244679390361115, |
| "grad_norm": 0.8119642734527588, |
| "learning_rate": 0.0002819291819291819, |
| "loss": 0.7821, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.4635452423451874, |
| "grad_norm": 0.42945513129234314, |
| "learning_rate": 0.00028180708180708175, |
| "loss": 0.9503, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.4646436907867637, |
| "grad_norm": 0.35567665100097656, |
| "learning_rate": 0.00028168498168498166, |
| "loss": 0.5243, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.46574213922833996, |
| "grad_norm": 0.5160343647003174, |
| "learning_rate": 0.00028156288156288157, |
| "loss": 0.5767, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.46684058766991626, |
| "grad_norm": 0.37530624866485596, |
| "learning_rate": 0.00028144078144078143, |
| "loss": 1.2016, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.4679390361114925, |
| "grad_norm": 0.5283146500587463, |
| "learning_rate": 0.0002813186813186813, |
| "loss": 0.5958, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.4690374845530688, |
| "grad_norm": 0.5217192769050598, |
| "learning_rate": 0.0002811965811965812, |
| "loss": 0.715, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.47013593299464507, |
| "grad_norm": 0.5092077851295471, |
| "learning_rate": 0.00028107448107448106, |
| "loss": 0.6942, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4712343814362213, |
| "grad_norm": 0.7683324813842773, |
| "learning_rate": 0.0002809523809523809, |
| "loss": 1.0185, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.4723328298777976, |
| "grad_norm": 0.3117397725582123, |
| "learning_rate": 0.00028083028083028083, |
| "loss": 0.6949, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.47343127831937387, |
| "grad_norm": 0.3218965232372284, |
| "learning_rate": 0.0002807081807081807, |
| "loss": 0.6872, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.4745297267609502, |
| "grad_norm": 1.104121446609497, |
| "learning_rate": 0.00028058608058608055, |
| "loss": 0.6628, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.4756281752025264, |
| "grad_norm": 0.3224816620349884, |
| "learning_rate": 0.00028046398046398046, |
| "loss": 0.5974, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.47672662364410273, |
| "grad_norm": 0.5742220878601074, |
| "learning_rate": 0.0002803418803418803, |
| "loss": 0.7248, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.477825072085679, |
| "grad_norm": 0.5449275374412537, |
| "learning_rate": 0.0002802197802197802, |
| "loss": 0.8552, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.47892352052725523, |
| "grad_norm": 0.44660067558288574, |
| "learning_rate": 0.0002800976800976801, |
| "loss": 0.6968, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.48002196896883154, |
| "grad_norm": 0.4287508428096771, |
| "learning_rate": 0.00027997557997557995, |
| "loss": 0.8101, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.4811204174104078, |
| "grad_norm": 0.4142225384712219, |
| "learning_rate": 0.00027985347985347986, |
| "loss": 0.5379, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.4822188658519841, |
| "grad_norm": 1.246833324432373, |
| "learning_rate": 0.0002797313797313797, |
| "loss": 0.7116, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.48331731429356034, |
| "grad_norm": 0.3845030963420868, |
| "learning_rate": 0.0002796092796092796, |
| "loss": 0.8088, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.48441576273513665, |
| "grad_norm": 1.4492995738983154, |
| "learning_rate": 0.0002794871794871795, |
| "loss": 0.7358, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.4855142111767129, |
| "grad_norm": 0.40994521975517273, |
| "learning_rate": 0.00027936507936507935, |
| "loss": 0.6228, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.48661265961828915, |
| "grad_norm": 0.4782777428627014, |
| "learning_rate": 0.0002792429792429792, |
| "loss": 0.4944, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.48771110805986545, |
| "grad_norm": 0.47269922494888306, |
| "learning_rate": 0.0002791208791208791, |
| "loss": 0.7023, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.4888095565014417, |
| "grad_norm": 0.5529118776321411, |
| "learning_rate": 0.000278998778998779, |
| "loss": 0.7717, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.489908004943018, |
| "grad_norm": 0.4244072139263153, |
| "learning_rate": 0.00027887667887667884, |
| "loss": 0.7902, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.49100645338459425, |
| "grad_norm": 1.4737539291381836, |
| "learning_rate": 0.00027875457875457875, |
| "loss": 0.5784, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.49210490182617056, |
| "grad_norm": 0.40120208263397217, |
| "learning_rate": 0.0002786324786324786, |
| "loss": 0.7974, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.4932033502677468, |
| "grad_norm": 0.5481031537055969, |
| "learning_rate": 0.00027851037851037846, |
| "loss": 0.7867, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.49430179870932306, |
| "grad_norm": 0.36719343066215515, |
| "learning_rate": 0.0002783882783882784, |
| "loss": 0.6543, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.49540024715089936, |
| "grad_norm": 0.3980066776275635, |
| "learning_rate": 0.00027826617826617824, |
| "loss": 0.5395, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.4964986955924756, |
| "grad_norm": 0.45570313930511475, |
| "learning_rate": 0.0002781440781440781, |
| "loss": 0.7908, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.4975971440340519, |
| "grad_norm": 0.41858601570129395, |
| "learning_rate": 0.000278021978021978, |
| "loss": 0.5248, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.49869559247562817, |
| "grad_norm": 0.5019702315330505, |
| "learning_rate": 0.00027789987789987786, |
| "loss": 0.8006, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.4997940409172045, |
| "grad_norm": 0.4589880108833313, |
| "learning_rate": 0.0002777777777777778, |
| "loss": 0.7294, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.5008924893587807, |
| "grad_norm": 0.5679266452789307, |
| "learning_rate": 0.00027765567765567764, |
| "loss": 0.651, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.501990937800357, |
| "grad_norm": 0.4854479134082794, |
| "learning_rate": 0.0002775335775335775, |
| "loss": 0.9908, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.5030893862419332, |
| "grad_norm": 0.4964112341403961, |
| "learning_rate": 0.0002774114774114774, |
| "loss": 0.8084, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.5041878346835096, |
| "grad_norm": 0.5130513906478882, |
| "learning_rate": 0.00027728937728937727, |
| "loss": 0.8389, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.5052862831250858, |
| "grad_norm": 0.4784137010574341, |
| "learning_rate": 0.0002771672771672771, |
| "loss": 0.5497, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5063847315666621, |
| "grad_norm": 0.28685998916625977, |
| "learning_rate": 0.00027704517704517704, |
| "loss": 0.491, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.5074831800082383, |
| "grad_norm": 0.5337100625038147, |
| "learning_rate": 0.0002769230769230769, |
| "loss": 0.8315, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.5085816284498146, |
| "grad_norm": 0.5431344509124756, |
| "learning_rate": 0.00027680097680097675, |
| "loss": 0.5996, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.5096800768913909, |
| "grad_norm": 0.4546130299568176, |
| "learning_rate": 0.00027667887667887667, |
| "loss": 0.5647, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5107785253329672, |
| "grad_norm": 0.6298655271530151, |
| "learning_rate": 0.0002765567765567765, |
| "loss": 0.7684, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5118769737745434, |
| "grad_norm": 0.44330841302871704, |
| "learning_rate": 0.0002764346764346764, |
| "loss": 0.4906, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5129754222161197, |
| "grad_norm": 0.3824306130409241, |
| "learning_rate": 0.0002763125763125763, |
| "loss": 0.6123, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.514073870657696, |
| "grad_norm": 0.3225514590740204, |
| "learning_rate": 0.00027619047619047615, |
| "loss": 0.7535, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5151723190992723, |
| "grad_norm": 0.701239824295044, |
| "learning_rate": 0.00027606837606837607, |
| "loss": 0.9643, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5162707675408486, |
| "grad_norm": 0.37800920009613037, |
| "learning_rate": 0.0002759462759462759, |
| "loss": 0.543, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5173692159824248, |
| "grad_norm": 0.3521328568458557, |
| "learning_rate": 0.0002758241758241758, |
| "loss": 0.7157, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.518467664424001, |
| "grad_norm": 0.2659924626350403, |
| "learning_rate": 0.0002757020757020757, |
| "loss": 0.7334, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5195661128655774, |
| "grad_norm": 0.42815065383911133, |
| "learning_rate": 0.00027557997557997555, |
| "loss": 1.2015, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.5206645613071537, |
| "grad_norm": 0.7758998870849609, |
| "learning_rate": 0.0002754578754578754, |
| "loss": 0.9493, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5217630097487299, |
| "grad_norm": 0.46281251311302185, |
| "learning_rate": 0.0002753357753357753, |
| "loss": 0.9159, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5228614581903062, |
| "grad_norm": 0.3668971061706543, |
| "learning_rate": 0.0002752136752136752, |
| "loss": 0.4869, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5239599066318824, |
| "grad_norm": 0.462534099817276, |
| "learning_rate": 0.00027509157509157504, |
| "loss": 0.6439, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.5250583550734588, |
| "grad_norm": 0.6341688632965088, |
| "learning_rate": 0.00027496947496947495, |
| "loss": 0.6948, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.526156803515035, |
| "grad_norm": 0.5469139814376831, |
| "learning_rate": 0.0002748473748473748, |
| "loss": 1.016, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5272552519566113, |
| "grad_norm": 0.438204288482666, |
| "learning_rate": 0.00027472527472527467, |
| "loss": 0.6941, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5283537003981875, |
| "grad_norm": 0.586700975894928, |
| "learning_rate": 0.0002746031746031746, |
| "loss": 0.6649, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.5294521488397639, |
| "grad_norm": 0.4077949523925781, |
| "learning_rate": 0.0002744810744810745, |
| "loss": 0.5948, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5305505972813401, |
| "grad_norm": 0.3756411373615265, |
| "learning_rate": 0.0002743589743589743, |
| "loss": 0.4915, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.5316490457229164, |
| "grad_norm": 1.2067008018493652, |
| "learning_rate": 0.0002742368742368742, |
| "loss": 0.8795, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5327474941644926, |
| "grad_norm": 0.3097778260707855, |
| "learning_rate": 0.0002741147741147741, |
| "loss": 0.5478, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5338459426060689, |
| "grad_norm": 0.5536866188049316, |
| "learning_rate": 0.000273992673992674, |
| "loss": 0.7042, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5349443910476452, |
| "grad_norm": 0.5930231809616089, |
| "learning_rate": 0.00027387057387057384, |
| "loss": 0.7108, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.5360428394892215, |
| "grad_norm": 0.39304253458976746, |
| "learning_rate": 0.00027374847374847375, |
| "loss": 0.788, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.5371412879307977, |
| "grad_norm": 0.5238274335861206, |
| "learning_rate": 0.0002736263736263736, |
| "loss": 0.9887, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.538239736372374, |
| "grad_norm": 0.5993770956993103, |
| "learning_rate": 0.00027350427350427347, |
| "loss": 0.7819, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5393381848139503, |
| "grad_norm": 0.4601563811302185, |
| "learning_rate": 0.00027338217338217333, |
| "loss": 0.4347, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5404366332555266, |
| "grad_norm": 0.5292415022850037, |
| "learning_rate": 0.00027326007326007324, |
| "loss": 0.5248, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.5415350816971028, |
| "grad_norm": 0.37247565388679504, |
| "learning_rate": 0.0002731379731379731, |
| "loss": 0.5412, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.5426335301386791, |
| "grad_norm": 0.6865994930267334, |
| "learning_rate": 0.00027301587301587296, |
| "loss": 0.8263, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.5437319785802553, |
| "grad_norm": 0.5019715428352356, |
| "learning_rate": 0.00027289377289377287, |
| "loss": 0.7084, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5448304270218317, |
| "grad_norm": 0.8432828783988953, |
| "learning_rate": 0.00027277167277167273, |
| "loss": 0.6188, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.545928875463408, |
| "grad_norm": 0.594881534576416, |
| "learning_rate": 0.0002726495726495726, |
| "loss": 0.8923, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.5470273239049842, |
| "grad_norm": 0.5573694705963135, |
| "learning_rate": 0.0002725274725274725, |
| "loss": 0.6351, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.5481257723465605, |
| "grad_norm": 0.30426710844039917, |
| "learning_rate": 0.0002724053724053724, |
| "loss": 0.6359, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.5492242207881367, |
| "grad_norm": 0.759385883808136, |
| "learning_rate": 0.00027228327228327227, |
| "loss": 0.6131, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5503226692297131, |
| "grad_norm": 0.5436901450157166, |
| "learning_rate": 0.00027216117216117213, |
| "loss": 0.5232, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.5514211176712893, |
| "grad_norm": 0.5924163460731506, |
| "learning_rate": 0.00027203907203907204, |
| "loss": 0.9594, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5525195661128656, |
| "grad_norm": 0.49177658557891846, |
| "learning_rate": 0.0002719169719169719, |
| "loss": 0.842, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.5536180145544418, |
| "grad_norm": 0.4437295198440552, |
| "learning_rate": 0.00027179487179487176, |
| "loss": 1.0338, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5547164629960182, |
| "grad_norm": 0.426213800907135, |
| "learning_rate": 0.00027167277167277167, |
| "loss": 0.6375, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5558149114375944, |
| "grad_norm": 0.4599516689777374, |
| "learning_rate": 0.00027155067155067153, |
| "loss": 0.5005, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.5569133598791707, |
| "grad_norm": 0.647957980632782, |
| "learning_rate": 0.0002714285714285714, |
| "loss": 0.6292, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.5580118083207469, |
| "grad_norm": 0.7891755104064941, |
| "learning_rate": 0.0002713064713064713, |
| "loss": 0.697, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.5591102567623232, |
| "grad_norm": 0.5290817618370056, |
| "learning_rate": 0.00027118437118437116, |
| "loss": 0.4547, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.5602087052038995, |
| "grad_norm": 0.4025941789150238, |
| "learning_rate": 0.000271062271062271, |
| "loss": 0.6299, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5613071536454758, |
| "grad_norm": 0.7768287658691406, |
| "learning_rate": 0.00027094017094017093, |
| "loss": 0.6813, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.562405602087052, |
| "grad_norm": 0.6977662444114685, |
| "learning_rate": 0.0002708180708180708, |
| "loss": 0.8217, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5635040505286283, |
| "grad_norm": 0.5238949060440063, |
| "learning_rate": 0.0002706959706959707, |
| "loss": 0.7348, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.5646024989702045, |
| "grad_norm": 0.5099830627441406, |
| "learning_rate": 0.00027057387057387056, |
| "loss": 0.9894, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5657009474117809, |
| "grad_norm": 0.6254756450653076, |
| "learning_rate": 0.0002704517704517704, |
| "loss": 0.9258, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5667993958533571, |
| "grad_norm": 0.40313196182250977, |
| "learning_rate": 0.00027032967032967033, |
| "loss": 0.8115, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5678978442949334, |
| "grad_norm": 0.9706575274467468, |
| "learning_rate": 0.0002702075702075702, |
| "loss": 0.5204, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5689962927365096, |
| "grad_norm": 0.36777085065841675, |
| "learning_rate": 0.00027008547008547005, |
| "loss": 0.7716, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.570094741178086, |
| "grad_norm": 0.48726886510849, |
| "learning_rate": 0.00026996336996336996, |
| "loss": 0.7745, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5711931896196623, |
| "grad_norm": 0.3590470850467682, |
| "learning_rate": 0.0002698412698412698, |
| "loss": 0.7038, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5722916380612385, |
| "grad_norm": 0.7103118896484375, |
| "learning_rate": 0.0002697191697191697, |
| "loss": 0.8368, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5733900865028148, |
| "grad_norm": 0.5503933429718018, |
| "learning_rate": 0.0002695970695970696, |
| "loss": 0.6164, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.574488534944391, |
| "grad_norm": 0.5255150198936462, |
| "learning_rate": 0.00026947496947496945, |
| "loss": 0.8886, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5755869833859674, |
| "grad_norm": 0.4872569739818573, |
| "learning_rate": 0.0002693528693528693, |
| "loss": 0.6277, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.5766854318275436, |
| "grad_norm": 0.3748464584350586, |
| "learning_rate": 0.0002692307692307692, |
| "loss": 0.6471, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5777838802691199, |
| "grad_norm": 0.4401276111602783, |
| "learning_rate": 0.0002691086691086691, |
| "loss": 0.9846, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5788823287106961, |
| "grad_norm": 0.9565305709838867, |
| "learning_rate": 0.00026898656898656894, |
| "loss": 0.9471, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.5799807771522724, |
| "grad_norm": 0.6307245492935181, |
| "learning_rate": 0.00026886446886446885, |
| "loss": 0.9168, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5810792255938487, |
| "grad_norm": 0.49177634716033936, |
| "learning_rate": 0.0002687423687423687, |
| "loss": 0.5464, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.582177674035425, |
| "grad_norm": 0.68553626537323, |
| "learning_rate": 0.0002686202686202686, |
| "loss": 0.5874, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5832761224770012, |
| "grad_norm": 0.3811597228050232, |
| "learning_rate": 0.0002684981684981685, |
| "loss": 0.766, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.5843745709185775, |
| "grad_norm": 0.6634503602981567, |
| "learning_rate": 0.00026837606837606834, |
| "loss": 0.6438, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5854730193601538, |
| "grad_norm": 0.6115571856498718, |
| "learning_rate": 0.00026825396825396825, |
| "loss": 0.8757, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.5865714678017301, |
| "grad_norm": 0.3011985719203949, |
| "learning_rate": 0.0002681318681318681, |
| "loss": 0.6188, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.5876699162433063, |
| "grad_norm": 0.7029386162757874, |
| "learning_rate": 0.00026800976800976797, |
| "loss": 0.8681, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.5887683646848826, |
| "grad_norm": 0.4796508550643921, |
| "learning_rate": 0.0002678876678876679, |
| "loss": 0.7207, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.5898668131264588, |
| "grad_norm": 0.542948842048645, |
| "learning_rate": 0.00026776556776556774, |
| "loss": 0.5587, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.5909652615680352, |
| "grad_norm": 0.7566731572151184, |
| "learning_rate": 0.0002676434676434676, |
| "loss": 0.8562, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.5920637100096114, |
| "grad_norm": 0.6411837339401245, |
| "learning_rate": 0.0002675213675213675, |
| "loss": 0.4516, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.5931621584511877, |
| "grad_norm": 0.41434159874916077, |
| "learning_rate": 0.00026739926739926737, |
| "loss": 0.7069, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5942606068927639, |
| "grad_norm": 0.29941752552986145, |
| "learning_rate": 0.0002672771672771672, |
| "loss": 0.7444, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.5953590553343402, |
| "grad_norm": 1.8168927431106567, |
| "learning_rate": 0.00026715506715506714, |
| "loss": 0.4947, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.5964575037759166, |
| "grad_norm": 0.5639868974685669, |
| "learning_rate": 0.000267032967032967, |
| "loss": 0.6749, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.5975559522174928, |
| "grad_norm": 0.5054119229316711, |
| "learning_rate": 0.0002669108669108669, |
| "loss": 0.8075, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.598654400659069, |
| "grad_norm": 0.3531246483325958, |
| "learning_rate": 0.00026678876678876677, |
| "loss": 0.6986, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.5997528491006453, |
| "grad_norm": 0.36428287625312805, |
| "learning_rate": 0.0002666666666666666, |
| "loss": 0.6496, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.6008512975422217, |
| "grad_norm": 0.45706960558891296, |
| "learning_rate": 0.00026654456654456654, |
| "loss": 0.5646, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.6019497459837979, |
| "grad_norm": 0.39326363801956177, |
| "learning_rate": 0.0002664224664224664, |
| "loss": 0.5037, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.6030481944253742, |
| "grad_norm": 0.7158151268959045, |
| "learning_rate": 0.00026630036630036625, |
| "loss": 0.5643, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.6041466428669504, |
| "grad_norm": 0.398335337638855, |
| "learning_rate": 0.00026617826617826617, |
| "loss": 0.5462, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6052450913085267, |
| "grad_norm": 0.8625812530517578, |
| "learning_rate": 0.000266056166056166, |
| "loss": 0.7898, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.606343539750103, |
| "grad_norm": 0.5558099150657654, |
| "learning_rate": 0.0002659340659340659, |
| "loss": 0.7968, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.6074419881916793, |
| "grad_norm": 0.6244741678237915, |
| "learning_rate": 0.0002658119658119658, |
| "loss": 0.9085, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.6085404366332555, |
| "grad_norm": 0.4907127916812897, |
| "learning_rate": 0.00026568986568986565, |
| "loss": 0.5683, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.6096388850748318, |
| "grad_norm": 0.6140159964561462, |
| "learning_rate": 0.0002655677655677655, |
| "loss": 0.5693, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.610737333516408, |
| "grad_norm": 0.41251274943351746, |
| "learning_rate": 0.0002654456654456654, |
| "loss": 0.728, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.6118357819579844, |
| "grad_norm": 0.43427684903144836, |
| "learning_rate": 0.00026532356532356534, |
| "loss": 0.5692, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.6129342303995606, |
| "grad_norm": 0.41471078991889954, |
| "learning_rate": 0.00026520146520146514, |
| "loss": 0.6616, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6140326788411369, |
| "grad_norm": 0.4406953752040863, |
| "learning_rate": 0.00026507936507936506, |
| "loss": 0.4764, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.6151311272827131, |
| "grad_norm": 7.233060359954834, |
| "learning_rate": 0.00026495726495726497, |
| "loss": 0.6111, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6162295757242895, |
| "grad_norm": 0.47008857131004333, |
| "learning_rate": 0.0002648351648351648, |
| "loss": 0.8145, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6173280241658657, |
| "grad_norm": 0.47636717557907104, |
| "learning_rate": 0.0002647130647130647, |
| "loss": 0.8036, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.618426472607442, |
| "grad_norm": 0.526971161365509, |
| "learning_rate": 0.0002645909645909646, |
| "loss": 0.7559, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.6195249210490182, |
| "grad_norm": 0.5027382373809814, |
| "learning_rate": 0.00026446886446886446, |
| "loss": 0.7765, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6206233694905945, |
| "grad_norm": 0.4222506284713745, |
| "learning_rate": 0.0002643467643467643, |
| "loss": 0.6376, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.6217218179321709, |
| "grad_norm": 0.6390372514724731, |
| "learning_rate": 0.0002642246642246642, |
| "loss": 0.8224, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6228202663737471, |
| "grad_norm": 0.44495514035224915, |
| "learning_rate": 0.0002641025641025641, |
| "loss": 0.5995, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.6239187148153233, |
| "grad_norm": 0.7005137205123901, |
| "learning_rate": 0.00026398046398046394, |
| "loss": 0.4986, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.6250171632568996, |
| "grad_norm": 0.40745365619659424, |
| "learning_rate": 0.0002638583638583638, |
| "loss": 0.608, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.6261156116984758, |
| "grad_norm": 0.3449142277240753, |
| "learning_rate": 0.0002637362637362637, |
| "loss": 0.6253, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6272140601400522, |
| "grad_norm": 0.4318457841873169, |
| "learning_rate": 0.00026361416361416357, |
| "loss": 0.6376, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.6283125085816285, |
| "grad_norm": 2.2202258110046387, |
| "learning_rate": 0.00026349206349206343, |
| "loss": 0.5477, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6294109570232047, |
| "grad_norm": 0.6759721040725708, |
| "learning_rate": 0.00026336996336996334, |
| "loss": 1.1176, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.630509405464781, |
| "grad_norm": 1.7796927690505981, |
| "learning_rate": 0.00026324786324786326, |
| "loss": 0.8713, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6316078539063573, |
| "grad_norm": 0.32952558994293213, |
| "learning_rate": 0.0002631257631257631, |
| "loss": 0.4711, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6327063023479336, |
| "grad_norm": 0.40390628576278687, |
| "learning_rate": 0.000263003663003663, |
| "loss": 0.5412, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.6338047507895098, |
| "grad_norm": 0.7439208030700684, |
| "learning_rate": 0.0002628815628815629, |
| "loss": 0.7094, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.6349031992310861, |
| "grad_norm": 0.34505775570869446, |
| "learning_rate": 0.00026275946275946274, |
| "loss": 0.5939, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6360016476726623, |
| "grad_norm": 0.9452011585235596, |
| "learning_rate": 0.0002626373626373626, |
| "loss": 0.5108, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.6371000961142387, |
| "grad_norm": 0.42789551615715027, |
| "learning_rate": 0.0002625152625152625, |
| "loss": 0.5661, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6381985445558149, |
| "grad_norm": 0.3460575044155121, |
| "learning_rate": 0.0002623931623931624, |
| "loss": 0.8333, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.6392969929973912, |
| "grad_norm": 0.8932168483734131, |
| "learning_rate": 0.00026227106227106223, |
| "loss": 0.7058, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.6403954414389674, |
| "grad_norm": 0.8588842749595642, |
| "learning_rate": 0.00026214896214896214, |
| "loss": 0.6905, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.6414938898805437, |
| "grad_norm": 0.5097251534461975, |
| "learning_rate": 0.000262026862026862, |
| "loss": 0.8189, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.64259233832212, |
| "grad_norm": 0.45746755599975586, |
| "learning_rate": 0.00026190476190476186, |
| "loss": 0.7212, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.6436907867636963, |
| "grad_norm": 0.9576689600944519, |
| "learning_rate": 0.0002617826617826618, |
| "loss": 0.6159, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.6447892352052725, |
| "grad_norm": 0.5721899271011353, |
| "learning_rate": 0.00026166056166056163, |
| "loss": 0.6083, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.6458876836468488, |
| "grad_norm": 0.4851115942001343, |
| "learning_rate": 0.00026153846153846154, |
| "loss": 0.7678, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.6469861320884251, |
| "grad_norm": 0.6631761193275452, |
| "learning_rate": 0.0002614163614163614, |
| "loss": 0.7068, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.6480845805300014, |
| "grad_norm": 0.6862382292747498, |
| "learning_rate": 0.00026129426129426126, |
| "loss": 0.5766, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6491830289715776, |
| "grad_norm": 0.3754968047142029, |
| "learning_rate": 0.0002611721611721612, |
| "loss": 0.7254, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.6502814774131539, |
| "grad_norm": 0.5239700078964233, |
| "learning_rate": 0.00026105006105006103, |
| "loss": 0.5777, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6513799258547301, |
| "grad_norm": 0.5103443264961243, |
| "learning_rate": 0.0002609279609279609, |
| "loss": 1.0006, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.6524783742963065, |
| "grad_norm": 0.4733884632587433, |
| "learning_rate": 0.0002608058608058608, |
| "loss": 0.6851, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6535768227378828, |
| "grad_norm": 0.5982065796852112, |
| "learning_rate": 0.00026068376068376066, |
| "loss": 0.6295, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.654675271179459, |
| "grad_norm": 1.2408190965652466, |
| "learning_rate": 0.0002605616605616605, |
| "loss": 0.8806, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6557737196210353, |
| "grad_norm": 0.6005455851554871, |
| "learning_rate": 0.00026043956043956043, |
| "loss": 0.7186, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.6568721680626116, |
| "grad_norm": 0.33777105808258057, |
| "learning_rate": 0.0002603174603174603, |
| "loss": 0.4599, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6579706165041879, |
| "grad_norm": 0.5336529612541199, |
| "learning_rate": 0.00026019536019536015, |
| "loss": 0.553, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.6590690649457641, |
| "grad_norm": 0.6930931806564331, |
| "learning_rate": 0.00026007326007326006, |
| "loss": 0.5686, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6601675133873404, |
| "grad_norm": 1.1340439319610596, |
| "learning_rate": 0.0002599511599511599, |
| "loss": 0.5886, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6612659618289166, |
| "grad_norm": 0.9833797812461853, |
| "learning_rate": 0.0002598290598290598, |
| "loss": 0.7109, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.662364410270493, |
| "grad_norm": 0.9305315017700195, |
| "learning_rate": 0.0002597069597069597, |
| "loss": 0.8341, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6634628587120692, |
| "grad_norm": 0.9753265380859375, |
| "learning_rate": 0.00025958485958485955, |
| "loss": 0.7102, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6645613071536455, |
| "grad_norm": 2.2342822551727295, |
| "learning_rate": 0.00025946275946275946, |
| "loss": 0.6784, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6656597555952217, |
| "grad_norm": 0.6815157532691956, |
| "learning_rate": 0.0002593406593406593, |
| "loss": 0.7689, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.666758204036798, |
| "grad_norm": 0.7792591452598572, |
| "learning_rate": 0.0002592185592185592, |
| "loss": 0.9444, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.6678566524783743, |
| "grad_norm": 0.668251097202301, |
| "learning_rate": 0.0002590964590964591, |
| "loss": 0.6899, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6689551009199506, |
| "grad_norm": 0.5041349530220032, |
| "learning_rate": 0.00025897435897435895, |
| "loss": 0.652, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.6700535493615268, |
| "grad_norm": 0.35069939494132996, |
| "learning_rate": 0.0002588522588522588, |
| "loss": 0.8102, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6711519978031031, |
| "grad_norm": 3.324793577194214, |
| "learning_rate": 0.0002587301587301587, |
| "loss": 0.7936, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.6722504462446794, |
| "grad_norm": 0.6778903007507324, |
| "learning_rate": 0.0002586080586080586, |
| "loss": 0.6258, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.6733488946862557, |
| "grad_norm": 3.034745454788208, |
| "learning_rate": 0.00025848595848595844, |
| "loss": 0.697, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.6744473431278319, |
| "grad_norm": 2.563870429992676, |
| "learning_rate": 0.00025836385836385835, |
| "loss": 0.7596, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6755457915694082, |
| "grad_norm": 0.45592913031578064, |
| "learning_rate": 0.0002582417582417582, |
| "loss": 0.7753, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.6766442400109844, |
| "grad_norm": 0.7209720015525818, |
| "learning_rate": 0.00025811965811965807, |
| "loss": 0.6907, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6777426884525608, |
| "grad_norm": 0.4611949026584625, |
| "learning_rate": 0.000257997557997558, |
| "loss": 0.5896, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.678841136894137, |
| "grad_norm": 1.3885395526885986, |
| "learning_rate": 0.0002578754578754579, |
| "loss": 0.6344, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.6799395853357133, |
| "grad_norm": 0.544572651386261, |
| "learning_rate": 0.00025775335775335775, |
| "loss": 0.586, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.6810380337772896, |
| "grad_norm": 0.5637034177780151, |
| "learning_rate": 0.0002576312576312576, |
| "loss": 0.8284, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6821364822188658, |
| "grad_norm": 1.170779824256897, |
| "learning_rate": 0.00025750915750915747, |
| "loss": 0.8818, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.6832349306604422, |
| "grad_norm": 0.4877263605594635, |
| "learning_rate": 0.0002573870573870574, |
| "loss": 0.9179, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.6843333791020184, |
| "grad_norm": 0.6684415340423584, |
| "learning_rate": 0.00025726495726495724, |
| "loss": 0.7358, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.6854318275435947, |
| "grad_norm": 0.6679075956344604, |
| "learning_rate": 0.0002571428571428571, |
| "loss": 0.6342, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.6865302759851709, |
| "grad_norm": 0.65242600440979, |
| "learning_rate": 0.000257020757020757, |
| "loss": 0.4762, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.6876287244267473, |
| "grad_norm": 0.806523859500885, |
| "learning_rate": 0.00025689865689865687, |
| "loss": 0.7621, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.6887271728683235, |
| "grad_norm": 1.09652578830719, |
| "learning_rate": 0.0002567765567765567, |
| "loss": 0.6594, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.6898256213098998, |
| "grad_norm": 0.412505179643631, |
| "learning_rate": 0.00025665445665445664, |
| "loss": 0.8026, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.690924069751476, |
| "grad_norm": 0.5801676511764526, |
| "learning_rate": 0.0002565323565323565, |
| "loss": 0.7026, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.6920225181930523, |
| "grad_norm": 0.6822883486747742, |
| "learning_rate": 0.00025641025641025636, |
| "loss": 0.4372, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6931209666346286, |
| "grad_norm": 0.3455508351325989, |
| "learning_rate": 0.00025628815628815627, |
| "loss": 0.5624, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.6942194150762049, |
| "grad_norm": 0.3533216714859009, |
| "learning_rate": 0.0002561660561660562, |
| "loss": 0.7493, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.6953178635177811, |
| "grad_norm": 1.4306656122207642, |
| "learning_rate": 0.000256043956043956, |
| "loss": 0.7537, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.6964163119593574, |
| "grad_norm": 0.336393266916275, |
| "learning_rate": 0.0002559218559218559, |
| "loss": 0.787, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.6975147604009336, |
| "grad_norm": 0.5303547382354736, |
| "learning_rate": 0.0002557997557997558, |
| "loss": 0.5604, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.69861320884251, |
| "grad_norm": 0.5421821475028992, |
| "learning_rate": 0.00025567765567765567, |
| "loss": 0.6905, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.6997116572840862, |
| "grad_norm": 0.5445061922073364, |
| "learning_rate": 0.00025555555555555553, |
| "loss": 0.6389, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.7008101057256625, |
| "grad_norm": 0.42832881212234497, |
| "learning_rate": 0.00025543345543345544, |
| "loss": 0.7825, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.7019085541672387, |
| "grad_norm": 1.4624862670898438, |
| "learning_rate": 0.0002553113553113553, |
| "loss": 0.4964, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.7030070026088151, |
| "grad_norm": 0.38657426834106445, |
| "learning_rate": 0.00025518925518925516, |
| "loss": 0.5299, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7041054510503914, |
| "grad_norm": 14.422834396362305, |
| "learning_rate": 0.00025506715506715507, |
| "loss": 0.5008, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.7052038994919676, |
| "grad_norm": 0.591106653213501, |
| "learning_rate": 0.00025494505494505493, |
| "loss": 0.6732, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.7063023479335439, |
| "grad_norm": 1.6697375774383545, |
| "learning_rate": 0.0002548229548229548, |
| "loss": 0.6782, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.7074007963751201, |
| "grad_norm": 1.670777678489685, |
| "learning_rate": 0.0002547008547008547, |
| "loss": 0.5275, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.7084992448166965, |
| "grad_norm": 2.3361563682556152, |
| "learning_rate": 0.00025457875457875456, |
| "loss": 0.4177, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.7095976932582727, |
| "grad_norm": 1.823844313621521, |
| "learning_rate": 0.0002544566544566544, |
| "loss": 0.5438, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.710696141699849, |
| "grad_norm": 0.5374146699905396, |
| "learning_rate": 0.0002543345543345543, |
| "loss": 0.6704, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.7117945901414252, |
| "grad_norm": 0.9709361791610718, |
| "learning_rate": 0.0002542124542124542, |
| "loss": 0.8896, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.7128930385830015, |
| "grad_norm": 0.7118197083473206, |
| "learning_rate": 0.0002540903540903541, |
| "loss": 0.766, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.7139914870245778, |
| "grad_norm": 0.4597225487232208, |
| "learning_rate": 0.00025396825396825396, |
| "loss": 0.7498, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7150899354661541, |
| "grad_norm": 0.9708977937698364, |
| "learning_rate": 0.0002538461538461538, |
| "loss": 0.7602, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.7161883839077303, |
| "grad_norm": 0.8156960606575012, |
| "learning_rate": 0.00025372405372405373, |
| "loss": 1.1105, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.7172868323493066, |
| "grad_norm": 1.4135644435882568, |
| "learning_rate": 0.0002536019536019536, |
| "loss": 0.9203, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.7183852807908829, |
| "grad_norm": 0.5754226446151733, |
| "learning_rate": 0.00025347985347985344, |
| "loss": 0.5368, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.7194837292324592, |
| "grad_norm": 1.7644588947296143, |
| "learning_rate": 0.00025335775335775336, |
| "loss": 0.6451, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.7205821776740354, |
| "grad_norm": 4.35576868057251, |
| "learning_rate": 0.0002532356532356532, |
| "loss": 0.6732, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.7216806261156117, |
| "grad_norm": 1.1072558164596558, |
| "learning_rate": 0.0002531135531135531, |
| "loss": 0.7901, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.7227790745571879, |
| "grad_norm": 0.3916113078594208, |
| "learning_rate": 0.000252991452991453, |
| "loss": 0.7153, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.7238775229987643, |
| "grad_norm": 1.055137276649475, |
| "learning_rate": 0.00025286935286935285, |
| "loss": 0.8664, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.7249759714403405, |
| "grad_norm": 0.5966087579727173, |
| "learning_rate": 0.0002527472527472527, |
| "loss": 0.933, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7260744198819168, |
| "grad_norm": 0.40958529710769653, |
| "learning_rate": 0.0002526251526251526, |
| "loss": 0.7196, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.727172868323493, |
| "grad_norm": 0.4636710584163666, |
| "learning_rate": 0.0002525030525030525, |
| "loss": 0.7039, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.7282713167650693, |
| "grad_norm": 0.6967337131500244, |
| "learning_rate": 0.0002523809523809524, |
| "loss": 0.8981, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.7293697652066456, |
| "grad_norm": 0.49781784415245056, |
| "learning_rate": 0.00025225885225885225, |
| "loss": 0.7239, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.7304682136482219, |
| "grad_norm": 0.940851628780365, |
| "learning_rate": 0.0002521367521367521, |
| "loss": 0.8199, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.7315666620897981, |
| "grad_norm": 1.0271226167678833, |
| "learning_rate": 0.000252014652014652, |
| "loss": 0.6757, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.7326651105313744, |
| "grad_norm": 0.5299912095069885, |
| "learning_rate": 0.0002518925518925519, |
| "loss": 0.8464, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.7337635589729508, |
| "grad_norm": 0.7060052156448364, |
| "learning_rate": 0.00025177045177045173, |
| "loss": 0.6541, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.734862007414527, |
| "grad_norm": 0.5419691205024719, |
| "learning_rate": 0.00025164835164835165, |
| "loss": 0.8741, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.7359604558561033, |
| "grad_norm": 0.6363463401794434, |
| "learning_rate": 0.0002515262515262515, |
| "loss": 0.7224, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7370589042976795, |
| "grad_norm": 0.7622922658920288, |
| "learning_rate": 0.00025140415140415136, |
| "loss": 0.9402, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.7381573527392558, |
| "grad_norm": 0.7477490305900574, |
| "learning_rate": 0.0002512820512820513, |
| "loss": 0.6036, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7392558011808321, |
| "grad_norm": 0.4813562333583832, |
| "learning_rate": 0.00025115995115995113, |
| "loss": 0.5982, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.7403542496224084, |
| "grad_norm": 3.112766981124878, |
| "learning_rate": 0.000251037851037851, |
| "loss": 0.5825, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7414526980639846, |
| "grad_norm": 0.9523088932037354, |
| "learning_rate": 0.0002509157509157509, |
| "loss": 0.5698, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7425511465055609, |
| "grad_norm": 0.3426001965999603, |
| "learning_rate": 0.00025079365079365076, |
| "loss": 0.5516, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7436495949471371, |
| "grad_norm": 0.4866350591182709, |
| "learning_rate": 0.0002506715506715506, |
| "loss": 0.5466, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.7447480433887135, |
| "grad_norm": 0.6590595245361328, |
| "learning_rate": 0.00025054945054945053, |
| "loss": 0.7579, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.7458464918302897, |
| "grad_norm": 0.36733704805374146, |
| "learning_rate": 0.0002504273504273504, |
| "loss": 0.5114, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.746944940271866, |
| "grad_norm": 0.5890951156616211, |
| "learning_rate": 0.0002503052503052503, |
| "loss": 0.7196, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7480433887134422, |
| "grad_norm": 0.8393438458442688, |
| "learning_rate": 0.00025018315018315016, |
| "loss": 0.6291, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.7491418371550186, |
| "grad_norm": 0.9745636582374573, |
| "learning_rate": 0.00025006105006105, |
| "loss": 0.8675, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7502402855965948, |
| "grad_norm": 1.1764310598373413, |
| "learning_rate": 0.00024993894993894993, |
| "loss": 0.9384, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.7513387340381711, |
| "grad_norm": 0.6199970245361328, |
| "learning_rate": 0.0002498168498168498, |
| "loss": 0.5984, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.7524371824797473, |
| "grad_norm": 2.2708802223205566, |
| "learning_rate": 0.00024969474969474965, |
| "loss": 0.7867, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.7535356309213236, |
| "grad_norm": 0.6731462478637695, |
| "learning_rate": 0.00024957264957264956, |
| "loss": 0.5377, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7546340793629, |
| "grad_norm": 0.991669774055481, |
| "learning_rate": 0.0002494505494505494, |
| "loss": 0.7015, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.7557325278044762, |
| "grad_norm": 0.5873506665229797, |
| "learning_rate": 0.0002493284493284493, |
| "loss": 0.567, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.7568309762460524, |
| "grad_norm": 1.5025473833084106, |
| "learning_rate": 0.0002492063492063492, |
| "loss": 0.6264, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.7579294246876287, |
| "grad_norm": 0.4942665696144104, |
| "learning_rate": 0.00024908424908424905, |
| "loss": 0.7623, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7590278731292049, |
| "grad_norm": 0.5522105693817139, |
| "learning_rate": 0.0002489621489621489, |
| "loss": 0.6192, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.7601263215707813, |
| "grad_norm": 1.25243079662323, |
| "learning_rate": 0.0002488400488400488, |
| "loss": 0.8547, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.7612247700123576, |
| "grad_norm": 0.5228685140609741, |
| "learning_rate": 0.00024871794871794874, |
| "loss": 0.7365, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.7623232184539338, |
| "grad_norm": 1.5090827941894531, |
| "learning_rate": 0.0002485958485958486, |
| "loss": 0.9226, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.76342166689551, |
| "grad_norm": 3.3617379665374756, |
| "learning_rate": 0.00024847374847374845, |
| "loss": 0.7942, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.7645201153370864, |
| "grad_norm": 0.5350137948989868, |
| "learning_rate": 0.0002483516483516483, |
| "loss": 0.6254, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7656185637786627, |
| "grad_norm": 0.8871312141418457, |
| "learning_rate": 0.0002482295482295482, |
| "loss": 0.8241, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.7667170122202389, |
| "grad_norm": 0.48593926429748535, |
| "learning_rate": 0.0002481074481074481, |
| "loss": 0.5707, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.7678154606618152, |
| "grad_norm": 0.7460000514984131, |
| "learning_rate": 0.00024798534798534794, |
| "loss": 0.9521, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7689139091033914, |
| "grad_norm": 0.7105034589767456, |
| "learning_rate": 0.00024786324786324785, |
| "loss": 0.7513, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7700123575449678, |
| "grad_norm": 0.40251481533050537, |
| "learning_rate": 0.0002477411477411477, |
| "loss": 0.6067, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.771110805986544, |
| "grad_norm": 0.452709436416626, |
| "learning_rate": 0.00024761904761904757, |
| "loss": 0.671, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.7722092544281203, |
| "grad_norm": 0.581453263759613, |
| "learning_rate": 0.0002474969474969475, |
| "loss": 0.5356, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.7733077028696965, |
| "grad_norm": 0.8013669848442078, |
| "learning_rate": 0.00024737484737484734, |
| "loss": 0.6889, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.7744061513112728, |
| "grad_norm": 1.1480565071105957, |
| "learning_rate": 0.0002472527472527472, |
| "loss": 0.7456, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.7755045997528491, |
| "grad_norm": 0.7568329572677612, |
| "learning_rate": 0.0002471306471306471, |
| "loss": 0.7455, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.7766030481944254, |
| "grad_norm": 0.4223226308822632, |
| "learning_rate": 0.000247008547008547, |
| "loss": 0.7138, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.7777014966360016, |
| "grad_norm": 0.372872531414032, |
| "learning_rate": 0.00024688644688644683, |
| "loss": 0.8037, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.7787999450775779, |
| "grad_norm": 0.968614399433136, |
| "learning_rate": 0.00024676434676434674, |
| "loss": 0.5943, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.7798983935191542, |
| "grad_norm": 0.801157534122467, |
| "learning_rate": 0.00024664224664224665, |
| "loss": 0.9467, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7809968419607305, |
| "grad_norm": 0.7115808129310608, |
| "learning_rate": 0.0002465201465201465, |
| "loss": 0.7828, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.7820952904023067, |
| "grad_norm": 1.2951349020004272, |
| "learning_rate": 0.00024639804639804637, |
| "loss": 0.6221, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.783193738843883, |
| "grad_norm": 0.47706693410873413, |
| "learning_rate": 0.0002462759462759463, |
| "loss": 0.3641, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.7842921872854592, |
| "grad_norm": 0.8871097564697266, |
| "learning_rate": 0.00024615384615384614, |
| "loss": 0.6177, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.7853906357270356, |
| "grad_norm": 0.7920973896980286, |
| "learning_rate": 0.000246031746031746, |
| "loss": 0.5858, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.7864890841686119, |
| "grad_norm": 0.49732694029808044, |
| "learning_rate": 0.0002459096459096459, |
| "loss": 0.5176, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.7875875326101881, |
| "grad_norm": 0.34965720772743225, |
| "learning_rate": 0.00024578754578754577, |
| "loss": 0.4983, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.7886859810517644, |
| "grad_norm": 0.45963025093078613, |
| "learning_rate": 0.00024566544566544563, |
| "loss": 0.7756, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.7897844294933407, |
| "grad_norm": 0.5802373290061951, |
| "learning_rate": 0.00024554334554334554, |
| "loss": 0.5773, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.790882877934917, |
| "grad_norm": 1.8482742309570312, |
| "learning_rate": 0.0002454212454212454, |
| "loss": 0.7978, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7919813263764932, |
| "grad_norm": 0.5821959972381592, |
| "learning_rate": 0.00024529914529914526, |
| "loss": 0.7483, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.7930797748180695, |
| "grad_norm": 0.9352701306343079, |
| "learning_rate": 0.0002451770451770451, |
| "loss": 0.6979, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.7941782232596457, |
| "grad_norm": 0.554032564163208, |
| "learning_rate": 0.00024505494505494503, |
| "loss": 0.6773, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.7952766717012221, |
| "grad_norm": 0.6914504766464233, |
| "learning_rate": 0.00024493284493284494, |
| "loss": 0.6548, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.7963751201427983, |
| "grad_norm": 0.40804949402809143, |
| "learning_rate": 0.0002448107448107448, |
| "loss": 0.4634, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.7974735685843746, |
| "grad_norm": 0.4965716302394867, |
| "learning_rate": 0.00024468864468864466, |
| "loss": 0.4879, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.7985720170259508, |
| "grad_norm": 0.48798999190330505, |
| "learning_rate": 0.00024456654456654457, |
| "loss": 0.7003, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.7996704654675271, |
| "grad_norm": 0.6946013569831848, |
| "learning_rate": 0.00024444444444444443, |
| "loss": 0.7508, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.8007689139091034, |
| "grad_norm": 0.4310678243637085, |
| "learning_rate": 0.0002443223443223443, |
| "loss": 0.5765, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.8018673623506797, |
| "grad_norm": 0.5407636761665344, |
| "learning_rate": 0.0002442002442002442, |
| "loss": 0.5445, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8029658107922559, |
| "grad_norm": 0.6281490921974182, |
| "learning_rate": 0.00024407814407814403, |
| "loss": 0.9319, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.8040642592338322, |
| "grad_norm": 1.2027008533477783, |
| "learning_rate": 0.00024395604395604394, |
| "loss": 0.3957, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.8051627076754085, |
| "grad_norm": 0.543230414390564, |
| "learning_rate": 0.00024383394383394383, |
| "loss": 0.7919, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.8062611561169848, |
| "grad_norm": 0.4269828498363495, |
| "learning_rate": 0.0002437118437118437, |
| "loss": 0.6081, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.807359604558561, |
| "grad_norm": 1.2857966423034668, |
| "learning_rate": 0.00024358974358974357, |
| "loss": 0.8654, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.8084580530001373, |
| "grad_norm": 0.6370485424995422, |
| "learning_rate": 0.00024346764346764346, |
| "loss": 0.8053, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.8095565014417135, |
| "grad_norm": 1.1288559436798096, |
| "learning_rate": 0.00024334554334554332, |
| "loss": 0.8709, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.8106549498832899, |
| "grad_norm": 0.5601497292518616, |
| "learning_rate": 0.0002432234432234432, |
| "loss": 0.7982, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.8117533983248661, |
| "grad_norm": 0.476745069026947, |
| "learning_rate": 0.0002431013431013431, |
| "loss": 0.7372, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.8128518467664424, |
| "grad_norm": 0.4287762939929962, |
| "learning_rate": 0.00024297924297924295, |
| "loss": 0.5686, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.8139502952080186, |
| "grad_norm": 0.7039306163787842, |
| "learning_rate": 0.00024285714285714283, |
| "loss": 0.7976, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.8150487436495949, |
| "grad_norm": 0.47433528304100037, |
| "learning_rate": 0.00024273504273504272, |
| "loss": 0.6375, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.8161471920911713, |
| "grad_norm": 0.5443944931030273, |
| "learning_rate": 0.00024261294261294258, |
| "loss": 0.6793, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.8172456405327475, |
| "grad_norm": 0.516094982624054, |
| "learning_rate": 0.00024249084249084246, |
| "loss": 0.785, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.8183440889743238, |
| "grad_norm": 0.6694304347038269, |
| "learning_rate": 0.00024236874236874237, |
| "loss": 0.5431, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.8194425374159, |
| "grad_norm": 0.5309669375419617, |
| "learning_rate": 0.00024224664224664223, |
| "loss": 0.5806, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.8205409858574764, |
| "grad_norm": 0.5502971410751343, |
| "learning_rate": 0.00024212454212454212, |
| "loss": 0.5053, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.8216394342990526, |
| "grad_norm": 0.5242869853973389, |
| "learning_rate": 0.00024200244200244198, |
| "loss": 0.8189, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.8227378827406289, |
| "grad_norm": 0.4131311774253845, |
| "learning_rate": 0.00024188034188034186, |
| "loss": 0.7074, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.8238363311822051, |
| "grad_norm": 0.599915087223053, |
| "learning_rate": 0.00024175824175824175, |
| "loss": 0.9408, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8249347796237814, |
| "grad_norm": 0.3683515191078186, |
| "learning_rate": 0.0002416361416361416, |
| "loss": 0.6675, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.8260332280653577, |
| "grad_norm": 1.633415699005127, |
| "learning_rate": 0.0002415140415140415, |
| "loss": 0.6768, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.827131676506934, |
| "grad_norm": 0.3848377764225006, |
| "learning_rate": 0.00024139194139194138, |
| "loss": 0.485, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.8282301249485102, |
| "grad_norm": 0.4116027355194092, |
| "learning_rate": 0.00024126984126984123, |
| "loss": 0.8253, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.8293285733900865, |
| "grad_norm": 0.5805407762527466, |
| "learning_rate": 0.00024114774114774112, |
| "loss": 0.825, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.8304270218316627, |
| "grad_norm": 1.2401742935180664, |
| "learning_rate": 0.000241025641025641, |
| "loss": 0.6394, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.8315254702732391, |
| "grad_norm": 0.42345038056373596, |
| "learning_rate": 0.00024090354090354086, |
| "loss": 0.6958, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.8326239187148153, |
| "grad_norm": 1.3758116960525513, |
| "learning_rate": 0.00024078144078144075, |
| "loss": 0.6997, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.8337223671563916, |
| "grad_norm": 1.1826672554016113, |
| "learning_rate": 0.00024065934065934066, |
| "loss": 0.7908, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.8348208155979678, |
| "grad_norm": 1.0752373933792114, |
| "learning_rate": 0.0002405372405372405, |
| "loss": 0.8896, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8359192640395442, |
| "grad_norm": 0.3347112834453583, |
| "learning_rate": 0.0002404151404151404, |
| "loss": 0.8202, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.8370177124811204, |
| "grad_norm": 0.5837082266807556, |
| "learning_rate": 0.0002402930402930403, |
| "loss": 0.7502, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.8381161609226967, |
| "grad_norm": 0.5439388751983643, |
| "learning_rate": 0.00024017094017094015, |
| "loss": 0.6928, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.839214609364273, |
| "grad_norm": 0.35348060727119446, |
| "learning_rate": 0.00024004884004884004, |
| "loss": 0.5495, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8403130578058492, |
| "grad_norm": 0.4943974018096924, |
| "learning_rate": 0.00023992673992673992, |
| "loss": 0.9218, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.8414115062474256, |
| "grad_norm": 0.628667414188385, |
| "learning_rate": 0.00023980463980463978, |
| "loss": 0.6266, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.8425099546890018, |
| "grad_norm": 0.822575032711029, |
| "learning_rate": 0.00023968253968253966, |
| "loss": 0.791, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.843608403130578, |
| "grad_norm": 0.3044184446334839, |
| "learning_rate": 0.00023956043956043955, |
| "loss": 0.6048, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.8447068515721543, |
| "grad_norm": 0.40807369351387024, |
| "learning_rate": 0.0002394383394383394, |
| "loss": 0.6286, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.8458053000137306, |
| "grad_norm": 1.2373838424682617, |
| "learning_rate": 0.0002393162393162393, |
| "loss": 0.5133, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8469037484553069, |
| "grad_norm": 0.5104987025260925, |
| "learning_rate": 0.00023919413919413918, |
| "loss": 0.591, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.8480021968968832, |
| "grad_norm": 0.6644220352172852, |
| "learning_rate": 0.00023907203907203904, |
| "loss": 0.7039, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.8491006453384594, |
| "grad_norm": 0.5887960195541382, |
| "learning_rate": 0.00023894993894993892, |
| "loss": 0.7017, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.8501990937800357, |
| "grad_norm": 0.6568577885627747, |
| "learning_rate": 0.00023882783882783878, |
| "loss": 0.6131, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.851297542221612, |
| "grad_norm": 0.6594721674919128, |
| "learning_rate": 0.00023870573870573867, |
| "loss": 0.6079, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8523959906631883, |
| "grad_norm": 12.29937744140625, |
| "learning_rate": 0.00023858363858363858, |
| "loss": 1.1068, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.8534944391047645, |
| "grad_norm": 1.175355315208435, |
| "learning_rate": 0.00023846153846153844, |
| "loss": 0.734, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.8545928875463408, |
| "grad_norm": 1.7128019332885742, |
| "learning_rate": 0.00023833943833943832, |
| "loss": 0.6395, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.855691335987917, |
| "grad_norm": 0.6479717493057251, |
| "learning_rate": 0.0002382173382173382, |
| "loss": 0.8572, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.8567897844294934, |
| "grad_norm": 0.9646544456481934, |
| "learning_rate": 0.00023809523809523807, |
| "loss": 1.1168, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8578882328710696, |
| "grad_norm": 0.8290930986404419, |
| "learning_rate": 0.00023797313797313795, |
| "loss": 0.4413, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.8589866813126459, |
| "grad_norm": 0.6690389513969421, |
| "learning_rate": 0.00023785103785103784, |
| "loss": 1.1878, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8600851297542221, |
| "grad_norm": 0.6602356433868408, |
| "learning_rate": 0.0002377289377289377, |
| "loss": 0.5862, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.8611835781957984, |
| "grad_norm": 0.612316370010376, |
| "learning_rate": 0.00023760683760683758, |
| "loss": 0.7971, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.8622820266373747, |
| "grad_norm": 0.7429434657096863, |
| "learning_rate": 0.00023748473748473747, |
| "loss": 0.6265, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.863380475078951, |
| "grad_norm": 0.40107640624046326, |
| "learning_rate": 0.00023736263736263733, |
| "loss": 0.6697, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.8644789235205272, |
| "grad_norm": 0.45808035135269165, |
| "learning_rate": 0.0002372405372405372, |
| "loss": 0.7443, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.8655773719621035, |
| "grad_norm": 0.36327049136161804, |
| "learning_rate": 0.0002371184371184371, |
| "loss": 0.6518, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.8666758204036799, |
| "grad_norm": 0.45617833733558655, |
| "learning_rate": 0.00023699633699633696, |
| "loss": 0.792, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.8677742688452561, |
| "grad_norm": 0.5354835391044617, |
| "learning_rate": 0.00023687423687423687, |
| "loss": 0.7788, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8688727172868324, |
| "grad_norm": 0.9770327210426331, |
| "learning_rate": 0.00023675213675213675, |
| "loss": 0.7267, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.8699711657284086, |
| "grad_norm": 0.646757960319519, |
| "learning_rate": 0.0002366300366300366, |
| "loss": 0.7234, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.8710696141699849, |
| "grad_norm": 0.4694693982601166, |
| "learning_rate": 0.0002365079365079365, |
| "loss": 0.8261, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.8721680626115612, |
| "grad_norm": 0.9923954606056213, |
| "learning_rate": 0.00023638583638583638, |
| "loss": 0.703, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.8732665110531375, |
| "grad_norm": 1.6440534591674805, |
| "learning_rate": 0.00023626373626373624, |
| "loss": 0.7654, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.8743649594947137, |
| "grad_norm": 0.3947128653526306, |
| "learning_rate": 0.00023614163614163613, |
| "loss": 0.637, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.87546340793629, |
| "grad_norm": 3.4264323711395264, |
| "learning_rate": 0.000236019536019536, |
| "loss": 0.7325, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.8765618563778662, |
| "grad_norm": 0.5469256043434143, |
| "learning_rate": 0.00023589743589743587, |
| "loss": 0.8203, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.8776603048194426, |
| "grad_norm": 0.5184471011161804, |
| "learning_rate": 0.00023577533577533576, |
| "loss": 0.7895, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.8787587532610188, |
| "grad_norm": 0.8231347799301147, |
| "learning_rate": 0.00023565323565323562, |
| "loss": 0.7888, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8798572017025951, |
| "grad_norm": 14.826855659484863, |
| "learning_rate": 0.0002355311355311355, |
| "loss": 0.7564, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.8809556501441713, |
| "grad_norm": 0.5809927582740784, |
| "learning_rate": 0.00023540903540903539, |
| "loss": 0.6702, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.8820540985857477, |
| "grad_norm": 0.7244674563407898, |
| "learning_rate": 0.00023528693528693524, |
| "loss": 0.6475, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.8831525470273239, |
| "grad_norm": 0.8071272373199463, |
| "learning_rate": 0.00023516483516483513, |
| "loss": 0.7434, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.8842509954689002, |
| "grad_norm": 0.6872429847717285, |
| "learning_rate": 0.00023504273504273504, |
| "loss": 0.5968, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.8853494439104764, |
| "grad_norm": 9.353965759277344, |
| "learning_rate": 0.00023492063492063487, |
| "loss": 0.4228, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.8864478923520527, |
| "grad_norm": 0.47151222825050354, |
| "learning_rate": 0.00023479853479853479, |
| "loss": 0.6832, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.887546340793629, |
| "grad_norm": 1.4599422216415405, |
| "learning_rate": 0.00023467643467643467, |
| "loss": 0.6692, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.8886447892352053, |
| "grad_norm": 0.45811519026756287, |
| "learning_rate": 0.00023455433455433453, |
| "loss": 0.787, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.8897432376767815, |
| "grad_norm": 1.077709674835205, |
| "learning_rate": 0.00023443223443223442, |
| "loss": 0.6695, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8908416861183578, |
| "grad_norm": 0.5702061057090759, |
| "learning_rate": 0.0002343101343101343, |
| "loss": 0.5858, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.891940134559934, |
| "grad_norm": 2.2391059398651123, |
| "learning_rate": 0.00023418803418803416, |
| "loss": 0.6688, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.8930385830015104, |
| "grad_norm": 1.6974279880523682, |
| "learning_rate": 0.00023406593406593405, |
| "loss": 0.8545, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.8941370314430866, |
| "grad_norm": 0.983435869216919, |
| "learning_rate": 0.00023394383394383393, |
| "loss": 0.8128, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.8952354798846629, |
| "grad_norm": 0.44103240966796875, |
| "learning_rate": 0.0002338217338217338, |
| "loss": 0.7968, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.8963339283262391, |
| "grad_norm": 1.0707038640975952, |
| "learning_rate": 0.00023369963369963367, |
| "loss": 0.6996, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.8974323767678155, |
| "grad_norm": 0.8029122352600098, |
| "learning_rate": 0.00023357753357753356, |
| "loss": 0.7911, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.8985308252093918, |
| "grad_norm": 0.46339499950408936, |
| "learning_rate": 0.00023345543345543342, |
| "loss": 0.7712, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.899629273650968, |
| "grad_norm": 1.020947813987732, |
| "learning_rate": 0.0002333333333333333, |
| "loss": 0.6865, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.9007277220925443, |
| "grad_norm": 0.5332039594650269, |
| "learning_rate": 0.00023321123321123322, |
| "loss": 0.8352, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.9018261705341205, |
| "grad_norm": 0.40052923560142517, |
| "learning_rate": 0.00023308913308913307, |
| "loss": 0.5435, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.9029246189756969, |
| "grad_norm": 0.6643521189689636, |
| "learning_rate": 0.00023296703296703296, |
| "loss": 0.7406, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.9040230674172731, |
| "grad_norm": 0.7514997720718384, |
| "learning_rate": 0.00023284493284493285, |
| "loss": 0.7595, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.9051215158588494, |
| "grad_norm": 0.7124571204185486, |
| "learning_rate": 0.0002327228327228327, |
| "loss": 0.5736, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.9062199643004256, |
| "grad_norm": 0.6757075786590576, |
| "learning_rate": 0.0002326007326007326, |
| "loss": 0.6275, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.9073184127420019, |
| "grad_norm": 0.4200783669948578, |
| "learning_rate": 0.00023247863247863245, |
| "loss": 0.6267, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.9084168611835782, |
| "grad_norm": 0.5442836284637451, |
| "learning_rate": 0.00023235653235653233, |
| "loss": 0.6814, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.9095153096251545, |
| "grad_norm": 0.4859601557254791, |
| "learning_rate": 0.00023223443223443222, |
| "loss": 0.6451, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.9106137580667307, |
| "grad_norm": 0.7353097200393677, |
| "learning_rate": 0.00023211233211233208, |
| "loss": 0.6723, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.911712206508307, |
| "grad_norm": 0.6389304995536804, |
| "learning_rate": 0.00023199023199023196, |
| "loss": 0.9429, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9128106549498833, |
| "grad_norm": 0.6813933849334717, |
| "learning_rate": 0.00023186813186813185, |
| "loss": 0.5319, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.9139091033914596, |
| "grad_norm": 0.40023690462112427, |
| "learning_rate": 0.0002317460317460317, |
| "loss": 0.5808, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.9150075518330358, |
| "grad_norm": 0.5327205657958984, |
| "learning_rate": 0.0002316239316239316, |
| "loss": 0.6666, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.9161060002746121, |
| "grad_norm": 1.672450065612793, |
| "learning_rate": 0.0002315018315018315, |
| "loss": 0.7758, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.9172044487161883, |
| "grad_norm": 0.5022990703582764, |
| "learning_rate": 0.00023137973137973134, |
| "loss": 0.6309, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.9183028971577647, |
| "grad_norm": 0.43023642897605896, |
| "learning_rate": 0.00023125763125763125, |
| "loss": 0.5343, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.919401345599341, |
| "grad_norm": 0.6878641843795776, |
| "learning_rate": 0.00023113553113553113, |
| "loss": 0.7268, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.9204997940409172, |
| "grad_norm": 0.40551453828811646, |
| "learning_rate": 0.000231013431013431, |
| "loss": 0.5784, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.9215982424824934, |
| "grad_norm": 0.412356436252594, |
| "learning_rate": 0.00023089133089133088, |
| "loss": 0.7685, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.9226966909240698, |
| "grad_norm": 1.1603305339813232, |
| "learning_rate": 0.00023076923076923076, |
| "loss": 0.518, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9237951393656461, |
| "grad_norm": 0.6733229756355286, |
| "learning_rate": 0.00023064713064713062, |
| "loss": 0.5883, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.9248935878072223, |
| "grad_norm": 0.619434654712677, |
| "learning_rate": 0.0002305250305250305, |
| "loss": 0.6244, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9259920362487986, |
| "grad_norm": 0.6989772319793701, |
| "learning_rate": 0.0002304029304029304, |
| "loss": 0.5763, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.9270904846903748, |
| "grad_norm": 0.6276418566703796, |
| "learning_rate": 0.00023028083028083025, |
| "loss": 0.4762, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.9281889331319512, |
| "grad_norm": 0.5577360987663269, |
| "learning_rate": 0.00023015873015873014, |
| "loss": 0.6254, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.9292873815735274, |
| "grad_norm": 0.6185848116874695, |
| "learning_rate": 0.00023003663003663002, |
| "loss": 1.0182, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.9303858300151037, |
| "grad_norm": 1.2415262460708618, |
| "learning_rate": 0.00022991452991452988, |
| "loss": 0.4677, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.9314842784566799, |
| "grad_norm": 0.4582594335079193, |
| "learning_rate": 0.00022979242979242977, |
| "loss": 0.6308, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.9325827268982562, |
| "grad_norm": 0.4749620258808136, |
| "learning_rate": 0.00022967032967032962, |
| "loss": 0.6217, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.9336811753398325, |
| "grad_norm": 0.48614588379859924, |
| "learning_rate": 0.0002295482295482295, |
| "loss": 0.7469, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9347796237814088, |
| "grad_norm": 0.7357453107833862, |
| "learning_rate": 0.00022942612942612942, |
| "loss": 0.5978, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.935878072222985, |
| "grad_norm": 0.53326815366745, |
| "learning_rate": 0.00022930402930402928, |
| "loss": 0.7678, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.9369765206645613, |
| "grad_norm": 0.4853271245956421, |
| "learning_rate": 0.00022918192918192917, |
| "loss": 0.4888, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.9380749691061376, |
| "grad_norm": 1.6529743671417236, |
| "learning_rate": 0.00022905982905982905, |
| "loss": 0.6103, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9391734175477139, |
| "grad_norm": 0.8255143165588379, |
| "learning_rate": 0.0002289377289377289, |
| "loss": 0.6977, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.9402718659892901, |
| "grad_norm": 0.3999016284942627, |
| "learning_rate": 0.0002288156288156288, |
| "loss": 0.5398, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9413703144308664, |
| "grad_norm": 1.933090329170227, |
| "learning_rate": 0.00022869352869352868, |
| "loss": 1.0827, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.9424687628724426, |
| "grad_norm": 0.8884105682373047, |
| "learning_rate": 0.00022857142857142854, |
| "loss": 0.702, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.943567211314019, |
| "grad_norm": 0.4555901885032654, |
| "learning_rate": 0.00022844932844932843, |
| "loss": 0.8737, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.9446656597555952, |
| "grad_norm": 0.535915732383728, |
| "learning_rate": 0.0002283272283272283, |
| "loss": 0.7036, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9457641081971715, |
| "grad_norm": 0.7607597708702087, |
| "learning_rate": 0.00022820512820512817, |
| "loss": 0.8707, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.9468625566387477, |
| "grad_norm": 0.4056457579135895, |
| "learning_rate": 0.00022808302808302805, |
| "loss": 0.6658, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.947961005080324, |
| "grad_norm": 0.5472984313964844, |
| "learning_rate": 0.00022796092796092794, |
| "loss": 0.5429, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.9490594535219004, |
| "grad_norm": 0.6866592764854431, |
| "learning_rate": 0.0002278388278388278, |
| "loss": 0.7343, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9501579019634766, |
| "grad_norm": 0.5244406461715698, |
| "learning_rate": 0.0002277167277167277, |
| "loss": 0.669, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.9512563504050529, |
| "grad_norm": 0.45024383068084717, |
| "learning_rate": 0.0002275946275946276, |
| "loss": 0.9062, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.9523547988466291, |
| "grad_norm": 0.4252873659133911, |
| "learning_rate": 0.00022747252747252745, |
| "loss": 0.6109, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.9534532472882055, |
| "grad_norm": 0.50081467628479, |
| "learning_rate": 0.00022735042735042734, |
| "loss": 0.5266, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9545516957297817, |
| "grad_norm": 0.9674072861671448, |
| "learning_rate": 0.00022722832722832723, |
| "loss": 0.7197, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.955650144171358, |
| "grad_norm": 1.572348952293396, |
| "learning_rate": 0.00022710622710622708, |
| "loss": 0.4728, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9567485926129342, |
| "grad_norm": 0.6033158898353577, |
| "learning_rate": 0.00022698412698412697, |
| "loss": 0.6394, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.9578470410545105, |
| "grad_norm": 0.5810523629188538, |
| "learning_rate": 0.00022686202686202686, |
| "loss": 0.8813, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.9589454894960868, |
| "grad_norm": 0.46345213055610657, |
| "learning_rate": 0.00022673992673992671, |
| "loss": 0.5828, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.9600439379376631, |
| "grad_norm": 0.5414748191833496, |
| "learning_rate": 0.0002266178266178266, |
| "loss": 0.6311, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.9611423863792393, |
| "grad_norm": 0.9083818197250366, |
| "learning_rate": 0.00022649572649572646, |
| "loss": 0.961, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9622408348208156, |
| "grad_norm": 0.786993145942688, |
| "learning_rate": 0.00022637362637362634, |
| "loss": 0.7825, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.9633392832623918, |
| "grad_norm": 0.7639968991279602, |
| "learning_rate": 0.00022625152625152623, |
| "loss": 0.8989, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.9644377317039682, |
| "grad_norm": 0.43360400199890137, |
| "learning_rate": 0.0002261294261294261, |
| "loss": 0.6747, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.9655361801455444, |
| "grad_norm": 0.8512898683547974, |
| "learning_rate": 0.00022600732600732597, |
| "loss": 0.7152, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.9666346285871207, |
| "grad_norm": 0.46903684735298157, |
| "learning_rate": 0.00022588522588522589, |
| "loss": 0.7594, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9677330770286969, |
| "grad_norm": 1.9560080766677856, |
| "learning_rate": 0.00022576312576312572, |
| "loss": 0.598, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.9688315254702733, |
| "grad_norm": 1.1595470905303955, |
| "learning_rate": 0.00022564102564102563, |
| "loss": 0.6005, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.9699299739118495, |
| "grad_norm": 0.7318668365478516, |
| "learning_rate": 0.00022551892551892551, |
| "loss": 0.7327, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.9710284223534258, |
| "grad_norm": 0.6557647585868835, |
| "learning_rate": 0.00022539682539682537, |
| "loss": 0.5858, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.972126870795002, |
| "grad_norm": 0.5645928382873535, |
| "learning_rate": 0.00022527472527472526, |
| "loss": 0.5818, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.9732253192365783, |
| "grad_norm": 0.4630253314971924, |
| "learning_rate": 0.00022515262515262514, |
| "loss": 0.8363, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.9743237676781547, |
| "grad_norm": 0.6750912666320801, |
| "learning_rate": 0.000225030525030525, |
| "loss": 0.8865, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.9754222161197309, |
| "grad_norm": 0.6309487819671631, |
| "learning_rate": 0.0002249084249084249, |
| "loss": 0.5596, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.9765206645613072, |
| "grad_norm": 0.9696050882339478, |
| "learning_rate": 0.00022478632478632477, |
| "loss": 0.7752, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.9776191130028834, |
| "grad_norm": 0.7614735960960388, |
| "learning_rate": 0.00022466422466422463, |
| "loss": 0.7131, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9787175614444596, |
| "grad_norm": 0.4971006214618683, |
| "learning_rate": 0.00022454212454212452, |
| "loss": 0.6218, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.979816009886036, |
| "grad_norm": 0.47809773683547974, |
| "learning_rate": 0.0002244200244200244, |
| "loss": 0.5678, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.9809144583276123, |
| "grad_norm": 0.5959337949752808, |
| "learning_rate": 0.00022429792429792426, |
| "loss": 1.0002, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.9820129067691885, |
| "grad_norm": 0.45277753472328186, |
| "learning_rate": 0.00022417582417582415, |
| "loss": 0.7321, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.9831113552107648, |
| "grad_norm": 1.279405951499939, |
| "learning_rate": 0.00022405372405372406, |
| "loss": 0.7912, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.9842098036523411, |
| "grad_norm": 0.49885687232017517, |
| "learning_rate": 0.00022393162393162392, |
| "loss": 0.5558, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.9853082520939174, |
| "grad_norm": 0.474979430437088, |
| "learning_rate": 0.0002238095238095238, |
| "loss": 0.7095, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.9864067005354936, |
| "grad_norm": 0.3826389014720917, |
| "learning_rate": 0.0002236874236874237, |
| "loss": 0.5695, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.9875051489770699, |
| "grad_norm": 0.33514517545700073, |
| "learning_rate": 0.00022356532356532355, |
| "loss": 0.6341, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.9886035974186461, |
| "grad_norm": 0.5049251914024353, |
| "learning_rate": 0.00022344322344322343, |
| "loss": 0.5577, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9897020458602225, |
| "grad_norm": 0.5179988145828247, |
| "learning_rate": 0.0002233211233211233, |
| "loss": 0.5769, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.9908004943017987, |
| "grad_norm": 0.5194469094276428, |
| "learning_rate": 0.00022319902319902318, |
| "loss": 0.5466, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.991898942743375, |
| "grad_norm": 0.46941491961479187, |
| "learning_rate": 0.00022307692307692306, |
| "loss": 0.642, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.9929973911849512, |
| "grad_norm": 0.379682719707489, |
| "learning_rate": 0.00022295482295482292, |
| "loss": 0.5508, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.9940958396265275, |
| "grad_norm": 1.3844119310379028, |
| "learning_rate": 0.0002228327228327228, |
| "loss": 0.8814, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.9951942880681038, |
| "grad_norm": 2.497697114944458, |
| "learning_rate": 0.0002227106227106227, |
| "loss": 0.8116, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.9962927365096801, |
| "grad_norm": 0.36689239740371704, |
| "learning_rate": 0.00022258852258852255, |
| "loss": 0.5001, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.9973911849512563, |
| "grad_norm": 0.39868447184562683, |
| "learning_rate": 0.00022246642246642243, |
| "loss": 0.6913, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.9984896333928326, |
| "grad_norm": 0.5270336270332336, |
| "learning_rate": 0.00022234432234432235, |
| "loss": 0.5401, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.999588081834409, |
| "grad_norm": 0.4079851508140564, |
| "learning_rate": 0.00022222222222222218, |
| "loss": 0.471, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.000686530275985, |
| "grad_norm": 0.43189048767089844, |
| "learning_rate": 0.0002221001221001221, |
| "loss": 0.8237, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.0017849787175614, |
| "grad_norm": 0.52342289686203, |
| "learning_rate": 0.00022197802197802198, |
| "loss": 0.6363, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.0028834271591378, |
| "grad_norm": 0.38078904151916504, |
| "learning_rate": 0.00022185592185592184, |
| "loss": 0.4411, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.003981875600714, |
| "grad_norm": 0.5302817821502686, |
| "learning_rate": 0.00022173382173382172, |
| "loss": 0.858, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.0050803240422903, |
| "grad_norm": 0.3696751892566681, |
| "learning_rate": 0.0002216117216117216, |
| "loss": 0.8766, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.0061787724838664, |
| "grad_norm": 0.7566766738891602, |
| "learning_rate": 0.00022148962148962146, |
| "loss": 1.067, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.0072772209254428, |
| "grad_norm": 0.7399318218231201, |
| "learning_rate": 0.00022136752136752135, |
| "loss": 0.6683, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.0083756693670192, |
| "grad_norm": 0.5435899496078491, |
| "learning_rate": 0.00022124542124542124, |
| "loss": 0.6045, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.0094741178085953, |
| "grad_norm": 0.9680571556091309, |
| "learning_rate": 0.0002211233211233211, |
| "loss": 0.7546, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.0105725662501717, |
| "grad_norm": 0.6131067872047424, |
| "learning_rate": 0.00022100122100122098, |
| "loss": 0.6655, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.0116710146917478, |
| "grad_norm": 0.8093316555023193, |
| "learning_rate": 0.00022087912087912086, |
| "loss": 0.4812, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.0127694631333242, |
| "grad_norm": 0.5077763199806213, |
| "learning_rate": 0.00022075702075702072, |
| "loss": 0.5357, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.0138679115749005, |
| "grad_norm": 0.4767695963382721, |
| "learning_rate": 0.0002206349206349206, |
| "loss": 0.5807, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.0149663600164767, |
| "grad_norm": 0.3215581178665161, |
| "learning_rate": 0.00022051282051282052, |
| "loss": 0.5773, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.016064808458053, |
| "grad_norm": 0.425603985786438, |
| "learning_rate": 0.00022039072039072035, |
| "loss": 0.5441, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.0171632568996292, |
| "grad_norm": 0.6131730079650879, |
| "learning_rate": 0.00022026862026862027, |
| "loss": 0.856, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.0182617053412055, |
| "grad_norm": 0.5472941398620605, |
| "learning_rate": 0.00022014652014652012, |
| "loss": 0.8228, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.0193601537827819, |
| "grad_norm": 0.46728211641311646, |
| "learning_rate": 0.00022002442002442, |
| "loss": 0.7615, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.020458602224358, |
| "grad_norm": 0.39919501543045044, |
| "learning_rate": 0.0002199023199023199, |
| "loss": 0.709, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.0215570506659344, |
| "grad_norm": 0.564400315284729, |
| "learning_rate": 0.00021978021978021975, |
| "loss": 0.5941, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.0226554991075107, |
| "grad_norm": 0.39073804020881653, |
| "learning_rate": 0.00021965811965811964, |
| "loss": 0.6386, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.0237539475490869, |
| "grad_norm": 0.3725563585758209, |
| "learning_rate": 0.00021953601953601952, |
| "loss": 0.4766, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.0248523959906632, |
| "grad_norm": 1.319197654724121, |
| "learning_rate": 0.00021941391941391938, |
| "loss": 0.8465, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.0259508444322394, |
| "grad_norm": 0.5126785635948181, |
| "learning_rate": 0.00021929181929181927, |
| "loss": 0.5103, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.0270492928738157, |
| "grad_norm": 0.5401897430419922, |
| "learning_rate": 0.00021916971916971915, |
| "loss": 0.5879, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.028147741315392, |
| "grad_norm": 0.47014057636260986, |
| "learning_rate": 0.000219047619047619, |
| "loss": 0.658, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.0292461897569682, |
| "grad_norm": 0.49227291345596313, |
| "learning_rate": 0.0002189255189255189, |
| "loss": 0.5271, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.0303446381985446, |
| "grad_norm": 0.8186778426170349, |
| "learning_rate": 0.00021880341880341878, |
| "loss": 0.6491, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.0314430866401207, |
| "grad_norm": 0.46345674991607666, |
| "learning_rate": 0.00021868131868131864, |
| "loss": 0.7935, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.032541535081697, |
| "grad_norm": 1.7300915718078613, |
| "learning_rate": 0.00021855921855921855, |
| "loss": 0.516, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.0336399835232735, |
| "grad_norm": 0.5100822448730469, |
| "learning_rate": 0.00021843711843711844, |
| "loss": 0.8286, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.0347384319648496, |
| "grad_norm": 0.42278483510017395, |
| "learning_rate": 0.0002183150183150183, |
| "loss": 0.7312, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.035836880406426, |
| "grad_norm": 0.42105185985565186, |
| "learning_rate": 0.00021819291819291818, |
| "loss": 0.5729, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.036935328848002, |
| "grad_norm": 0.5117312669754028, |
| "learning_rate": 0.00021807081807081807, |
| "loss": 0.7688, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.0380337772895785, |
| "grad_norm": 0.4982740879058838, |
| "learning_rate": 0.00021794871794871793, |
| "loss": 0.5746, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.0391322257311548, |
| "grad_norm": 0.5181052684783936, |
| "learning_rate": 0.0002178266178266178, |
| "loss": 0.8446, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.040230674172731, |
| "grad_norm": 5.104315757751465, |
| "learning_rate": 0.0002177045177045177, |
| "loss": 0.9641, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.0413291226143073, |
| "grad_norm": 0.7384645938873291, |
| "learning_rate": 0.00021758241758241756, |
| "loss": 0.7168, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.0424275710558835, |
| "grad_norm": 0.4367550313472748, |
| "learning_rate": 0.00021746031746031744, |
| "loss": 0.7139, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.0435260194974598, |
| "grad_norm": 0.7332566380500793, |
| "learning_rate": 0.00021733821733821733, |
| "loss": 0.7082, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0446244679390362, |
| "grad_norm": 0.4191775918006897, |
| "learning_rate": 0.00021721611721611719, |
| "loss": 0.7986, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.0457229163806123, |
| "grad_norm": 0.33929941058158875, |
| "learning_rate": 0.00021709401709401707, |
| "loss": 0.3784, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.0468213648221887, |
| "grad_norm": 0.5255181789398193, |
| "learning_rate": 0.00021697191697191693, |
| "loss": 0.5842, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.047919813263765, |
| "grad_norm": 0.5401780605316162, |
| "learning_rate": 0.00021684981684981681, |
| "loss": 0.7939, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.0490182617053412, |
| "grad_norm": 0.34873855113983154, |
| "learning_rate": 0.00021672771672771673, |
| "loss": 0.7957, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.0501167101469175, |
| "grad_norm": 0.33418160676956177, |
| "learning_rate": 0.00021660561660561656, |
| "loss": 0.6037, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.0512151585884937, |
| "grad_norm": 0.3197249174118042, |
| "learning_rate": 0.00021648351648351647, |
| "loss": 0.5223, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.05231360703007, |
| "grad_norm": 0.5962835550308228, |
| "learning_rate": 0.00021636141636141636, |
| "loss": 0.5213, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.0534120554716464, |
| "grad_norm": 1.3891643285751343, |
| "learning_rate": 0.00021623931623931622, |
| "loss": 0.6781, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.0545105039132225, |
| "grad_norm": 0.42117932438850403, |
| "learning_rate": 0.0002161172161172161, |
| "loss": 0.6363, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.055608952354799, |
| "grad_norm": 0.4514491558074951, |
| "learning_rate": 0.00021599511599511599, |
| "loss": 0.6904, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.056707400796375, |
| "grad_norm": 0.4863387644290924, |
| "learning_rate": 0.00021587301587301584, |
| "loss": 0.6595, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.0578058492379514, |
| "grad_norm": 0.6178450584411621, |
| "learning_rate": 0.00021575091575091573, |
| "loss": 0.8412, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.0589042976795278, |
| "grad_norm": 0.3728642761707306, |
| "learning_rate": 0.00021562881562881562, |
| "loss": 0.629, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.060002746121104, |
| "grad_norm": 0.7554892301559448, |
| "learning_rate": 0.00021550671550671547, |
| "loss": 0.5804, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.0611011945626803, |
| "grad_norm": 0.550298273563385, |
| "learning_rate": 0.00021538461538461536, |
| "loss": 0.476, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.0621996430042564, |
| "grad_norm": 0.4082244336605072, |
| "learning_rate": 0.00021526251526251524, |
| "loss": 0.4001, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.0632980914458328, |
| "grad_norm": 1.2327499389648438, |
| "learning_rate": 0.0002151404151404151, |
| "loss": 0.4583, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.0643965398874091, |
| "grad_norm": 0.860550045967102, |
| "learning_rate": 0.000215018315018315, |
| "loss": 0.6415, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.0654949883289853, |
| "grad_norm": 0.558860182762146, |
| "learning_rate": 0.0002148962148962149, |
| "loss": 0.6215, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.0665934367705616, |
| "grad_norm": 0.7794890403747559, |
| "learning_rate": 0.00021477411477411476, |
| "loss": 0.5094, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.0676918852121378, |
| "grad_norm": 0.48574942350387573, |
| "learning_rate": 0.00021465201465201465, |
| "loss": 0.7385, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.0687903336537141, |
| "grad_norm": 0.4496791660785675, |
| "learning_rate": 0.00021452991452991453, |
| "loss": 0.5036, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.0698887820952905, |
| "grad_norm": 0.5360952615737915, |
| "learning_rate": 0.0002144078144078144, |
| "loss": 0.6825, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.0709872305368666, |
| "grad_norm": 0.5783904194831848, |
| "learning_rate": 0.00021428571428571427, |
| "loss": 0.6736, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.072085678978443, |
| "grad_norm": 2.290815830230713, |
| "learning_rate": 0.00021416361416361416, |
| "loss": 0.696, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.0731841274200193, |
| "grad_norm": 1.3432899713516235, |
| "learning_rate": 0.00021404151404151402, |
| "loss": 0.5296, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.0742825758615955, |
| "grad_norm": 0.5308722257614136, |
| "learning_rate": 0.0002139194139194139, |
| "loss": 0.6642, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.0753810243031718, |
| "grad_norm": 0.7245768904685974, |
| "learning_rate": 0.00021379731379731376, |
| "loss": 0.6811, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.076479472744748, |
| "grad_norm": 0.3873349726200104, |
| "learning_rate": 0.00021367521367521365, |
| "loss": 0.8503, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.0775779211863243, |
| "grad_norm": 0.5792405605316162, |
| "learning_rate": 0.00021355311355311353, |
| "loss": 0.4543, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.0786763696279005, |
| "grad_norm": 0.6543241143226624, |
| "learning_rate": 0.0002134310134310134, |
| "loss": 0.7778, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.0797748180694768, |
| "grad_norm": 0.5572071075439453, |
| "learning_rate": 0.00021330891330891328, |
| "loss": 0.8446, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.0808732665110532, |
| "grad_norm": 0.5798014402389526, |
| "learning_rate": 0.0002131868131868132, |
| "loss": 0.7461, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.0819717149526293, |
| "grad_norm": 0.8282085657119751, |
| "learning_rate": 0.00021306471306471302, |
| "loss": 0.612, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.0830701633942057, |
| "grad_norm": 0.5782580971717834, |
| "learning_rate": 0.00021294261294261293, |
| "loss": 0.5506, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.084168611835782, |
| "grad_norm": 0.3826775848865509, |
| "learning_rate": 0.00021282051282051282, |
| "loss": 0.7859, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.0852670602773582, |
| "grad_norm": 0.534752368927002, |
| "learning_rate": 0.00021269841269841268, |
| "loss": 0.8835, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.0863655087189346, |
| "grad_norm": 0.45931264758110046, |
| "learning_rate": 0.00021257631257631256, |
| "loss": 0.6694, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.0874639571605107, |
| "grad_norm": 0.6106250286102295, |
| "learning_rate": 0.00021245421245421245, |
| "loss": 0.8274, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.088562405602087, |
| "grad_norm": 0.3704061806201935, |
| "learning_rate": 0.0002123321123321123, |
| "loss": 0.7449, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.0896608540436634, |
| "grad_norm": 0.3922840356826782, |
| "learning_rate": 0.0002122100122100122, |
| "loss": 0.5845, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.0907593024852396, |
| "grad_norm": 0.48152726888656616, |
| "learning_rate": 0.00021208791208791208, |
| "loss": 0.6608, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.091857750926816, |
| "grad_norm": 0.42257216572761536, |
| "learning_rate": 0.00021196581196581194, |
| "loss": 0.6379, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.092956199368392, |
| "grad_norm": 0.4746345579624176, |
| "learning_rate": 0.00021184371184371182, |
| "loss": 0.6467, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.0940546478099684, |
| "grad_norm": 0.3915644884109497, |
| "learning_rate": 0.0002117216117216117, |
| "loss": 0.9699, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.0951530962515448, |
| "grad_norm": 0.5957880020141602, |
| "learning_rate": 0.00021159951159951157, |
| "loss": 0.6917, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.096251544693121, |
| "grad_norm": 0.4327985942363739, |
| "learning_rate": 0.00021147741147741145, |
| "loss": 0.8091, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.0973499931346973, |
| "grad_norm": 0.42600274085998535, |
| "learning_rate": 0.00021135531135531136, |
| "loss": 0.7685, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.0984484415762734, |
| "grad_norm": 0.7165039777755737, |
| "learning_rate": 0.0002112332112332112, |
| "loss": 0.8646, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0995468900178498, |
| "grad_norm": 0.447652131319046, |
| "learning_rate": 0.0002111111111111111, |
| "loss": 0.521, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.1006453384594261, |
| "grad_norm": 0.3022591769695282, |
| "learning_rate": 0.000210989010989011, |
| "loss": 0.6099, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.1017437869010023, |
| "grad_norm": 0.32764387130737305, |
| "learning_rate": 0.00021086691086691085, |
| "loss": 0.5624, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.1028422353425786, |
| "grad_norm": 0.7301959991455078, |
| "learning_rate": 0.00021074481074481074, |
| "loss": 0.6091, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.1039406837841548, |
| "grad_norm": 0.4734131097793579, |
| "learning_rate": 0.0002106227106227106, |
| "loss": 0.6849, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.1050391322257311, |
| "grad_norm": 0.7214820384979248, |
| "learning_rate": 0.00021050061050061048, |
| "loss": 0.789, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.1061375806673075, |
| "grad_norm": 0.31265702843666077, |
| "learning_rate": 0.00021037851037851037, |
| "loss": 0.5176, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.1072360291088836, |
| "grad_norm": 0.5804157257080078, |
| "learning_rate": 0.00021025641025641022, |
| "loss": 1.0152, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.10833447755046, |
| "grad_norm": 0.3624595105648041, |
| "learning_rate": 0.0002101343101343101, |
| "loss": 0.6843, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.1094329259920364, |
| "grad_norm": 0.5099515318870544, |
| "learning_rate": 0.00021001221001221, |
| "loss": 0.5568, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.1105313744336125, |
| "grad_norm": 0.46201249957084656, |
| "learning_rate": 0.00020989010989010985, |
| "loss": 0.5883, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.1116298228751889, |
| "grad_norm": 0.4493483603000641, |
| "learning_rate": 0.00020976800976800974, |
| "loss": 0.8338, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.112728271316765, |
| "grad_norm": 0.4771614968776703, |
| "learning_rate": 0.00020964590964590963, |
| "loss": 0.7251, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.1138267197583414, |
| "grad_norm": 2.073347806930542, |
| "learning_rate": 0.00020952380952380948, |
| "loss": 0.8921, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.1149251681999177, |
| "grad_norm": 0.435680091381073, |
| "learning_rate": 0.0002094017094017094, |
| "loss": 0.5444, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.1160236166414939, |
| "grad_norm": 0.46824783086776733, |
| "learning_rate": 0.00020927960927960928, |
| "loss": 0.5591, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.1171220650830702, |
| "grad_norm": 0.43938374519348145, |
| "learning_rate": 0.00020915750915750914, |
| "loss": 0.7476, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.1182205135246464, |
| "grad_norm": 0.3620377779006958, |
| "learning_rate": 0.00020903540903540903, |
| "loss": 0.5763, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.1193189619662227, |
| "grad_norm": 0.612406313419342, |
| "learning_rate": 0.0002089133089133089, |
| "loss": 0.706, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.120417410407799, |
| "grad_norm": 0.5045173168182373, |
| "learning_rate": 0.00020879120879120877, |
| "loss": 0.6799, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.1215158588493752, |
| "grad_norm": 0.4815331995487213, |
| "learning_rate": 0.00020866910866910865, |
| "loss": 0.8845, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.1226143072909516, |
| "grad_norm": 0.3756159245967865, |
| "learning_rate": 0.00020854700854700854, |
| "loss": 0.5545, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.1237127557325277, |
| "grad_norm": 0.3184347152709961, |
| "learning_rate": 0.0002084249084249084, |
| "loss": 0.5109, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.124811204174104, |
| "grad_norm": 0.4000808298587799, |
| "learning_rate": 0.00020830280830280828, |
| "loss": 0.8363, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.1259096526156804, |
| "grad_norm": 0.3930743336677551, |
| "learning_rate": 0.00020818070818070817, |
| "loss": 0.6183, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.1270081010572566, |
| "grad_norm": 0.7536817789077759, |
| "learning_rate": 0.00020805860805860803, |
| "loss": 0.7511, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.128106549498833, |
| "grad_norm": 0.5012079477310181, |
| "learning_rate": 0.00020793650793650791, |
| "loss": 0.6346, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.129204997940409, |
| "grad_norm": 0.9914690852165222, |
| "learning_rate": 0.00020781440781440783, |
| "loss": 0.5827, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.1303034463819854, |
| "grad_norm": 0.9096476435661316, |
| "learning_rate": 0.00020769230769230766, |
| "loss": 1.0235, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.1314018948235618, |
| "grad_norm": 0.6668229699134827, |
| "learning_rate": 0.00020757020757020757, |
| "loss": 0.741, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.132500343265138, |
| "grad_norm": 0.3232771158218384, |
| "learning_rate": 0.0002074481074481074, |
| "loss": 0.6206, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.1335987917067143, |
| "grad_norm": 0.278003990650177, |
| "learning_rate": 0.00020732600732600731, |
| "loss": 0.5661, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.1346972401482907, |
| "grad_norm": 1.481213927268982, |
| "learning_rate": 0.0002072039072039072, |
| "loss": 0.6422, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.1357956885898668, |
| "grad_norm": 0.4688512682914734, |
| "learning_rate": 0.00020708180708180706, |
| "loss": 0.4163, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.1368941370314432, |
| "grad_norm": 0.6438425779342651, |
| "learning_rate": 0.00020695970695970694, |
| "loss": 0.6241, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.1379925854730193, |
| "grad_norm": 0.5013176798820496, |
| "learning_rate": 0.00020683760683760683, |
| "loss": 0.6273, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.1390910339145957, |
| "grad_norm": 0.5178597569465637, |
| "learning_rate": 0.0002067155067155067, |
| "loss": 0.7489, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.1401894823561718, |
| "grad_norm": 0.5804840922355652, |
| "learning_rate": 0.00020659340659340657, |
| "loss": 0.9142, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.1412879307977482, |
| "grad_norm": 0.47613444924354553, |
| "learning_rate": 0.00020647130647130646, |
| "loss": 0.9531, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.1423863792393245, |
| "grad_norm": 0.4835624694824219, |
| "learning_rate": 0.00020634920634920632, |
| "loss": 0.6349, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.1434848276809007, |
| "grad_norm": 0.38351112604141235, |
| "learning_rate": 0.0002062271062271062, |
| "loss": 0.4726, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.144583276122477, |
| "grad_norm": 0.5533854365348816, |
| "learning_rate": 0.0002061050061050061, |
| "loss": 0.5108, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.1456817245640534, |
| "grad_norm": 0.4842824637889862, |
| "learning_rate": 0.00020598290598290595, |
| "loss": 0.6038, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.1467801730056295, |
| "grad_norm": 0.552798330783844, |
| "learning_rate": 0.00020586080586080583, |
| "loss": 0.8056, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.1478786214472059, |
| "grad_norm": 0.40466025471687317, |
| "learning_rate": 0.00020573870573870574, |
| "loss": 0.6234, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.148977069888782, |
| "grad_norm": 0.6988784074783325, |
| "learning_rate": 0.0002056166056166056, |
| "loss": 0.7721, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.1500755183303584, |
| "grad_norm": 0.4852863550186157, |
| "learning_rate": 0.0002054945054945055, |
| "loss": 0.6074, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.1511739667719347, |
| "grad_norm": 0.4548696279525757, |
| "learning_rate": 0.00020537240537240537, |
| "loss": 0.5592, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.1522724152135109, |
| "grad_norm": 0.9355410933494568, |
| "learning_rate": 0.00020525030525030523, |
| "loss": 0.8618, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.1533708636550872, |
| "grad_norm": 0.5641398429870605, |
| "learning_rate": 0.00020512820512820512, |
| "loss": 0.704, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.1544693120966634, |
| "grad_norm": 0.48187771439552307, |
| "learning_rate": 0.000205006105006105, |
| "loss": 0.6008, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.1555677605382397, |
| "grad_norm": 0.41609904170036316, |
| "learning_rate": 0.00020488400488400486, |
| "loss": 0.8812, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.156666208979816, |
| "grad_norm": 0.919477105140686, |
| "learning_rate": 0.00020476190476190475, |
| "loss": 0.6597, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.1577646574213922, |
| "grad_norm": 0.5008611083030701, |
| "learning_rate": 0.0002046398046398046, |
| "loss": 0.6501, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.1588631058629686, |
| "grad_norm": 0.39832696318626404, |
| "learning_rate": 0.0002045177045177045, |
| "loss": 0.6232, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.159961554304545, |
| "grad_norm": 0.5290446281433105, |
| "learning_rate": 0.00020439560439560438, |
| "loss": 0.6123, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.161060002746121, |
| "grad_norm": 0.40837669372558594, |
| "learning_rate": 0.00020427350427350423, |
| "loss": 0.4989, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.1621584511876974, |
| "grad_norm": 0.43407055735588074, |
| "learning_rate": 0.00020415140415140412, |
| "loss": 0.6961, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.1632568996292736, |
| "grad_norm": 0.7601787447929382, |
| "learning_rate": 0.00020402930402930403, |
| "loss": 0.9308, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.16435534807085, |
| "grad_norm": 0.452628493309021, |
| "learning_rate": 0.00020390720390720386, |
| "loss": 0.6478, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.165453796512426, |
| "grad_norm": 0.4524000287055969, |
| "learning_rate": 0.00020378510378510378, |
| "loss": 0.4499, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.1665522449540024, |
| "grad_norm": 0.5971822142601013, |
| "learning_rate": 0.00020366300366300366, |
| "loss": 0.6402, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.1676506933955788, |
| "grad_norm": 0.36858659982681274, |
| "learning_rate": 0.00020354090354090352, |
| "loss": 0.6511, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.168749141837155, |
| "grad_norm": 0.47295433282852173, |
| "learning_rate": 0.0002034188034188034, |
| "loss": 0.5977, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.1698475902787313, |
| "grad_norm": 0.4402971565723419, |
| "learning_rate": 0.0002032967032967033, |
| "loss": 0.4824, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.1709460387203077, |
| "grad_norm": 0.3752620816230774, |
| "learning_rate": 0.00020317460317460315, |
| "loss": 0.6519, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.1720444871618838, |
| "grad_norm": 0.45207279920578003, |
| "learning_rate": 0.00020305250305250303, |
| "loss": 0.6869, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.1731429356034602, |
| "grad_norm": 0.4255804121494293, |
| "learning_rate": 0.00020293040293040292, |
| "loss": 0.7289, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.1742413840450363, |
| "grad_norm": 0.48725178837776184, |
| "learning_rate": 0.00020280830280830278, |
| "loss": 0.5472, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.1753398324866127, |
| "grad_norm": 0.37094470858573914, |
| "learning_rate": 0.00020268620268620266, |
| "loss": 0.558, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.176438280928189, |
| "grad_norm": 0.4191375970840454, |
| "learning_rate": 0.00020256410256410255, |
| "loss": 0.6422, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.1775367293697652, |
| "grad_norm": 0.4091531038284302, |
| "learning_rate": 0.0002024420024420024, |
| "loss": 0.6705, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.1786351778113415, |
| "grad_norm": 0.4876718521118164, |
| "learning_rate": 0.0002023199023199023, |
| "loss": 0.8265, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.1797336262529177, |
| "grad_norm": 0.43008798360824585, |
| "learning_rate": 0.0002021978021978022, |
| "loss": 0.5159, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.180832074694494, |
| "grad_norm": 0.47896140813827515, |
| "learning_rate": 0.00020207570207570204, |
| "loss": 0.5455, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.1819305231360704, |
| "grad_norm": 0.5313389301300049, |
| "learning_rate": 0.00020195360195360195, |
| "loss": 0.7628, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.1830289715776465, |
| "grad_norm": 0.46337512135505676, |
| "learning_rate": 0.00020183150183150184, |
| "loss": 0.6661, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.1841274200192229, |
| "grad_norm": 0.4304458498954773, |
| "learning_rate": 0.0002017094017094017, |
| "loss": 0.7019, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.185225868460799, |
| "grad_norm": 0.638445258140564, |
| "learning_rate": 0.00020158730158730158, |
| "loss": 0.6972, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.1863243169023754, |
| "grad_norm": 1.8217968940734863, |
| "learning_rate": 0.00020146520146520144, |
| "loss": 0.5217, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.1874227653439517, |
| "grad_norm": 0.4996611773967743, |
| "learning_rate": 0.00020134310134310132, |
| "loss": 0.6767, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.1885212137855279, |
| "grad_norm": 0.43705832958221436, |
| "learning_rate": 0.0002012210012210012, |
| "loss": 0.7364, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.1896196622271042, |
| "grad_norm": 0.4148736596107483, |
| "learning_rate": 0.00020109890109890107, |
| "loss": 0.7544, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.1907181106686804, |
| "grad_norm": 0.5772218108177185, |
| "learning_rate": 0.00020097680097680095, |
| "loss": 0.6349, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.1918165591102567, |
| "grad_norm": 0.9127015471458435, |
| "learning_rate": 0.00020085470085470084, |
| "loss": 0.4772, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.192915007551833, |
| "grad_norm": 0.46906840801239014, |
| "learning_rate": 0.0002007326007326007, |
| "loss": 0.6184, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.1940134559934092, |
| "grad_norm": 0.38405168056488037, |
| "learning_rate": 0.00020061050061050058, |
| "loss": 0.5027, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.1951119044349856, |
| "grad_norm": 0.6352836489677429, |
| "learning_rate": 0.00020048840048840047, |
| "loss": 0.6674, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.196210352876562, |
| "grad_norm": 0.6750807762145996, |
| "learning_rate": 0.00020036630036630033, |
| "loss": 0.5707, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.197308801318138, |
| "grad_norm": 0.5661985874176025, |
| "learning_rate": 0.00020024420024420024, |
| "loss": 0.8298, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.1984072497597145, |
| "grad_norm": 0.6393309831619263, |
| "learning_rate": 0.00020012210012210012, |
| "loss": 0.7397, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.1995056982012906, |
| "grad_norm": 0.5442856550216675, |
| "learning_rate": 0.00019999999999999998, |
| "loss": 0.7176, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.200604146642867, |
| "grad_norm": 1.0100654363632202, |
| "learning_rate": 0.00019987789987789987, |
| "loss": 0.8052, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.201702595084443, |
| "grad_norm": 0.3916209936141968, |
| "learning_rate": 0.00019975579975579975, |
| "loss": 0.5951, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.2028010435260195, |
| "grad_norm": 0.3890608847141266, |
| "learning_rate": 0.0001996336996336996, |
| "loss": 0.8129, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.2038994919675958, |
| "grad_norm": 0.4267507493495941, |
| "learning_rate": 0.0001995115995115995, |
| "loss": 0.8741, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.204997940409172, |
| "grad_norm": 0.49055561423301697, |
| "learning_rate": 0.00019938949938949938, |
| "loss": 0.901, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.2060963888507483, |
| "grad_norm": 0.6662428379058838, |
| "learning_rate": 0.00019926739926739924, |
| "loss": 0.4971, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.2071948372923247, |
| "grad_norm": 0.4469052255153656, |
| "learning_rate": 0.00019914529914529913, |
| "loss": 0.6593, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.2082932857339008, |
| "grad_norm": 0.5514255166053772, |
| "learning_rate": 0.000199023199023199, |
| "loss": 0.8033, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2093917341754772, |
| "grad_norm": 0.4838184714317322, |
| "learning_rate": 0.00019890109890109887, |
| "loss": 0.5533, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.2104901826170533, |
| "grad_norm": 0.6061891913414001, |
| "learning_rate": 0.00019877899877899876, |
| "loss": 0.5837, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.2115886310586297, |
| "grad_norm": 0.3387523889541626, |
| "learning_rate": 0.00019865689865689867, |
| "loss": 0.455, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.212687079500206, |
| "grad_norm": 0.5204731225967407, |
| "learning_rate": 0.0001985347985347985, |
| "loss": 0.6869, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.2137855279417822, |
| "grad_norm": 0.5747571587562561, |
| "learning_rate": 0.0001984126984126984, |
| "loss": 0.7208, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.2148839763833585, |
| "grad_norm": 0.5382461547851562, |
| "learning_rate": 0.00019829059829059824, |
| "loss": 0.6035, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.2159824248249347, |
| "grad_norm": 0.44335421919822693, |
| "learning_rate": 0.00019816849816849816, |
| "loss": 0.8563, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.217080873266511, |
| "grad_norm": 0.3059934675693512, |
| "learning_rate": 0.00019804639804639804, |
| "loss": 0.6422, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.2181793217080874, |
| "grad_norm": 0.4306177794933319, |
| "learning_rate": 0.0001979242979242979, |
| "loss": 0.5347, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.2192777701496635, |
| "grad_norm": 0.5196095705032349, |
| "learning_rate": 0.00019780219780219779, |
| "loss": 0.5996, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.22037621859124, |
| "grad_norm": 0.4814283549785614, |
| "learning_rate": 0.00019768009768009767, |
| "loss": 0.6782, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.2214746670328163, |
| "grad_norm": 0.2287791222333908, |
| "learning_rate": 0.00019755799755799753, |
| "loss": 0.5908, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.2225731154743924, |
| "grad_norm": 0.43044313788414, |
| "learning_rate": 0.00019743589743589742, |
| "loss": 0.6554, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.2236715639159688, |
| "grad_norm": 0.390874445438385, |
| "learning_rate": 0.0001973137973137973, |
| "loss": 0.5777, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.224770012357545, |
| "grad_norm": 0.5380458235740662, |
| "learning_rate": 0.00019719169719169716, |
| "loss": 0.467, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.2258684607991213, |
| "grad_norm": 0.6176440119743347, |
| "learning_rate": 0.00019706959706959704, |
| "loss": 0.5625, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.2269669092406974, |
| "grad_norm": 0.4321332275867462, |
| "learning_rate": 0.00019694749694749693, |
| "loss": 0.7262, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.2280653576822738, |
| "grad_norm": 0.5679623484611511, |
| "learning_rate": 0.0001968253968253968, |
| "loss": 0.8216, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.2291638061238501, |
| "grad_norm": 0.4741218686103821, |
| "learning_rate": 0.00019670329670329667, |
| "loss": 0.7164, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.2302622545654263, |
| "grad_norm": 0.6570267677307129, |
| "learning_rate": 0.00019658119658119659, |
| "loss": 0.7606, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.2313607030070026, |
| "grad_norm": 0.4256306290626526, |
| "learning_rate": 0.00019645909645909644, |
| "loss": 0.5137, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.232459151448579, |
| "grad_norm": 0.4444984793663025, |
| "learning_rate": 0.00019633699633699633, |
| "loss": 0.8863, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.2335575998901551, |
| "grad_norm": 0.458133339881897, |
| "learning_rate": 0.00019621489621489622, |
| "loss": 0.6445, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.2346560483317315, |
| "grad_norm": 0.6087627410888672, |
| "learning_rate": 0.00019609279609279607, |
| "loss": 0.5625, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.2357544967733076, |
| "grad_norm": 0.42782312631607056, |
| "learning_rate": 0.00019597069597069596, |
| "loss": 0.6321, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.236852945214884, |
| "grad_norm": 0.49623987078666687, |
| "learning_rate": 0.00019584859584859585, |
| "loss": 0.6473, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.2379513936564603, |
| "grad_norm": 0.5348198413848877, |
| "learning_rate": 0.0001957264957264957, |
| "loss": 0.6948, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.2390498420980365, |
| "grad_norm": 0.44476062059402466, |
| "learning_rate": 0.0001956043956043956, |
| "loss": 0.5917, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.2401482905396128, |
| "grad_norm": 0.5777286291122437, |
| "learning_rate": 0.00019548229548229547, |
| "loss": 0.7474, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.241246738981189, |
| "grad_norm": 0.3132689893245697, |
| "learning_rate": 0.00019536019536019533, |
| "loss": 0.5827, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.2423451874227653, |
| "grad_norm": 0.3898192346096039, |
| "learning_rate": 0.00019523809523809522, |
| "loss": 0.5469, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.2434436358643417, |
| "grad_norm": 0.338693767786026, |
| "learning_rate": 0.00019511599511599508, |
| "loss": 0.704, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.2445420843059178, |
| "grad_norm": 0.4276609718799591, |
| "learning_rate": 0.00019499389499389496, |
| "loss": 0.7269, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.2456405327474942, |
| "grad_norm": 0.7320281863212585, |
| "learning_rate": 0.00019487179487179487, |
| "loss": 0.62, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.2467389811890706, |
| "grad_norm": 0.4023820757865906, |
| "learning_rate": 0.0001947496947496947, |
| "loss": 0.4234, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.2478374296306467, |
| "grad_norm": 0.3218212425708771, |
| "learning_rate": 0.00019462759462759462, |
| "loss": 0.5325, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.248935878072223, |
| "grad_norm": 0.45131513476371765, |
| "learning_rate": 0.0001945054945054945, |
| "loss": 0.5667, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.2500343265137992, |
| "grad_norm": 0.604475200176239, |
| "learning_rate": 0.00019438339438339436, |
| "loss": 0.9018, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.2511327749553756, |
| "grad_norm": 0.46968311071395874, |
| "learning_rate": 0.00019426129426129425, |
| "loss": 0.7946, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.2522312233969517, |
| "grad_norm": 0.3960346281528473, |
| "learning_rate": 0.00019413919413919413, |
| "loss": 0.7719, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.253329671838528, |
| "grad_norm": 0.5146461129188538, |
| "learning_rate": 0.000194017094017094, |
| "loss": 0.8946, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.2544281202801044, |
| "grad_norm": 0.6343802809715271, |
| "learning_rate": 0.00019389499389499388, |
| "loss": 0.7822, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.2555265687216806, |
| "grad_norm": 0.4646434485912323, |
| "learning_rate": 0.00019377289377289376, |
| "loss": 0.6722, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.256625017163257, |
| "grad_norm": 0.48127877712249756, |
| "learning_rate": 0.00019365079365079362, |
| "loss": 0.9059, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.2577234656048333, |
| "grad_norm": 0.4040716290473938, |
| "learning_rate": 0.0001935286935286935, |
| "loss": 0.7288, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.2588219140464094, |
| "grad_norm": 0.43992865085601807, |
| "learning_rate": 0.0001934065934065934, |
| "loss": 0.5804, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.2599203624879858, |
| "grad_norm": 0.41578513383865356, |
| "learning_rate": 0.00019328449328449325, |
| "loss": 0.5459, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.261018810929562, |
| "grad_norm": 0.40165719389915466, |
| "learning_rate": 0.00019316239316239314, |
| "loss": 0.6001, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.2621172593711383, |
| "grad_norm": 0.43200212717056274, |
| "learning_rate": 0.00019304029304029305, |
| "loss": 0.8712, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.2632157078127144, |
| "grad_norm": 0.3217264413833618, |
| "learning_rate": 0.00019291819291819288, |
| "loss": 0.6074, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2643141562542908, |
| "grad_norm": 0.3964528441429138, |
| "learning_rate": 0.0001927960927960928, |
| "loss": 0.6131, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.2654126046958671, |
| "grad_norm": 0.5151070952415466, |
| "learning_rate": 0.00019267399267399268, |
| "loss": 0.6992, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.2665110531374433, |
| "grad_norm": 0.5902129411697388, |
| "learning_rate": 0.00019255189255189254, |
| "loss": 0.7311, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.2676095015790196, |
| "grad_norm": 0.5386108160018921, |
| "learning_rate": 0.00019242979242979242, |
| "loss": 0.6469, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.268707950020596, |
| "grad_norm": 0.384093701839447, |
| "learning_rate": 0.0001923076923076923, |
| "loss": 0.7111, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.2698063984621721, |
| "grad_norm": 0.34160250425338745, |
| "learning_rate": 0.00019218559218559217, |
| "loss": 0.5396, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.2709048469037485, |
| "grad_norm": 0.6590912938117981, |
| "learning_rate": 0.00019206349206349205, |
| "loss": 1.1613, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.2720032953453249, |
| "grad_norm": 0.6230842471122742, |
| "learning_rate": 0.0001919413919413919, |
| "loss": 0.7701, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.273101743786901, |
| "grad_norm": 0.3881864547729492, |
| "learning_rate": 0.0001918192918192918, |
| "loss": 0.633, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.2742001922284774, |
| "grad_norm": 0.4538264274597168, |
| "learning_rate": 0.00019169719169719168, |
| "loss": 0.451, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.2752986406700535, |
| "grad_norm": 0.6188018321990967, |
| "learning_rate": 0.00019157509157509154, |
| "loss": 0.9563, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.2763970891116299, |
| "grad_norm": 0.4172852039337158, |
| "learning_rate": 0.00019145299145299142, |
| "loss": 0.8284, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.277495537553206, |
| "grad_norm": 0.338623583316803, |
| "learning_rate": 0.0001913308913308913, |
| "loss": 0.6745, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.2785939859947824, |
| "grad_norm": 0.3960900902748108, |
| "learning_rate": 0.00019120879120879117, |
| "loss": 0.6508, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.2796924344363587, |
| "grad_norm": 0.37232962250709534, |
| "learning_rate": 0.00019108669108669108, |
| "loss": 0.7347, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.2807908828779349, |
| "grad_norm": 0.47092223167419434, |
| "learning_rate": 0.00019096459096459097, |
| "loss": 0.8251, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.2818893313195112, |
| "grad_norm": 0.4647108316421509, |
| "learning_rate": 0.00019084249084249082, |
| "loss": 0.556, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.2829877797610876, |
| "grad_norm": 0.5812810659408569, |
| "learning_rate": 0.0001907203907203907, |
| "loss": 0.6802, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.2840862282026637, |
| "grad_norm": 0.3731052279472351, |
| "learning_rate": 0.0001905982905982906, |
| "loss": 0.6384, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.28518467664424, |
| "grad_norm": 0.47995856404304504, |
| "learning_rate": 0.00019047619047619045, |
| "loss": 0.4914, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.2862831250858162, |
| "grad_norm": 0.3223705589771271, |
| "learning_rate": 0.00019035409035409034, |
| "loss": 0.6676, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.2873815735273926, |
| "grad_norm": 0.5643377304077148, |
| "learning_rate": 0.00019023199023199023, |
| "loss": 0.8224, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.2884800219689687, |
| "grad_norm": 0.48324450850486755, |
| "learning_rate": 0.00019010989010989008, |
| "loss": 0.8005, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.289578470410545, |
| "grad_norm": 0.40516728162765503, |
| "learning_rate": 0.00018998778998778997, |
| "loss": 0.5463, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.2906769188521214, |
| "grad_norm": 0.45521625876426697, |
| "learning_rate": 0.00018986568986568985, |
| "loss": 0.7562, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.2917753672936976, |
| "grad_norm": 0.38747909665107727, |
| "learning_rate": 0.0001897435897435897, |
| "loss": 0.5074, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.292873815735274, |
| "grad_norm": 0.39688000082969666, |
| "learning_rate": 0.0001896214896214896, |
| "loss": 0.3551, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.2939722641768503, |
| "grad_norm": 0.6891604065895081, |
| "learning_rate": 0.0001894993894993895, |
| "loss": 0.601, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.2950707126184264, |
| "grad_norm": 0.5177300572395325, |
| "learning_rate": 0.00018937728937728934, |
| "loss": 0.5188, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.2961691610600028, |
| "grad_norm": 0.3166979253292084, |
| "learning_rate": 0.00018925518925518926, |
| "loss": 0.8411, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.2972676095015792, |
| "grad_norm": 0.6637437343597412, |
| "learning_rate": 0.00018913308913308914, |
| "loss": 0.7256, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.2983660579431553, |
| "grad_norm": 0.424932599067688, |
| "learning_rate": 0.000189010989010989, |
| "loss": 0.783, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.2994645063847314, |
| "grad_norm": 0.47751033306121826, |
| "learning_rate": 0.00018888888888888888, |
| "loss": 0.7039, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.3005629548263078, |
| "grad_norm": 0.4332704544067383, |
| "learning_rate": 0.00018876678876678874, |
| "loss": 0.4797, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.3016614032678842, |
| "grad_norm": 0.439431756734848, |
| "learning_rate": 0.00018864468864468863, |
| "loss": 0.6256, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.3027598517094603, |
| "grad_norm": 0.4334176480770111, |
| "learning_rate": 0.00018852258852258851, |
| "loss": 0.5583, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.3038583001510367, |
| "grad_norm": 0.42080724239349365, |
| "learning_rate": 0.00018840048840048837, |
| "loss": 0.461, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.304956748592613, |
| "grad_norm": 0.41007399559020996, |
| "learning_rate": 0.00018827838827838826, |
| "loss": 0.4746, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.3060551970341892, |
| "grad_norm": 0.3763822019100189, |
| "learning_rate": 0.00018815628815628814, |
| "loss": 0.5352, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.3071536454757655, |
| "grad_norm": 0.5557730197906494, |
| "learning_rate": 0.000188034188034188, |
| "loss": 0.5404, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.3082520939173419, |
| "grad_norm": 0.43677788972854614, |
| "learning_rate": 0.0001879120879120879, |
| "loss": 0.7111, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.309350542358918, |
| "grad_norm": 0.6084219217300415, |
| "learning_rate": 0.00018778998778998777, |
| "loss": 0.7524, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.3104489908004944, |
| "grad_norm": 0.7219144701957703, |
| "learning_rate": 0.00018766788766788763, |
| "loss": 0.6182, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.3115474392420705, |
| "grad_norm": 0.5280331969261169, |
| "learning_rate": 0.00018754578754578752, |
| "loss": 0.8023, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.3126458876836469, |
| "grad_norm": 0.42130032181739807, |
| "learning_rate": 0.00018742368742368743, |
| "loss": 0.5673, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.313744336125223, |
| "grad_norm": 0.6063292026519775, |
| "learning_rate": 0.0001873015873015873, |
| "loss": 0.6438, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.3148427845667994, |
| "grad_norm": 0.4073690176010132, |
| "learning_rate": 0.00018717948717948717, |
| "loss": 0.7099, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.3159412330083757, |
| "grad_norm": 0.5419113636016846, |
| "learning_rate": 0.00018705738705738706, |
| "loss": 0.6451, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.3170396814499519, |
| "grad_norm": 0.4489867091178894, |
| "learning_rate": 0.00018693528693528692, |
| "loss": 0.7522, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.3181381298915282, |
| "grad_norm": 0.3536837697029114, |
| "learning_rate": 0.0001868131868131868, |
| "loss": 0.6201, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3192365783331046, |
| "grad_norm": 0.42462313175201416, |
| "learning_rate": 0.0001866910866910867, |
| "loss": 0.4804, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.3203350267746807, |
| "grad_norm": 0.612319827079773, |
| "learning_rate": 0.00018656898656898655, |
| "loss": 0.8546, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.321433475216257, |
| "grad_norm": 0.5242000222206116, |
| "learning_rate": 0.00018644688644688643, |
| "loss": 0.7577, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.3225319236578332, |
| "grad_norm": 0.5688628554344177, |
| "learning_rate": 0.00018632478632478632, |
| "loss": 0.6645, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.3236303720994096, |
| "grad_norm": 0.3695731461048126, |
| "learning_rate": 0.00018620268620268618, |
| "loss": 0.4979, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.3247288205409857, |
| "grad_norm": 0.44525593519210815, |
| "learning_rate": 0.00018608058608058606, |
| "loss": 0.807, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.325827268982562, |
| "grad_norm": 0.37627971172332764, |
| "learning_rate": 0.00018595848595848595, |
| "loss": 0.6584, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.3269257174241385, |
| "grad_norm": 0.39727315306663513, |
| "learning_rate": 0.0001858363858363858, |
| "loss": 0.5565, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.3280241658657146, |
| "grad_norm": 0.4151424169540405, |
| "learning_rate": 0.00018571428571428572, |
| "loss": 0.81, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.329122614307291, |
| "grad_norm": 0.37529075145721436, |
| "learning_rate": 0.00018559218559218555, |
| "loss": 0.6188, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.3302210627488673, |
| "grad_norm": 0.43061408400535583, |
| "learning_rate": 0.00018547008547008546, |
| "loss": 0.814, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.3313195111904434, |
| "grad_norm": 0.437511682510376, |
| "learning_rate": 0.00018534798534798535, |
| "loss": 0.55, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.3324179596320198, |
| "grad_norm": 0.5172685980796814, |
| "learning_rate": 0.0001852258852258852, |
| "loss": 0.6551, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.3335164080735962, |
| "grad_norm": 0.3292716443538666, |
| "learning_rate": 0.0001851037851037851, |
| "loss": 0.5108, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.3346148565151723, |
| "grad_norm": 0.7129474878311157, |
| "learning_rate": 0.00018498168498168498, |
| "loss": 0.7197, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.3357133049567487, |
| "grad_norm": 0.46317145228385925, |
| "learning_rate": 0.00018485958485958483, |
| "loss": 0.6553, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.3368117533983248, |
| "grad_norm": 0.5539398789405823, |
| "learning_rate": 0.00018473748473748472, |
| "loss": 0.7057, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.3379102018399012, |
| "grad_norm": 0.40555253624916077, |
| "learning_rate": 0.0001846153846153846, |
| "loss": 0.5976, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.3390086502814773, |
| "grad_norm": 0.462704062461853, |
| "learning_rate": 0.00018449328449328446, |
| "loss": 0.7018, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.3401070987230537, |
| "grad_norm": 0.407287061214447, |
| "learning_rate": 0.00018437118437118435, |
| "loss": 0.4726, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.34120554716463, |
| "grad_norm": 0.3654995858669281, |
| "learning_rate": 0.00018424908424908423, |
| "loss": 0.5811, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.3423039956062062, |
| "grad_norm": 0.46455878019332886, |
| "learning_rate": 0.0001841269841269841, |
| "loss": 0.8998, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.3434024440477825, |
| "grad_norm": 0.47929346561431885, |
| "learning_rate": 0.00018400488400488398, |
| "loss": 0.7348, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.344500892489359, |
| "grad_norm": 0.7128652930259705, |
| "learning_rate": 0.0001838827838827839, |
| "loss": 1.2647, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.345599340930935, |
| "grad_norm": 0.3956572413444519, |
| "learning_rate": 0.00018376068376068372, |
| "loss": 0.6985, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.3466977893725114, |
| "grad_norm": 0.5585309863090515, |
| "learning_rate": 0.00018363858363858364, |
| "loss": 1.0086, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.3477962378140875, |
| "grad_norm": 1.5960838794708252, |
| "learning_rate": 0.00018351648351648352, |
| "loss": 0.644, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.3488946862556639, |
| "grad_norm": 0.6499342322349548, |
| "learning_rate": 0.00018339438339438338, |
| "loss": 0.7698, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.34999313469724, |
| "grad_norm": 0.42246925830841064, |
| "learning_rate": 0.00018327228327228326, |
| "loss": 0.5614, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.3510915831388164, |
| "grad_norm": 0.42192572355270386, |
| "learning_rate": 0.00018315018315018315, |
| "loss": 0.7726, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.3521900315803927, |
| "grad_norm": 0.6409221887588501, |
| "learning_rate": 0.000183028083028083, |
| "loss": 0.5928, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.3532884800219689, |
| "grad_norm": 1.328852653503418, |
| "learning_rate": 0.0001829059829059829, |
| "loss": 0.7861, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.3543869284635452, |
| "grad_norm": 0.4519331753253937, |
| "learning_rate": 0.00018278388278388275, |
| "loss": 0.5938, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.3554853769051216, |
| "grad_norm": 0.3942720592021942, |
| "learning_rate": 0.00018266178266178264, |
| "loss": 0.4781, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.3565838253466977, |
| "grad_norm": 0.5066869258880615, |
| "learning_rate": 0.00018253968253968252, |
| "loss": 0.8069, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.357682273788274, |
| "grad_norm": 0.37002792954444885, |
| "learning_rate": 0.00018241758241758238, |
| "loss": 0.5737, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.3587807222298505, |
| "grad_norm": 0.3738810122013092, |
| "learning_rate": 0.00018229548229548227, |
| "loss": 0.5169, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.3598791706714266, |
| "grad_norm": 0.44956260919570923, |
| "learning_rate": 0.00018217338217338215, |
| "loss": 0.5614, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.3609776191130027, |
| "grad_norm": 0.34839004278182983, |
| "learning_rate": 0.000182051282051282, |
| "loss": 0.5783, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.362076067554579, |
| "grad_norm": 0.30152127146720886, |
| "learning_rate": 0.00018192918192918192, |
| "loss": 0.4321, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.3631745159961555, |
| "grad_norm": 0.6672345399856567, |
| "learning_rate": 0.0001818070818070818, |
| "loss": 0.6073, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.3642729644377316, |
| "grad_norm": 0.45652687549591064, |
| "learning_rate": 0.00018168498168498167, |
| "loss": 0.6193, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.365371412879308, |
| "grad_norm": 0.6392306089401245, |
| "learning_rate": 0.00018156288156288155, |
| "loss": 0.8388, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.3664698613208843, |
| "grad_norm": 0.5510252714157104, |
| "learning_rate": 0.00018144078144078144, |
| "loss": 0.6512, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.3675683097624605, |
| "grad_norm": 0.38780227303504944, |
| "learning_rate": 0.0001813186813186813, |
| "loss": 0.6835, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.3686667582040368, |
| "grad_norm": 0.47472965717315674, |
| "learning_rate": 0.00018119658119658118, |
| "loss": 0.6625, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.3697652066456132, |
| "grad_norm": 0.3599228262901306, |
| "learning_rate": 0.00018107448107448107, |
| "loss": 0.5063, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.3708636550871893, |
| "grad_norm": 0.3284567892551422, |
| "learning_rate": 0.00018095238095238093, |
| "loss": 0.7679, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.3719621035287657, |
| "grad_norm": 0.5258575081825256, |
| "learning_rate": 0.0001808302808302808, |
| "loss": 0.6213, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.3730605519703418, |
| "grad_norm": 0.3211069405078888, |
| "learning_rate": 0.0001807081807081807, |
| "loss": 0.5306, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3741590004119182, |
| "grad_norm": 0.6325588822364807, |
| "learning_rate": 0.00018058608058608056, |
| "loss": 0.8104, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.3752574488534943, |
| "grad_norm": 0.4994303584098816, |
| "learning_rate": 0.00018046398046398044, |
| "loss": 0.6464, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.3763558972950707, |
| "grad_norm": 0.3013019263744354, |
| "learning_rate": 0.00018034188034188035, |
| "loss": 0.4749, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.377454345736647, |
| "grad_norm": 1.0342131853103638, |
| "learning_rate": 0.00018021978021978018, |
| "loss": 0.7995, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.3785527941782232, |
| "grad_norm": 0.40213823318481445, |
| "learning_rate": 0.0001800976800976801, |
| "loss": 0.8791, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.3796512426197995, |
| "grad_norm": 0.37126532196998596, |
| "learning_rate": 0.00017997557997557998, |
| "loss": 0.551, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.380749691061376, |
| "grad_norm": 0.3417685031890869, |
| "learning_rate": 0.00017985347985347984, |
| "loss": 0.583, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.381848139502952, |
| "grad_norm": 0.33571329712867737, |
| "learning_rate": 0.00017973137973137973, |
| "loss": 0.4927, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.3829465879445284, |
| "grad_norm": 0.5128073692321777, |
| "learning_rate": 0.00017960927960927959, |
| "loss": 0.5903, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.3840450363861048, |
| "grad_norm": 0.5345245599746704, |
| "learning_rate": 0.00017948717948717947, |
| "loss": 0.5828, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.385143484827681, |
| "grad_norm": 0.312639981508255, |
| "learning_rate": 0.00017936507936507936, |
| "loss": 0.6905, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.386241933269257, |
| "grad_norm": 0.4795394837856293, |
| "learning_rate": 0.00017924297924297921, |
| "loss": 0.6193, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.3873403817108334, |
| "grad_norm": 0.39672231674194336, |
| "learning_rate": 0.0001791208791208791, |
| "loss": 0.7833, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.3884388301524098, |
| "grad_norm": 0.46752655506134033, |
| "learning_rate": 0.00017899877899877899, |
| "loss": 0.6385, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.389537278593986, |
| "grad_norm": 0.5376736521720886, |
| "learning_rate": 0.00017887667887667884, |
| "loss": 0.6362, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.3906357270355623, |
| "grad_norm": 0.5675904750823975, |
| "learning_rate": 0.00017875457875457873, |
| "loss": 0.7975, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.3917341754771386, |
| "grad_norm": 0.5429015755653381, |
| "learning_rate": 0.00017863247863247861, |
| "loss": 0.5415, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.3928326239187148, |
| "grad_norm": 0.3714626729488373, |
| "learning_rate": 0.00017851037851037847, |
| "loss": 0.7104, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.3939310723602911, |
| "grad_norm": 0.7549324035644531, |
| "learning_rate": 0.00017838827838827836, |
| "loss": 0.698, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.3950295208018675, |
| "grad_norm": 0.36867257952690125, |
| "learning_rate": 0.00017826617826617827, |
| "loss": 0.6019, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.3961279692434436, |
| "grad_norm": 0.42439624667167664, |
| "learning_rate": 0.00017814407814407813, |
| "loss": 0.4626, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.39722641768502, |
| "grad_norm": 0.4768877923488617, |
| "learning_rate": 0.00017802197802197802, |
| "loss": 0.671, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.3983248661265961, |
| "grad_norm": 0.3415908217430115, |
| "learning_rate": 0.0001778998778998779, |
| "loss": 0.5904, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.3994233145681725, |
| "grad_norm": 0.5370535850524902, |
| "learning_rate": 0.00017777777777777776, |
| "loss": 0.578, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.4005217630097486, |
| "grad_norm": 0.61114901304245, |
| "learning_rate": 0.00017765567765567764, |
| "loss": 0.6498, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.401620211451325, |
| "grad_norm": 0.3491772711277008, |
| "learning_rate": 0.00017753357753357753, |
| "loss": 0.6057, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.4027186598929013, |
| "grad_norm": 0.4992705285549164, |
| "learning_rate": 0.0001774114774114774, |
| "loss": 0.8541, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.4038171083344775, |
| "grad_norm": 0.5476379990577698, |
| "learning_rate": 0.00017728937728937727, |
| "loss": 0.5608, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.4049155567760538, |
| "grad_norm": 0.6107895374298096, |
| "learning_rate": 0.00017716727716727716, |
| "loss": 0.7437, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.4060140052176302, |
| "grad_norm": 0.510809600353241, |
| "learning_rate": 0.00017704517704517702, |
| "loss": 0.6569, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.4071124536592063, |
| "grad_norm": 0.5050077438354492, |
| "learning_rate": 0.0001769230769230769, |
| "loss": 0.6566, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.4082109021007827, |
| "grad_norm": 0.44812703132629395, |
| "learning_rate": 0.0001768009768009768, |
| "loss": 0.6557, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.4093093505423588, |
| "grad_norm": 0.5216537714004517, |
| "learning_rate": 0.00017667887667887665, |
| "loss": 0.7311, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.4104077989839352, |
| "grad_norm": 0.5608856081962585, |
| "learning_rate": 0.00017655677655677656, |
| "loss": 0.9001, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.4115062474255113, |
| "grad_norm": 0.47205066680908203, |
| "learning_rate": 0.0001764346764346764, |
| "loss": 0.5214, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.4126046958670877, |
| "grad_norm": 0.4073629081249237, |
| "learning_rate": 0.0001763125763125763, |
| "loss": 0.483, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.413703144308664, |
| "grad_norm": 0.42381593585014343, |
| "learning_rate": 0.0001761904761904762, |
| "loss": 0.4895, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.4148015927502402, |
| "grad_norm": 0.629356861114502, |
| "learning_rate": 0.00017606837606837605, |
| "loss": 0.4639, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.4159000411918166, |
| "grad_norm": 0.3123486340045929, |
| "learning_rate": 0.00017594627594627593, |
| "loss": 0.4575, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.416998489633393, |
| "grad_norm": 0.4163682460784912, |
| "learning_rate": 0.00017582417582417582, |
| "loss": 0.7511, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.418096938074969, |
| "grad_norm": 0.5697455406188965, |
| "learning_rate": 0.00017570207570207568, |
| "loss": 0.5977, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.4191953865165454, |
| "grad_norm": 0.39232510328292847, |
| "learning_rate": 0.00017557997557997556, |
| "loss": 0.6133, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.4202938349581218, |
| "grad_norm": 0.5452993512153625, |
| "learning_rate": 0.00017545787545787545, |
| "loss": 0.6596, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.421392283399698, |
| "grad_norm": 0.39080601930618286, |
| "learning_rate": 0.0001753357753357753, |
| "loss": 0.7422, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.4224907318412743, |
| "grad_norm": 0.6513398289680481, |
| "learning_rate": 0.0001752136752136752, |
| "loss": 0.5277, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.4235891802828504, |
| "grad_norm": 0.4627130329608917, |
| "learning_rate": 0.00017509157509157508, |
| "loss": 0.6296, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.4246876287244268, |
| "grad_norm": 0.499700129032135, |
| "learning_rate": 0.00017496947496947494, |
| "loss": 0.689, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.425786077166003, |
| "grad_norm": 0.4668709635734558, |
| "learning_rate": 0.00017484737484737482, |
| "loss": 0.784, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.4268845256075793, |
| "grad_norm": 0.6378145217895508, |
| "learning_rate": 0.00017472527472527473, |
| "loss": 0.5077, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.4279829740491556, |
| "grad_norm": 0.6320174336433411, |
| "learning_rate": 0.00017460317460317457, |
| "loss": 1.061, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4290814224907318, |
| "grad_norm": 0.48719078302383423, |
| "learning_rate": 0.00017448107448107448, |
| "loss": 0.7181, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.4301798709323081, |
| "grad_norm": 0.5345287919044495, |
| "learning_rate": 0.00017435897435897436, |
| "loss": 0.5599, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.4312783193738845, |
| "grad_norm": 0.567857563495636, |
| "learning_rate": 0.00017423687423687422, |
| "loss": 0.6294, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.4323767678154606, |
| "grad_norm": 0.5715040564537048, |
| "learning_rate": 0.0001741147741147741, |
| "loss": 0.5326, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.433475216257037, |
| "grad_norm": 0.40048834681510925, |
| "learning_rate": 0.000173992673992674, |
| "loss": 0.687, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.4345736646986131, |
| "grad_norm": 0.4964540898799896, |
| "learning_rate": 0.00017387057387057385, |
| "loss": 0.6149, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.4356721131401895, |
| "grad_norm": 0.5018569231033325, |
| "learning_rate": 0.00017374847374847374, |
| "loss": 0.4224, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.4367705615817656, |
| "grad_norm": 0.6026094555854797, |
| "learning_rate": 0.00017362637362637362, |
| "loss": 0.8934, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.437869010023342, |
| "grad_norm": 0.33409950137138367, |
| "learning_rate": 0.00017350427350427348, |
| "loss": 0.6725, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.4389674584649184, |
| "grad_norm": 0.43982234597206116, |
| "learning_rate": 0.00017338217338217337, |
| "loss": 0.9203, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.4400659069064945, |
| "grad_norm": 0.843877911567688, |
| "learning_rate": 0.00017326007326007322, |
| "loss": 0.6028, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.4411643553480709, |
| "grad_norm": 0.35148733854293823, |
| "learning_rate": 0.0001731379731379731, |
| "loss": 0.7503, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.4422628037896472, |
| "grad_norm": 0.4561845362186432, |
| "learning_rate": 0.000173015873015873, |
| "loss": 0.6577, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.4433612522312234, |
| "grad_norm": 0.47295713424682617, |
| "learning_rate": 0.00017289377289377285, |
| "loss": 0.8013, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.4444597006727997, |
| "grad_norm": 0.46340033411979675, |
| "learning_rate": 0.00017277167277167277, |
| "loss": 0.73, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.445558149114376, |
| "grad_norm": 0.49221453070640564, |
| "learning_rate": 0.00017264957264957265, |
| "loss": 0.6735, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.4466565975559522, |
| "grad_norm": 0.36250925064086914, |
| "learning_rate": 0.0001725274725274725, |
| "loss": 0.7463, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.4477550459975284, |
| "grad_norm": 0.3832615911960602, |
| "learning_rate": 0.0001724053724053724, |
| "loss": 0.7295, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.4488534944391047, |
| "grad_norm": 0.7413591742515564, |
| "learning_rate": 0.00017228327228327228, |
| "loss": 0.7627, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.449951942880681, |
| "grad_norm": 0.45626765489578247, |
| "learning_rate": 0.00017216117216117214, |
| "loss": 0.727, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.4510503913222572, |
| "grad_norm": 0.3024120330810547, |
| "learning_rate": 0.00017203907203907202, |
| "loss": 0.3986, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.4521488397638336, |
| "grad_norm": 0.31635284423828125, |
| "learning_rate": 0.0001719169719169719, |
| "loss": 0.3469, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.45324728820541, |
| "grad_norm": 0.36893391609191895, |
| "learning_rate": 0.00017179487179487177, |
| "loss": 0.7017, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.454345736646986, |
| "grad_norm": 0.4804024398326874, |
| "learning_rate": 0.00017167277167277165, |
| "loss": 0.8811, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.4554441850885624, |
| "grad_norm": 0.4446522295475006, |
| "learning_rate": 0.00017155067155067154, |
| "loss": 0.8027, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.4565426335301388, |
| "grad_norm": 0.27936413884162903, |
| "learning_rate": 0.0001714285714285714, |
| "loss": 0.3846, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.457641081971715, |
| "grad_norm": 0.3312259316444397, |
| "learning_rate": 0.00017130647130647128, |
| "loss": 0.4852, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.4587395304132913, |
| "grad_norm": 0.4751642644405365, |
| "learning_rate": 0.0001711843711843712, |
| "loss": 0.7337, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.4598379788548674, |
| "grad_norm": 0.5365067720413208, |
| "learning_rate": 0.00017106227106227103, |
| "loss": 0.8052, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.4609364272964438, |
| "grad_norm": 0.5944942831993103, |
| "learning_rate": 0.00017094017094017094, |
| "loss": 0.7673, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.46203487573802, |
| "grad_norm": 0.48244431614875793, |
| "learning_rate": 0.00017081807081807083, |
| "loss": 0.855, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.4631333241795963, |
| "grad_norm": 0.32348135113716125, |
| "learning_rate": 0.00017069597069597068, |
| "loss": 0.5133, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.4642317726211727, |
| "grad_norm": 0.6455866694450378, |
| "learning_rate": 0.00017057387057387057, |
| "loss": 0.6825, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.4653302210627488, |
| "grad_norm": 0.3937522768974304, |
| "learning_rate": 0.00017045177045177045, |
| "loss": 0.6335, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.4664286695043252, |
| "grad_norm": 0.33579352498054504, |
| "learning_rate": 0.0001703296703296703, |
| "loss": 0.4711, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.4675271179459015, |
| "grad_norm": 0.5055533647537231, |
| "learning_rate": 0.0001702075702075702, |
| "loss": 0.6512, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.4686255663874777, |
| "grad_norm": 0.40702182054519653, |
| "learning_rate": 0.00017008547008547006, |
| "loss": 0.8833, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.469724014829054, |
| "grad_norm": 0.3574135899543762, |
| "learning_rate": 0.00016996336996336994, |
| "loss": 0.7127, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.4708224632706302, |
| "grad_norm": 0.45641472935676575, |
| "learning_rate": 0.00016984126984126983, |
| "loss": 0.7258, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.4719209117122065, |
| "grad_norm": 1.5012352466583252, |
| "learning_rate": 0.0001697191697191697, |
| "loss": 0.8065, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.4730193601537827, |
| "grad_norm": 0.5025885701179504, |
| "learning_rate": 0.00016959706959706957, |
| "loss": 0.9377, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.474117808595359, |
| "grad_norm": 0.2942202687263489, |
| "learning_rate": 0.00016947496947496946, |
| "loss": 0.5693, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.4752162570369354, |
| "grad_norm": 0.48770126700401306, |
| "learning_rate": 0.00016935286935286932, |
| "loss": 0.5483, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.4763147054785115, |
| "grad_norm": 0.3853349983692169, |
| "learning_rate": 0.0001692307692307692, |
| "loss": 0.5787, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.4774131539200879, |
| "grad_norm": 0.3593169152736664, |
| "learning_rate": 0.00016910866910866911, |
| "loss": 0.6426, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.4785116023616642, |
| "grad_norm": 0.5932713150978088, |
| "learning_rate": 0.00016898656898656897, |
| "loss": 0.7543, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.4796100508032404, |
| "grad_norm": 0.43406638503074646, |
| "learning_rate": 0.00016886446886446886, |
| "loss": 0.7868, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.4807084992448167, |
| "grad_norm": 0.38596048951148987, |
| "learning_rate": 0.00016874236874236874, |
| "loss": 0.49, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.481806947686393, |
| "grad_norm": 0.42844533920288086, |
| "learning_rate": 0.0001686202686202686, |
| "loss": 0.6485, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.4829053961279692, |
| "grad_norm": 0.5165280103683472, |
| "learning_rate": 0.0001684981684981685, |
| "loss": 0.6924, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.4840038445695456, |
| "grad_norm": 0.5717988610267639, |
| "learning_rate": 0.00016837606837606837, |
| "loss": 0.5624, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.4851022930111217, |
| "grad_norm": 0.4384293556213379, |
| "learning_rate": 0.00016825396825396823, |
| "loss": 0.7895, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.486200741452698, |
| "grad_norm": 0.5472243428230286, |
| "learning_rate": 0.00016813186813186812, |
| "loss": 0.8838, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.4872991898942742, |
| "grad_norm": 0.3903232216835022, |
| "learning_rate": 0.000168009768009768, |
| "loss": 0.5452, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.4883976383358506, |
| "grad_norm": 0.3799583613872528, |
| "learning_rate": 0.00016788766788766786, |
| "loss": 0.8931, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.489496086777427, |
| "grad_norm": 0.4481349289417267, |
| "learning_rate": 0.00016776556776556775, |
| "loss": 0.5956, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.490594535219003, |
| "grad_norm": 0.45875266194343567, |
| "learning_rate": 0.00016764346764346763, |
| "loss": 0.4729, |
| "step": 1357 |
| }, |
| { |
| "epoch": 1.4916929836605795, |
| "grad_norm": 0.494112104177475, |
| "learning_rate": 0.0001675213675213675, |
| "loss": 0.6416, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.4927914321021558, |
| "grad_norm": 0.3976772725582123, |
| "learning_rate": 0.0001673992673992674, |
| "loss": 0.6601, |
| "step": 1359 |
| }, |
| { |
| "epoch": 1.493889880543732, |
| "grad_norm": 0.29009610414505005, |
| "learning_rate": 0.0001672771672771673, |
| "loss": 0.4261, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.4949883289853083, |
| "grad_norm": 0.5540419816970825, |
| "learning_rate": 0.00016715506715506715, |
| "loss": 0.8206, |
| "step": 1361 |
| }, |
| { |
| "epoch": 1.4960867774268845, |
| "grad_norm": 0.41308313608169556, |
| "learning_rate": 0.00016703296703296703, |
| "loss": 0.7862, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.4971852258684608, |
| "grad_norm": 0.6565150618553162, |
| "learning_rate": 0.0001669108669108669, |
| "loss": 0.6963, |
| "step": 1363 |
| }, |
| { |
| "epoch": 1.498283674310037, |
| "grad_norm": 0.4901321530342102, |
| "learning_rate": 0.00016678876678876678, |
| "loss": 0.7063, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.4993821227516133, |
| "grad_norm": 0.4676086902618408, |
| "learning_rate": 0.00016666666666666666, |
| "loss": 0.5142, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.5004805711931897, |
| "grad_norm": 0.4745628833770752, |
| "learning_rate": 0.00016654456654456652, |
| "loss": 0.7659, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.5015790196347658, |
| "grad_norm": 0.42693057656288147, |
| "learning_rate": 0.0001664224664224664, |
| "loss": 0.9233, |
| "step": 1367 |
| }, |
| { |
| "epoch": 1.5026774680763422, |
| "grad_norm": 0.4110391139984131, |
| "learning_rate": 0.0001663003663003663, |
| "loss": 0.5062, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.5037759165179185, |
| "grad_norm": 0.3090996742248535, |
| "learning_rate": 0.00016617826617826615, |
| "loss": 0.4462, |
| "step": 1369 |
| }, |
| { |
| "epoch": 1.5048743649594947, |
| "grad_norm": 0.42027410864830017, |
| "learning_rate": 0.00016605616605616603, |
| "loss": 0.8589, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.505972813401071, |
| "grad_norm": 0.38396796584129333, |
| "learning_rate": 0.00016593406593406592, |
| "loss": 0.6609, |
| "step": 1371 |
| }, |
| { |
| "epoch": 1.5070712618426474, |
| "grad_norm": 0.5236012935638428, |
| "learning_rate": 0.00016581196581196578, |
| "loss": 0.6506, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.5081697102842235, |
| "grad_norm": 0.7232113480567932, |
| "learning_rate": 0.00016568986568986566, |
| "loss": 0.6689, |
| "step": 1373 |
| }, |
| { |
| "epoch": 1.5092681587257997, |
| "grad_norm": 0.4777502417564392, |
| "learning_rate": 0.00016556776556776558, |
| "loss": 0.5701, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.510366607167376, |
| "grad_norm": 0.39154767990112305, |
| "learning_rate": 0.0001654456654456654, |
| "loss": 0.4906, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.5114650556089524, |
| "grad_norm": 0.469382107257843, |
| "learning_rate": 0.00016532356532356532, |
| "loss": 0.5768, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.5125635040505285, |
| "grad_norm": 0.3485945761203766, |
| "learning_rate": 0.0001652014652014652, |
| "loss": 0.7814, |
| "step": 1377 |
| }, |
| { |
| "epoch": 1.513661952492105, |
| "grad_norm": 0.4375949203968048, |
| "learning_rate": 0.00016507936507936506, |
| "loss": 0.6328, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.5147604009336813, |
| "grad_norm": 0.47778064012527466, |
| "learning_rate": 0.00016495726495726495, |
| "loss": 0.635, |
| "step": 1379 |
| }, |
| { |
| "epoch": 1.5158588493752574, |
| "grad_norm": 0.3515126705169678, |
| "learning_rate": 0.00016483516483516484, |
| "loss": 0.7014, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.5169572978168337, |
| "grad_norm": 0.3710018992424011, |
| "learning_rate": 0.0001647130647130647, |
| "loss": 0.7903, |
| "step": 1381 |
| }, |
| { |
| "epoch": 1.51805574625841, |
| "grad_norm": 0.37630394101142883, |
| "learning_rate": 0.00016459096459096458, |
| "loss": 0.5446, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.5191541946999862, |
| "grad_norm": 0.4312807321548462, |
| "learning_rate": 0.00016446886446886446, |
| "loss": 0.6101, |
| "step": 1383 |
| }, |
| { |
| "epoch": 1.5202526431415624, |
| "grad_norm": 0.399384468793869, |
| "learning_rate": 0.00016434676434676432, |
| "loss": 0.5734, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.521351091583139, |
| "grad_norm": 0.41233471035957336, |
| "learning_rate": 0.0001642246642246642, |
| "loss": 0.6525, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.522449540024715, |
| "grad_norm": 0.5215228199958801, |
| "learning_rate": 0.0001641025641025641, |
| "loss": 0.4804, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.5235479884662912, |
| "grad_norm": 0.42069393396377563, |
| "learning_rate": 0.00016398046398046395, |
| "loss": 0.5517, |
| "step": 1387 |
| }, |
| { |
| "epoch": 1.5246464369078676, |
| "grad_norm": 1.7902978658676147, |
| "learning_rate": 0.00016385836385836384, |
| "loss": 0.6295, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.525744885349444, |
| "grad_norm": 0.7353507280349731, |
| "learning_rate": 0.0001637362637362637, |
| "loss": 1.0585, |
| "step": 1389 |
| }, |
| { |
| "epoch": 1.52684333379102, |
| "grad_norm": 0.45992404222488403, |
| "learning_rate": 0.0001636141636141636, |
| "loss": 0.7671, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.5279417822325965, |
| "grad_norm": 0.3927334249019623, |
| "learning_rate": 0.0001634920634920635, |
| "loss": 0.7479, |
| "step": 1391 |
| }, |
| { |
| "epoch": 1.5290402306741728, |
| "grad_norm": 0.32833003997802734, |
| "learning_rate": 0.00016336996336996335, |
| "loss": 0.5774, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.530138679115749, |
| "grad_norm": 0.4306529462337494, |
| "learning_rate": 0.00016324786324786324, |
| "loss": 0.6317, |
| "step": 1393 |
| }, |
| { |
| "epoch": 1.5312371275573253, |
| "grad_norm": 0.5411052703857422, |
| "learning_rate": 0.00016312576312576312, |
| "loss": 0.6637, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.5323355759989017, |
| "grad_norm": 0.633800745010376, |
| "learning_rate": 0.00016300366300366298, |
| "loss": 0.7145, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.5334340244404778, |
| "grad_norm": 0.6986578702926636, |
| "learning_rate": 0.00016288156288156287, |
| "loss": 0.7194, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.534532472882054, |
| "grad_norm": 0.5223686695098877, |
| "learning_rate": 0.00016275946275946275, |
| "loss": 0.7849, |
| "step": 1397 |
| }, |
| { |
| "epoch": 1.5356309213236303, |
| "grad_norm": 0.5342483520507812, |
| "learning_rate": 0.0001626373626373626, |
| "loss": 0.8885, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.5367293697652067, |
| "grad_norm": 0.5467656850814819, |
| "learning_rate": 0.0001625152625152625, |
| "loss": 0.6265, |
| "step": 1399 |
| }, |
| { |
| "epoch": 1.5378278182067828, |
| "grad_norm": 0.4483658969402313, |
| "learning_rate": 0.00016239316239316238, |
| "loss": 0.7133, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5389262666483592, |
| "grad_norm": 0.5714216232299805, |
| "learning_rate": 0.00016227106227106224, |
| "loss": 0.5212, |
| "step": 1401 |
| }, |
| { |
| "epoch": 1.5400247150899355, |
| "grad_norm": 0.5487145781517029, |
| "learning_rate": 0.00016214896214896213, |
| "loss": 0.6276, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.5411231635315117, |
| "grad_norm": 0.3687078654766083, |
| "learning_rate": 0.00016202686202686204, |
| "loss": 0.7512, |
| "step": 1403 |
| }, |
| { |
| "epoch": 1.542221611973088, |
| "grad_norm": 0.3596762418746948, |
| "learning_rate": 0.00016190476190476187, |
| "loss": 0.7192, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.5433200604146644, |
| "grad_norm": 0.4092305898666382, |
| "learning_rate": 0.00016178266178266178, |
| "loss": 0.7339, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.5444185088562405, |
| "grad_norm": 0.4018193483352661, |
| "learning_rate": 0.00016166056166056167, |
| "loss": 0.7213, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.5455169572978167, |
| "grad_norm": 0.4993208646774292, |
| "learning_rate": 0.00016153846153846153, |
| "loss": 0.6362, |
| "step": 1407 |
| }, |
| { |
| "epoch": 1.5466154057393933, |
| "grad_norm": 0.3958855867385864, |
| "learning_rate": 0.0001614163614163614, |
| "loss": 0.8482, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.5477138541809694, |
| "grad_norm": 0.32689765095710754, |
| "learning_rate": 0.0001612942612942613, |
| "loss": 0.6583, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.5488123026225455, |
| "grad_norm": 0.48947611451148987, |
| "learning_rate": 0.00016117216117216116, |
| "loss": 0.6707, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.549910751064122, |
| "grad_norm": 0.3446139395236969, |
| "learning_rate": 0.00016105006105006104, |
| "loss": 0.8914, |
| "step": 1411 |
| }, |
| { |
| "epoch": 1.5510091995056983, |
| "grad_norm": 0.585746705532074, |
| "learning_rate": 0.0001609279609279609, |
| "loss": 0.5413, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.5521076479472744, |
| "grad_norm": 0.6561328172683716, |
| "learning_rate": 0.00016080586080586079, |
| "loss": 0.3728, |
| "step": 1413 |
| }, |
| { |
| "epoch": 1.5532060963888508, |
| "grad_norm": 0.47158828377723694, |
| "learning_rate": 0.00016068376068376067, |
| "loss": 0.6525, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.5543045448304271, |
| "grad_norm": 0.3676914572715759, |
| "learning_rate": 0.00016056166056166053, |
| "loss": 0.7395, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.5554029932720033, |
| "grad_norm": 0.608076810836792, |
| "learning_rate": 0.00016043956043956041, |
| "loss": 0.5289, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.5565014417135794, |
| "grad_norm": 0.44940462708473206, |
| "learning_rate": 0.0001603174603174603, |
| "loss": 0.6282, |
| "step": 1417 |
| }, |
| { |
| "epoch": 1.557599890155156, |
| "grad_norm": 0.48062869906425476, |
| "learning_rate": 0.00016019536019536016, |
| "loss": 0.7438, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.5586983385967321, |
| "grad_norm": 0.43834635615348816, |
| "learning_rate": 0.00016007326007326004, |
| "loss": 0.4248, |
| "step": 1419 |
| }, |
| { |
| "epoch": 1.5597967870383083, |
| "grad_norm": 0.5203731060028076, |
| "learning_rate": 0.00015995115995115996, |
| "loss": 0.91, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.5608952354798846, |
| "grad_norm": 0.5766960978507996, |
| "learning_rate": 0.00015982905982905981, |
| "loss": 0.7211, |
| "step": 1421 |
| }, |
| { |
| "epoch": 1.561993683921461, |
| "grad_norm": 0.3048666715621948, |
| "learning_rate": 0.0001597069597069597, |
| "loss": 0.5618, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.5630921323630371, |
| "grad_norm": 0.3916679322719574, |
| "learning_rate": 0.00015958485958485959, |
| "loss": 0.6954, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.5641905808046135, |
| "grad_norm": 0.6336612105369568, |
| "learning_rate": 0.00015946275946275944, |
| "loss": 0.6368, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.5652890292461898, |
| "grad_norm": 0.8314816355705261, |
| "learning_rate": 0.00015934065934065933, |
| "loss": 0.7633, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.566387477687766, |
| "grad_norm": 0.46973487734794617, |
| "learning_rate": 0.00015921855921855922, |
| "loss": 0.6915, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.5674859261293423, |
| "grad_norm": 0.48737633228302, |
| "learning_rate": 0.00015909645909645907, |
| "loss": 0.5346, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.5685843745709187, |
| "grad_norm": 0.548876941204071, |
| "learning_rate": 0.00015897435897435896, |
| "loss": 1.0449, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.5696828230124948, |
| "grad_norm": 0.5039654970169067, |
| "learning_rate": 0.00015885225885225884, |
| "loss": 0.9953, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.570781271454071, |
| "grad_norm": 0.7233378887176514, |
| "learning_rate": 0.0001587301587301587, |
| "loss": 0.7068, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.5718797198956473, |
| "grad_norm": 0.5767638683319092, |
| "learning_rate": 0.0001586080586080586, |
| "loss": 0.8055, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.5729781683372237, |
| "grad_norm": 0.34450021386146545, |
| "learning_rate": 0.00015848595848595847, |
| "loss": 0.726, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.5740766167787998, |
| "grad_norm": 0.8474962711334229, |
| "learning_rate": 0.00015836385836385833, |
| "loss": 0.6974, |
| "step": 1433 |
| }, |
| { |
| "epoch": 1.5751750652203762, |
| "grad_norm": 1.565746545791626, |
| "learning_rate": 0.00015824175824175824, |
| "loss": 0.7766, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.5762735136619526, |
| "grad_norm": 0.4393616020679474, |
| "learning_rate": 0.00015811965811965813, |
| "loss": 0.6071, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.5773719621035287, |
| "grad_norm": 0.5209214091300964, |
| "learning_rate": 0.000157997557997558, |
| "loss": 0.7546, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.578470410545105, |
| "grad_norm": 0.6069398522377014, |
| "learning_rate": 0.00015787545787545787, |
| "loss": 0.7322, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.5795688589866814, |
| "grad_norm": 0.6168296337127686, |
| "learning_rate": 0.00015775335775335773, |
| "loss": 0.5169, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.5806673074282576, |
| "grad_norm": 0.25368016958236694, |
| "learning_rate": 0.00015763125763125762, |
| "loss": 0.4838, |
| "step": 1439 |
| }, |
| { |
| "epoch": 1.5817657558698337, |
| "grad_norm": 0.4165039360523224, |
| "learning_rate": 0.0001575091575091575, |
| "loss": 1.0135, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.5828642043114103, |
| "grad_norm": 0.4596197307109833, |
| "learning_rate": 0.00015738705738705736, |
| "loss": 0.5545, |
| "step": 1441 |
| }, |
| { |
| "epoch": 1.5839626527529864, |
| "grad_norm": 0.5077592730522156, |
| "learning_rate": 0.00015726495726495725, |
| "loss": 0.7754, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.5850611011945626, |
| "grad_norm": 0.5041285157203674, |
| "learning_rate": 0.00015714285714285713, |
| "loss": 0.8384, |
| "step": 1443 |
| }, |
| { |
| "epoch": 1.586159549636139, |
| "grad_norm": 0.40924420952796936, |
| "learning_rate": 0.000157020757020757, |
| "loss": 0.5511, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.5872579980777153, |
| "grad_norm": 0.4800551235675812, |
| "learning_rate": 0.00015689865689865688, |
| "loss": 0.6154, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.5883564465192914, |
| "grad_norm": 0.433174729347229, |
| "learning_rate": 0.00015677655677655676, |
| "loss": 0.6158, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.5894548949608678, |
| "grad_norm": 0.29649895429611206, |
| "learning_rate": 0.00015665445665445662, |
| "loss": 0.5729, |
| "step": 1447 |
| }, |
| { |
| "epoch": 1.5905533434024441, |
| "grad_norm": 0.3815969228744507, |
| "learning_rate": 0.0001565323565323565, |
| "loss": 0.6748, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.5916517918440203, |
| "grad_norm": 0.4933919608592987, |
| "learning_rate": 0.00015641025641025642, |
| "loss": 0.7683, |
| "step": 1449 |
| }, |
| { |
| "epoch": 1.5927502402855966, |
| "grad_norm": 0.5053071975708008, |
| "learning_rate": 0.00015628815628815625, |
| "loss": 0.6779, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.593848688727173, |
| "grad_norm": 0.3900013566017151, |
| "learning_rate": 0.00015616605616605616, |
| "loss": 0.6326, |
| "step": 1451 |
| }, |
| { |
| "epoch": 1.5949471371687491, |
| "grad_norm": 0.5823982357978821, |
| "learning_rate": 0.00015604395604395605, |
| "loss": 0.6104, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.5960455856103253, |
| "grad_norm": 0.5277792811393738, |
| "learning_rate": 0.0001559218559218559, |
| "loss": 0.6647, |
| "step": 1453 |
| }, |
| { |
| "epoch": 1.5971440340519016, |
| "grad_norm": 0.32926440238952637, |
| "learning_rate": 0.0001557997557997558, |
| "loss": 0.6064, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.598242482493478, |
| "grad_norm": 0.7350378036499023, |
| "learning_rate": 0.00015567765567765568, |
| "loss": 0.7951, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.5993409309350541, |
| "grad_norm": 0.4125807285308838, |
| "learning_rate": 0.00015555555555555554, |
| "loss": 0.7761, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.6004393793766305, |
| "grad_norm": 0.49707722663879395, |
| "learning_rate": 0.00015543345543345542, |
| "loss": 0.7299, |
| "step": 1457 |
| }, |
| { |
| "epoch": 1.6015378278182069, |
| "grad_norm": 0.3240358829498291, |
| "learning_rate": 0.0001553113553113553, |
| "loss": 0.4832, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.602636276259783, |
| "grad_norm": 0.44430434703826904, |
| "learning_rate": 0.00015518925518925517, |
| "loss": 0.5968, |
| "step": 1459 |
| }, |
| { |
| "epoch": 1.6037347247013594, |
| "grad_norm": 0.3702992796897888, |
| "learning_rate": 0.00015506715506715505, |
| "loss": 0.7177, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.6048331731429357, |
| "grad_norm": 0.5001052618026733, |
| "learning_rate": 0.00015494505494505494, |
| "loss": 0.7448, |
| "step": 1461 |
| }, |
| { |
| "epoch": 1.6059316215845119, |
| "grad_norm": 0.45969969034194946, |
| "learning_rate": 0.0001548229548229548, |
| "loss": 0.8292, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.607030070026088, |
| "grad_norm": 0.46075674891471863, |
| "learning_rate": 0.00015470085470085468, |
| "loss": 0.5624, |
| "step": 1463 |
| }, |
| { |
| "epoch": 1.6081285184676646, |
| "grad_norm": 2.077080488204956, |
| "learning_rate": 0.00015457875457875454, |
| "loss": 0.6643, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.6092269669092407, |
| "grad_norm": 0.46008172631263733, |
| "learning_rate": 0.00015445665445665445, |
| "loss": 0.6329, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.6103254153508169, |
| "grad_norm": 0.5016405582427979, |
| "learning_rate": 0.00015433455433455434, |
| "loss": 0.7692, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.6114238637923932, |
| "grad_norm": 0.46292269229888916, |
| "learning_rate": 0.0001542124542124542, |
| "loss": 0.6485, |
| "step": 1467 |
| }, |
| { |
| "epoch": 1.6125223122339696, |
| "grad_norm": 0.4498538672924042, |
| "learning_rate": 0.00015409035409035408, |
| "loss": 0.598, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.6136207606755457, |
| "grad_norm": 0.3537295162677765, |
| "learning_rate": 0.00015396825396825397, |
| "loss": 0.6356, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.614719209117122, |
| "grad_norm": 0.9966747164726257, |
| "learning_rate": 0.00015384615384615382, |
| "loss": 0.6627, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.6158176575586984, |
| "grad_norm": 0.9386951327323914, |
| "learning_rate": 0.0001537240537240537, |
| "loss": 0.8148, |
| "step": 1471 |
| }, |
| { |
| "epoch": 1.6169161060002746, |
| "grad_norm": 0.3452979028224945, |
| "learning_rate": 0.0001536019536019536, |
| "loss": 0.5778, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.618014554441851, |
| "grad_norm": 0.3443523049354553, |
| "learning_rate": 0.00015347985347985345, |
| "loss": 0.9228, |
| "step": 1473 |
| }, |
| { |
| "epoch": 1.6191130028834273, |
| "grad_norm": 0.5345872044563293, |
| "learning_rate": 0.00015335775335775334, |
| "loss": 0.4682, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.6202114513250034, |
| "grad_norm": 0.35112351179122925, |
| "learning_rate": 0.00015323565323565322, |
| "loss": 0.5482, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.6213098997665796, |
| "grad_norm": 0.39090535044670105, |
| "learning_rate": 0.00015311355311355308, |
| "loss": 0.825, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.622408348208156, |
| "grad_norm": 1.1684538125991821, |
| "learning_rate": 0.00015299145299145297, |
| "loss": 0.6561, |
| "step": 1477 |
| }, |
| { |
| "epoch": 1.6235067966497323, |
| "grad_norm": 0.4006233513355255, |
| "learning_rate": 0.00015286935286935288, |
| "loss": 0.3647, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.6246052450913084, |
| "grad_norm": 0.30577126145362854, |
| "learning_rate": 0.0001527472527472527, |
| "loss": 0.4934, |
| "step": 1479 |
| }, |
| { |
| "epoch": 1.6257036935328848, |
| "grad_norm": 0.39927995204925537, |
| "learning_rate": 0.00015262515262515263, |
| "loss": 0.6028, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.6268021419744612, |
| "grad_norm": 0.49143150448799133, |
| "learning_rate": 0.0001525030525030525, |
| "loss": 0.4595, |
| "step": 1481 |
| }, |
| { |
| "epoch": 1.6279005904160373, |
| "grad_norm": 0.8603225946426392, |
| "learning_rate": 0.00015238095238095237, |
| "loss": 0.8617, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.6289990388576137, |
| "grad_norm": 0.534269392490387, |
| "learning_rate": 0.00015225885225885225, |
| "loss": 0.6648, |
| "step": 1483 |
| }, |
| { |
| "epoch": 1.63009748729919, |
| "grad_norm": 0.4987354278564453, |
| "learning_rate": 0.00015213675213675214, |
| "loss": 0.5908, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.6311959357407662, |
| "grad_norm": 0.5739774107933044, |
| "learning_rate": 0.000152014652014652, |
| "loss": 0.7652, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.6322943841823423, |
| "grad_norm": 0.5343801975250244, |
| "learning_rate": 0.00015189255189255188, |
| "loss": 0.6864, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.6333928326239189, |
| "grad_norm": 0.45683905482292175, |
| "learning_rate": 0.00015177045177045177, |
| "loss": 0.7179, |
| "step": 1487 |
| }, |
| { |
| "epoch": 1.634491281065495, |
| "grad_norm": 0.5020450949668884, |
| "learning_rate": 0.00015164835164835163, |
| "loss": 0.4356, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.6355897295070712, |
| "grad_norm": 0.3870914876461029, |
| "learning_rate": 0.0001515262515262515, |
| "loss": 0.692, |
| "step": 1489 |
| }, |
| { |
| "epoch": 1.6366881779486475, |
| "grad_norm": 0.5256255269050598, |
| "learning_rate": 0.00015140415140415137, |
| "loss": 0.7184, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.6377866263902239, |
| "grad_norm": 0.27588197588920593, |
| "learning_rate": 0.00015128205128205126, |
| "loss": 0.6928, |
| "step": 1491 |
| }, |
| { |
| "epoch": 1.6388850748318, |
| "grad_norm": 0.43336692452430725, |
| "learning_rate": 0.00015115995115995114, |
| "loss": 0.7357, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.6399835232733764, |
| "grad_norm": 0.7952486872673035, |
| "learning_rate": 0.000151037851037851, |
| "loss": 0.5536, |
| "step": 1493 |
| }, |
| { |
| "epoch": 1.6410819717149527, |
| "grad_norm": 3.8659090995788574, |
| "learning_rate": 0.00015091575091575089, |
| "loss": 0.6409, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.6421804201565289, |
| "grad_norm": 0.3824027478694916, |
| "learning_rate": 0.0001507936507936508, |
| "loss": 0.5988, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.643278868598105, |
| "grad_norm": 0.45106491446495056, |
| "learning_rate": 0.00015067155067155066, |
| "loss": 0.7568, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.6443773170396816, |
| "grad_norm": 0.719417154788971, |
| "learning_rate": 0.00015054945054945054, |
| "loss": 0.8191, |
| "step": 1497 |
| }, |
| { |
| "epoch": 1.6454757654812577, |
| "grad_norm": 0.4702167212963104, |
| "learning_rate": 0.00015042735042735043, |
| "loss": 0.6761, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.6465742139228339, |
| "grad_norm": 0.49441996216773987, |
| "learning_rate": 0.0001503052503052503, |
| "loss": 0.7323, |
| "step": 1499 |
| }, |
| { |
| "epoch": 1.6476726623644102, |
| "grad_norm": 0.623470664024353, |
| "learning_rate": 0.00015018315018315017, |
| "loss": 0.8384, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6487711108059866, |
| "grad_norm": 0.5583334565162659, |
| "learning_rate": 0.00015006105006105006, |
| "loss": 0.8238, |
| "step": 1501 |
| }, |
| { |
| "epoch": 1.6498695592475627, |
| "grad_norm": 0.4803924560546875, |
| "learning_rate": 0.00014993894993894994, |
| "loss": 0.5322, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.650968007689139, |
| "grad_norm": 0.709605872631073, |
| "learning_rate": 0.0001498168498168498, |
| "loss": 0.8254, |
| "step": 1503 |
| }, |
| { |
| "epoch": 1.6520664561307155, |
| "grad_norm": 0.48047375679016113, |
| "learning_rate": 0.0001496947496947497, |
| "loss": 0.5263, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.6531649045722916, |
| "grad_norm": 0.41796261072158813, |
| "learning_rate": 0.00014957264957264957, |
| "loss": 0.5803, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.654263353013868, |
| "grad_norm": 0.7576707601547241, |
| "learning_rate": 0.00014945054945054943, |
| "loss": 0.545, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.6553618014554443, |
| "grad_norm": 0.4668630063533783, |
| "learning_rate": 0.00014932844932844932, |
| "loss": 0.6213, |
| "step": 1507 |
| }, |
| { |
| "epoch": 1.6564602498970205, |
| "grad_norm": 0.9730806350708008, |
| "learning_rate": 0.00014920634920634917, |
| "loss": 0.5415, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.6575586983385966, |
| "grad_norm": 0.39670151472091675, |
| "learning_rate": 0.0001490842490842491, |
| "loss": 0.7931, |
| "step": 1509 |
| }, |
| { |
| "epoch": 1.658657146780173, |
| "grad_norm": 0.6003556847572327, |
| "learning_rate": 0.00014896214896214895, |
| "loss": 0.7494, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.6597555952217493, |
| "grad_norm": 0.4335152506828308, |
| "learning_rate": 0.00014884004884004883, |
| "loss": 0.7003, |
| "step": 1511 |
| }, |
| { |
| "epoch": 1.6608540436633255, |
| "grad_norm": 0.34025630354881287, |
| "learning_rate": 0.00014871794871794872, |
| "loss": 0.9012, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.6619524921049018, |
| "grad_norm": 0.403934508562088, |
| "learning_rate": 0.00014859584859584858, |
| "loss": 0.717, |
| "step": 1513 |
| }, |
| { |
| "epoch": 1.6630509405464782, |
| "grad_norm": 0.45691147446632385, |
| "learning_rate": 0.00014847374847374846, |
| "loss": 0.4833, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.6641493889880543, |
| "grad_norm": 0.42266151309013367, |
| "learning_rate": 0.00014835164835164835, |
| "loss": 0.5892, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.6652478374296307, |
| "grad_norm": 0.392337441444397, |
| "learning_rate": 0.0001482295482295482, |
| "loss": 0.7748, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.666346285871207, |
| "grad_norm": 0.352081298828125, |
| "learning_rate": 0.0001481074481074481, |
| "loss": 0.6018, |
| "step": 1517 |
| }, |
| { |
| "epoch": 1.6674447343127832, |
| "grad_norm": 0.46293389797210693, |
| "learning_rate": 0.00014798534798534798, |
| "loss": 0.4696, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.6685431827543593, |
| "grad_norm": 0.6427372097969055, |
| "learning_rate": 0.00014786324786324786, |
| "loss": 0.7279, |
| "step": 1519 |
| }, |
| { |
| "epoch": 1.669641631195936, |
| "grad_norm": 0.500382125377655, |
| "learning_rate": 0.00014774114774114772, |
| "loss": 0.7395, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.670740079637512, |
| "grad_norm": 0.4410606920719147, |
| "learning_rate": 0.0001476190476190476, |
| "loss": 0.501, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.6718385280790882, |
| "grad_norm": 0.5587645769119263, |
| "learning_rate": 0.0001474969474969475, |
| "loss": 0.8655, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.6729369765206645, |
| "grad_norm": 0.4312286376953125, |
| "learning_rate": 0.00014737484737484735, |
| "loss": 0.9578, |
| "step": 1523 |
| }, |
| { |
| "epoch": 1.674035424962241, |
| "grad_norm": 0.48694175481796265, |
| "learning_rate": 0.00014725274725274723, |
| "loss": 0.6806, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.675133873403817, |
| "grad_norm": 0.39892563223838806, |
| "learning_rate": 0.00014713064713064712, |
| "loss": 0.598, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.6762323218453934, |
| "grad_norm": 0.4714735150337219, |
| "learning_rate": 0.000147008547008547, |
| "loss": 0.9637, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.6773307702869698, |
| "grad_norm": 0.8308823108673096, |
| "learning_rate": 0.00014688644688644686, |
| "loss": 0.7886, |
| "step": 1527 |
| }, |
| { |
| "epoch": 1.678429218728546, |
| "grad_norm": 0.5142358541488647, |
| "learning_rate": 0.00014676434676434675, |
| "loss": 0.8028, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.6795276671701223, |
| "grad_norm": 0.4001234471797943, |
| "learning_rate": 0.00014664224664224663, |
| "loss": 0.59, |
| "step": 1529 |
| }, |
| { |
| "epoch": 1.6806261156116986, |
| "grad_norm": 0.4112735688686371, |
| "learning_rate": 0.0001465201465201465, |
| "loss": 0.6523, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.6817245640532748, |
| "grad_norm": 0.4391016960144043, |
| "learning_rate": 0.0001463980463980464, |
| "loss": 0.7372, |
| "step": 1531 |
| }, |
| { |
| "epoch": 1.682823012494851, |
| "grad_norm": 0.7199782133102417, |
| "learning_rate": 0.00014627594627594626, |
| "loss": 0.8493, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.6839214609364273, |
| "grad_norm": 0.42379269003868103, |
| "learning_rate": 0.00014615384615384615, |
| "loss": 0.6609, |
| "step": 1533 |
| }, |
| { |
| "epoch": 1.6850199093780036, |
| "grad_norm": 0.41174909472465515, |
| "learning_rate": 0.000146031746031746, |
| "loss": 0.7021, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.6861183578195797, |
| "grad_norm": 0.4856640100479126, |
| "learning_rate": 0.0001459096459096459, |
| "loss": 0.6055, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.687216806261156, |
| "grad_norm": 0.5789656043052673, |
| "learning_rate": 0.00014578754578754578, |
| "loss": 0.7003, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.6883152547027325, |
| "grad_norm": 0.5711427330970764, |
| "learning_rate": 0.00014566544566544564, |
| "loss": 0.5762, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.6894137031443086, |
| "grad_norm": 0.3285518288612366, |
| "learning_rate": 0.00014554334554334552, |
| "loss": 0.6232, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.690512151585885, |
| "grad_norm": 0.48425230383872986, |
| "learning_rate": 0.0001454212454212454, |
| "loss": 0.5515, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.6916106000274613, |
| "grad_norm": 0.573079526424408, |
| "learning_rate": 0.0001452991452991453, |
| "loss": 0.7776, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.6927090484690375, |
| "grad_norm": 0.49084943532943726, |
| "learning_rate": 0.00014517704517704518, |
| "loss": 0.6504, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.6938074969106136, |
| "grad_norm": 0.46472617983818054, |
| "learning_rate": 0.00014505494505494504, |
| "loss": 0.6971, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.6949059453521902, |
| "grad_norm": 0.4890255033969879, |
| "learning_rate": 0.00014493284493284492, |
| "loss": 0.9292, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.6960043937937663, |
| "grad_norm": 0.42868301272392273, |
| "learning_rate": 0.0001448107448107448, |
| "loss": 0.6024, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.6971028422353425, |
| "grad_norm": 0.5118973255157471, |
| "learning_rate": 0.00014468864468864467, |
| "loss": 0.7598, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.6982012906769188, |
| "grad_norm": 0.40809181332588196, |
| "learning_rate": 0.00014456654456654455, |
| "loss": 0.5157, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.6992997391184952, |
| "grad_norm": 0.5236404538154602, |
| "learning_rate": 0.0001444444444444444, |
| "loss": 0.84, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.7003981875600713, |
| "grad_norm": 0.5712966322898865, |
| "learning_rate": 0.00014432234432234432, |
| "loss": 0.7208, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.7014966360016477, |
| "grad_norm": 0.2910475730895996, |
| "learning_rate": 0.00014420024420024418, |
| "loss": 0.4998, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.702595084443224, |
| "grad_norm": 0.5326736569404602, |
| "learning_rate": 0.00014407814407814407, |
| "loss": 0.5492, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.7036935328848002, |
| "grad_norm": 0.5454451441764832, |
| "learning_rate": 0.00014395604395604395, |
| "loss": 0.9016, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.7047919813263763, |
| "grad_norm": 0.45031625032424927, |
| "learning_rate": 0.0001438339438339438, |
| "loss": 0.671, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.705890429767953, |
| "grad_norm": 0.5496229529380798, |
| "learning_rate": 0.0001437118437118437, |
| "loss": 0.6333, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.706988878209529, |
| "grad_norm": 0.4200669825077057, |
| "learning_rate": 0.00014358974358974358, |
| "loss": 0.6158, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.7080873266511052, |
| "grad_norm": 0.7623536586761475, |
| "learning_rate": 0.00014346764346764347, |
| "loss": 0.686, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.7091857750926815, |
| "grad_norm": 0.3363445997238159, |
| "learning_rate": 0.00014334554334554333, |
| "loss": 0.305, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.710284223534258, |
| "grad_norm": 0.5042807459831238, |
| "learning_rate": 0.0001432234432234432, |
| "loss": 0.72, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.711382671975834, |
| "grad_norm": 0.5264353156089783, |
| "learning_rate": 0.0001431013431013431, |
| "loss": 0.6778, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.7124811204174104, |
| "grad_norm": 0.48960715532302856, |
| "learning_rate": 0.00014297924297924296, |
| "loss": 0.4935, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.7135795688589868, |
| "grad_norm": 0.4308861792087555, |
| "learning_rate": 0.00014285714285714284, |
| "loss": 0.6527, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.714678017300563, |
| "grad_norm": 0.42890703678131104, |
| "learning_rate": 0.00014273504273504273, |
| "loss": 0.4846, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.7157764657421393, |
| "grad_norm": 0.5222750902175903, |
| "learning_rate": 0.0001426129426129426, |
| "loss": 0.764, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.7168749141837156, |
| "grad_norm": 0.49664998054504395, |
| "learning_rate": 0.00014249084249084247, |
| "loss": 0.5728, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.7179733626252918, |
| "grad_norm": 0.3131520748138428, |
| "learning_rate": 0.00014236874236874236, |
| "loss": 0.5089, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.719071811066868, |
| "grad_norm": 0.5098987221717834, |
| "learning_rate": 0.00014224664224664224, |
| "loss": 0.781, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.7201702595084445, |
| "grad_norm": 0.4040893316268921, |
| "learning_rate": 0.0001421245421245421, |
| "loss": 0.7358, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.7212687079500206, |
| "grad_norm": 0.3601396679878235, |
| "learning_rate": 0.00014200244200244198, |
| "loss": 0.5531, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.7223671563915968, |
| "grad_norm": 0.6634377837181091, |
| "learning_rate": 0.00014188034188034187, |
| "loss": 0.6548, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.7234656048331731, |
| "grad_norm": 0.35935553908348083, |
| "learning_rate": 0.00014175824175824173, |
| "loss": 0.5653, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.7245640532747495, |
| "grad_norm": 0.4607802927494049, |
| "learning_rate": 0.00014163614163614164, |
| "loss": 0.9111, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.7256625017163256, |
| "grad_norm": 1.0116467475891113, |
| "learning_rate": 0.0001415140415140415, |
| "loss": 0.9226, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.726760950157902, |
| "grad_norm": 0.9484761953353882, |
| "learning_rate": 0.00014139194139194139, |
| "loss": 0.7536, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.7278593985994783, |
| "grad_norm": 0.3684981167316437, |
| "learning_rate": 0.00014126984126984124, |
| "loss": 0.5013, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.7289578470410545, |
| "grad_norm": 0.40037083625793457, |
| "learning_rate": 0.00014114774114774113, |
| "loss": 0.8069, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.7300562954826306, |
| "grad_norm": 0.42828282713890076, |
| "learning_rate": 0.00014102564102564101, |
| "loss": 0.5586, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.7311547439242072, |
| "grad_norm": 0.3461548686027527, |
| "learning_rate": 0.00014090354090354087, |
| "loss": 0.6045, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.7322531923657833, |
| "grad_norm": 0.622982919216156, |
| "learning_rate": 0.00014078144078144079, |
| "loss": 0.8943, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.7333516408073595, |
| "grad_norm": 0.3318479359149933, |
| "learning_rate": 0.00014065934065934064, |
| "loss": 0.4058, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.7344500892489358, |
| "grad_norm": 0.5178685188293457, |
| "learning_rate": 0.00014053724053724053, |
| "loss": 0.5839, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.7355485376905122, |
| "grad_norm": 0.44273868203163147, |
| "learning_rate": 0.00014041514041514042, |
| "loss": 0.5394, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.7366469861320883, |
| "grad_norm": 0.60169517993927, |
| "learning_rate": 0.00014029304029304027, |
| "loss": 0.6753, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.7377454345736647, |
| "grad_norm": 0.7691718339920044, |
| "learning_rate": 0.00014017094017094016, |
| "loss": 0.9618, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.738843883015241, |
| "grad_norm": 0.3900390565395355, |
| "learning_rate": 0.00014004884004884004, |
| "loss": 0.5809, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.7399423314568172, |
| "grad_norm": 0.6272429823875427, |
| "learning_rate": 0.00013992673992673993, |
| "loss": 0.8579, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.7410407798983936, |
| "grad_norm": 0.30017220973968506, |
| "learning_rate": 0.0001398046398046398, |
| "loss": 0.5335, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.74213922833997, |
| "grad_norm": 0.4937066435813904, |
| "learning_rate": 0.00013968253968253967, |
| "loss": 0.7941, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.743237676781546, |
| "grad_norm": 0.47317594289779663, |
| "learning_rate": 0.00013956043956043956, |
| "loss": 0.6013, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.7443361252231222, |
| "grad_norm": 1.9155733585357666, |
| "learning_rate": 0.00013943833943833942, |
| "loss": 0.6708, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.7454345736646986, |
| "grad_norm": 0.3844835162162781, |
| "learning_rate": 0.0001393162393162393, |
| "loss": 0.7176, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.746533022106275, |
| "grad_norm": 0.42810145020484924, |
| "learning_rate": 0.0001391941391941392, |
| "loss": 0.9255, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.747631470547851, |
| "grad_norm": 3.846015691757202, |
| "learning_rate": 0.00013907203907203905, |
| "loss": 0.6202, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.7487299189894274, |
| "grad_norm": 0.42783257365226746, |
| "learning_rate": 0.00013894993894993893, |
| "loss": 0.7451, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.7498283674310038, |
| "grad_norm": 0.5237023234367371, |
| "learning_rate": 0.00013882783882783882, |
| "loss": 0.7961, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.75092681587258, |
| "grad_norm": 2.5639729499816895, |
| "learning_rate": 0.0001387057387057387, |
| "loss": 0.7026, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.7520252643141563, |
| "grad_norm": 0.5686498284339905, |
| "learning_rate": 0.00013858363858363856, |
| "loss": 0.4916, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.7531237127557326, |
| "grad_norm": 0.561611533164978, |
| "learning_rate": 0.00013846153846153845, |
| "loss": 0.772, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.7542221611973088, |
| "grad_norm": 0.6220077872276306, |
| "learning_rate": 0.00013833943833943833, |
| "loss": 0.5694, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.755320609638885, |
| "grad_norm": 0.6902570724487305, |
| "learning_rate": 0.0001382173382173382, |
| "loss": 0.7963, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.7564190580804615, |
| "grad_norm": 2.0417702198028564, |
| "learning_rate": 0.00013809523809523808, |
| "loss": 0.6721, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.7575175065220376, |
| "grad_norm": 0.36764901876449585, |
| "learning_rate": 0.00013797313797313796, |
| "loss": 0.5714, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.7586159549636138, |
| "grad_norm": 0.6679022908210754, |
| "learning_rate": 0.00013785103785103785, |
| "loss": 0.7025, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.7597144034051901, |
| "grad_norm": 0.5749796628952026, |
| "learning_rate": 0.0001377289377289377, |
| "loss": 0.7381, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.7608128518467665, |
| "grad_norm": 0.9285687208175659, |
| "learning_rate": 0.0001376068376068376, |
| "loss": 0.6, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.7619113002883426, |
| "grad_norm": 0.8209772706031799, |
| "learning_rate": 0.00013748473748473748, |
| "loss": 0.5701, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.763009748729919, |
| "grad_norm": 0.7823337912559509, |
| "learning_rate": 0.00013736263736263734, |
| "loss": 0.6695, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.7641081971714954, |
| "grad_norm": 0.4885605275630951, |
| "learning_rate": 0.00013724053724053725, |
| "loss": 0.6487, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.7652066456130715, |
| "grad_norm": 0.36517488956451416, |
| "learning_rate": 0.0001371184371184371, |
| "loss": 0.5798, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.7663050940546479, |
| "grad_norm": 0.49961966276168823, |
| "learning_rate": 0.000136996336996337, |
| "loss": 0.4373, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.7674035424962242, |
| "grad_norm": 0.495263010263443, |
| "learning_rate": 0.00013687423687423688, |
| "loss": 0.5868, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.7685019909378004, |
| "grad_norm": 0.7384648323059082, |
| "learning_rate": 0.00013675213675213674, |
| "loss": 0.4957, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.7696004393793765, |
| "grad_norm": 0.465440034866333, |
| "learning_rate": 0.00013663003663003662, |
| "loss": 0.7424, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.7706988878209529, |
| "grad_norm": 0.68381667137146, |
| "learning_rate": 0.00013650793650793648, |
| "loss": 1.0421, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.7717973362625292, |
| "grad_norm": 4.455906867980957, |
| "learning_rate": 0.00013638583638583637, |
| "loss": 0.6626, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.7728957847041054, |
| "grad_norm": 0.6165801286697388, |
| "learning_rate": 0.00013626373626373625, |
| "loss": 0.6072, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.7739942331456817, |
| "grad_norm": 0.8296604156494141, |
| "learning_rate": 0.00013614163614163614, |
| "loss": 0.6507, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.775092681587258, |
| "grad_norm": 0.4678190350532532, |
| "learning_rate": 0.00013601953601953602, |
| "loss": 0.8466, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.7761911300288342, |
| "grad_norm": 1.2141482830047607, |
| "learning_rate": 0.00013589743589743588, |
| "loss": 0.513, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.7772895784704106, |
| "grad_norm": 0.4522024691104889, |
| "learning_rate": 0.00013577533577533577, |
| "loss": 0.7571, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.778388026911987, |
| "grad_norm": 2.0903220176696777, |
| "learning_rate": 0.00013565323565323565, |
| "loss": 0.7359, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.779486475353563, |
| "grad_norm": 0.5292307734489441, |
| "learning_rate": 0.0001355311355311355, |
| "loss": 0.6526, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.7805849237951392, |
| "grad_norm": 0.5047786235809326, |
| "learning_rate": 0.0001354090354090354, |
| "loss": 0.7056, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.7816833722367158, |
| "grad_norm": 0.4102507531642914, |
| "learning_rate": 0.00013528693528693528, |
| "loss": 0.8673, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.782781820678292, |
| "grad_norm": 0.471556693315506, |
| "learning_rate": 0.00013516483516483517, |
| "loss": 0.9424, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.783880269119868, |
| "grad_norm": 0.6595687866210938, |
| "learning_rate": 0.00013504273504273502, |
| "loss": 0.661, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.7849787175614444, |
| "grad_norm": 0.6221860647201538, |
| "learning_rate": 0.0001349206349206349, |
| "loss": 0.5457, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.7860771660030208, |
| "grad_norm": 0.9256211519241333, |
| "learning_rate": 0.0001347985347985348, |
| "loss": 0.9216, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.787175614444597, |
| "grad_norm": 0.31376492977142334, |
| "learning_rate": 0.00013467643467643465, |
| "loss": 0.7071, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.7882740628861733, |
| "grad_norm": 0.5313776135444641, |
| "learning_rate": 0.00013455433455433454, |
| "loss": 0.8111, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.7893725113277497, |
| "grad_norm": 0.8203330636024475, |
| "learning_rate": 0.00013443223443223442, |
| "loss": 0.5301, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.7904709597693258, |
| "grad_norm": 0.42774948477745056, |
| "learning_rate": 0.0001343101343101343, |
| "loss": 0.8359, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.791569408210902, |
| "grad_norm": 0.8165685534477234, |
| "learning_rate": 0.00013418803418803417, |
| "loss": 0.4894, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.7926678566524785, |
| "grad_norm": 0.5739139318466187, |
| "learning_rate": 0.00013406593406593405, |
| "loss": 0.7009, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.7937663050940547, |
| "grad_norm": 0.5102986097335815, |
| "learning_rate": 0.00013394383394383394, |
| "loss": 0.7174, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.7948647535356308, |
| "grad_norm": 1.1377652883529663, |
| "learning_rate": 0.0001338217338217338, |
| "loss": 0.79, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.7959632019772072, |
| "grad_norm": 0.44272491335868835, |
| "learning_rate": 0.00013369963369963368, |
| "loss": 0.6761, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.7970616504187835, |
| "grad_norm": 0.5084714889526367, |
| "learning_rate": 0.00013357753357753357, |
| "loss": 0.6848, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.7981600988603597, |
| "grad_norm": 0.752017080783844, |
| "learning_rate": 0.00013345543345543345, |
| "loss": 0.6107, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.799258547301936, |
| "grad_norm": 0.4430617690086365, |
| "learning_rate": 0.0001333333333333333, |
| "loss": 0.7639, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.8003569957435124, |
| "grad_norm": 0.8098049759864807, |
| "learning_rate": 0.0001332112332112332, |
| "loss": 0.8172, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.8014554441850885, |
| "grad_norm": 0.6817697286605835, |
| "learning_rate": 0.00013308913308913308, |
| "loss": 0.8274, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.8025538926266649, |
| "grad_norm": 0.5132669806480408, |
| "learning_rate": 0.00013296703296703294, |
| "loss": 0.6269, |
| "step": 1641 |
| }, |
| { |
| "epoch": 1.8036523410682412, |
| "grad_norm": 0.8487284183502197, |
| "learning_rate": 0.00013284493284493283, |
| "loss": 0.6734, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.8047507895098174, |
| "grad_norm": 0.7084116339683533, |
| "learning_rate": 0.0001327228327228327, |
| "loss": 0.703, |
| "step": 1643 |
| }, |
| { |
| "epoch": 1.8058492379513935, |
| "grad_norm": 0.39045432209968567, |
| "learning_rate": 0.00013260073260073257, |
| "loss": 0.5466, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.8069476863929699, |
| "grad_norm": 0.4408475160598755, |
| "learning_rate": 0.00013247863247863248, |
| "loss": 0.4998, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.8080461348345462, |
| "grad_norm": 0.41640380024909973, |
| "learning_rate": 0.00013235653235653234, |
| "loss": 0.49, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.8091445832761224, |
| "grad_norm": 0.6760729551315308, |
| "learning_rate": 0.00013223443223443223, |
| "loss": 0.4537, |
| "step": 1647 |
| }, |
| { |
| "epoch": 1.8102430317176987, |
| "grad_norm": 0.42953255772590637, |
| "learning_rate": 0.0001321123321123321, |
| "loss": 0.489, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.811341480159275, |
| "grad_norm": 0.3260825574398041, |
| "learning_rate": 0.00013199023199023197, |
| "loss": 0.6633, |
| "step": 1649 |
| }, |
| { |
| "epoch": 1.8124399286008512, |
| "grad_norm": 0.7073171138763428, |
| "learning_rate": 0.00013186813186813186, |
| "loss": 0.4953, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8135383770424276, |
| "grad_norm": 0.36153069138526917, |
| "learning_rate": 0.00013174603174603172, |
| "loss": 0.7641, |
| "step": 1651 |
| }, |
| { |
| "epoch": 1.814636825484004, |
| "grad_norm": 0.4233636260032654, |
| "learning_rate": 0.00013162393162393163, |
| "loss": 0.7119, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.81573527392558, |
| "grad_norm": 0.5262153148651123, |
| "learning_rate": 0.0001315018315018315, |
| "loss": 0.4516, |
| "step": 1653 |
| }, |
| { |
| "epoch": 1.8168337223671562, |
| "grad_norm": 0.5263295769691467, |
| "learning_rate": 0.00013137973137973137, |
| "loss": 0.7786, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.8179321708087328, |
| "grad_norm": 0.3681116998195648, |
| "learning_rate": 0.00013125763125763126, |
| "loss": 0.5295, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.819030619250309, |
| "grad_norm": 0.5075433254241943, |
| "learning_rate": 0.00013113553113553112, |
| "loss": 0.6017, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.820129067691885, |
| "grad_norm": 0.2960616946220398, |
| "learning_rate": 0.000131013431013431, |
| "loss": 0.4951, |
| "step": 1657 |
| }, |
| { |
| "epoch": 1.8212275161334615, |
| "grad_norm": 0.4010205864906311, |
| "learning_rate": 0.0001308913308913309, |
| "loss": 0.8916, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.8223259645750378, |
| "grad_norm": 0.9112391471862793, |
| "learning_rate": 0.00013076923076923077, |
| "loss": 0.4978, |
| "step": 1659 |
| }, |
| { |
| "epoch": 1.823424413016614, |
| "grad_norm": 0.7214633226394653, |
| "learning_rate": 0.00013064713064713063, |
| "loss": 0.791, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.8245228614581903, |
| "grad_norm": 0.4174933433532715, |
| "learning_rate": 0.00013052503052503052, |
| "loss": 0.4099, |
| "step": 1661 |
| }, |
| { |
| "epoch": 1.8256213098997667, |
| "grad_norm": 0.4622137248516083, |
| "learning_rate": 0.0001304029304029304, |
| "loss": 1.1726, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.8267197583413428, |
| "grad_norm": 0.5991957783699036, |
| "learning_rate": 0.00013028083028083026, |
| "loss": 0.6713, |
| "step": 1663 |
| }, |
| { |
| "epoch": 1.8278182067829192, |
| "grad_norm": 0.43959730863571167, |
| "learning_rate": 0.00013015873015873015, |
| "loss": 0.5676, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.8289166552244955, |
| "grad_norm": 0.6271671056747437, |
| "learning_rate": 0.00013003663003663003, |
| "loss": 0.7399, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.8300151036660717, |
| "grad_norm": 0.6412084102630615, |
| "learning_rate": 0.0001299145299145299, |
| "loss": 0.7585, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.8311135521076478, |
| "grad_norm": 0.4066605269908905, |
| "learning_rate": 0.00012979242979242977, |
| "loss": 0.5756, |
| "step": 1667 |
| }, |
| { |
| "epoch": 1.8322120005492242, |
| "grad_norm": 0.3568172752857208, |
| "learning_rate": 0.00012967032967032966, |
| "loss": 0.968, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.8333104489908005, |
| "grad_norm": 0.5061100721359253, |
| "learning_rate": 0.00012954822954822955, |
| "loss": 0.5089, |
| "step": 1669 |
| }, |
| { |
| "epoch": 1.8344088974323767, |
| "grad_norm": 3.013622522354126, |
| "learning_rate": 0.0001294261294261294, |
| "loss": 0.5101, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.835507345873953, |
| "grad_norm": 0.40078219771385193, |
| "learning_rate": 0.0001293040293040293, |
| "loss": 0.5602, |
| "step": 1671 |
| }, |
| { |
| "epoch": 1.8366057943155294, |
| "grad_norm": 0.4108009338378906, |
| "learning_rate": 0.00012918192918192918, |
| "loss": 0.6338, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.8377042427571055, |
| "grad_norm": 0.5452212691307068, |
| "learning_rate": 0.00012905982905982903, |
| "loss": 0.5358, |
| "step": 1673 |
| }, |
| { |
| "epoch": 1.838802691198682, |
| "grad_norm": 0.4694603979587555, |
| "learning_rate": 0.00012893772893772895, |
| "loss": 0.7031, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.8399011396402583, |
| "grad_norm": 0.3787671625614166, |
| "learning_rate": 0.0001288156288156288, |
| "loss": 0.5667, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.8409995880818344, |
| "grad_norm": 0.4842737317085266, |
| "learning_rate": 0.0001286935286935287, |
| "loss": 0.5082, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.8420980365234105, |
| "grad_norm": 0.7690992951393127, |
| "learning_rate": 0.00012857142857142855, |
| "loss": 0.706, |
| "step": 1677 |
| }, |
| { |
| "epoch": 1.8431964849649871, |
| "grad_norm": 1.0891668796539307, |
| "learning_rate": 0.00012844932844932843, |
| "loss": 0.7162, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.8442949334065633, |
| "grad_norm": 0.4118032157421112, |
| "learning_rate": 0.00012832722832722832, |
| "loss": 0.7019, |
| "step": 1679 |
| }, |
| { |
| "epoch": 1.8453933818481394, |
| "grad_norm": 0.513157308101654, |
| "learning_rate": 0.00012820512820512818, |
| "loss": 0.4359, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.8464918302897158, |
| "grad_norm": 1.3229504823684692, |
| "learning_rate": 0.0001280830280830281, |
| "loss": 0.5555, |
| "step": 1681 |
| }, |
| { |
| "epoch": 1.8475902787312921, |
| "grad_norm": 0.6301699876785278, |
| "learning_rate": 0.00012796092796092795, |
| "loss": 0.5211, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.8486887271728683, |
| "grad_norm": 0.6125632524490356, |
| "learning_rate": 0.00012783882783882783, |
| "loss": 0.6287, |
| "step": 1683 |
| }, |
| { |
| "epoch": 1.8497871756144446, |
| "grad_norm": 1.806593418121338, |
| "learning_rate": 0.00012771672771672772, |
| "loss": 0.5794, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.850885624056021, |
| "grad_norm": 1.2972358465194702, |
| "learning_rate": 0.00012759462759462758, |
| "loss": 0.9205, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.8519840724975971, |
| "grad_norm": 1.0519033670425415, |
| "learning_rate": 0.00012747252747252746, |
| "loss": 0.7103, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.8530825209391735, |
| "grad_norm": 1.6489734649658203, |
| "learning_rate": 0.00012735042735042735, |
| "loss": 0.7585, |
| "step": 1687 |
| }, |
| { |
| "epoch": 1.8541809693807498, |
| "grad_norm": 0.7229527235031128, |
| "learning_rate": 0.0001272283272283272, |
| "loss": 0.8109, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.855279417822326, |
| "grad_norm": 0.35257261991500854, |
| "learning_rate": 0.0001271062271062271, |
| "loss": 0.8014, |
| "step": 1689 |
| }, |
| { |
| "epoch": 1.856377866263902, |
| "grad_norm": 0.4653327167034149, |
| "learning_rate": 0.00012698412698412698, |
| "loss": 0.6404, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.8574763147054785, |
| "grad_norm": 0.5230842232704163, |
| "learning_rate": 0.00012686202686202686, |
| "loss": 0.7413, |
| "step": 1691 |
| }, |
| { |
| "epoch": 1.8585747631470548, |
| "grad_norm": 0.42130210995674133, |
| "learning_rate": 0.00012673992673992672, |
| "loss": 0.7283, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.859673211588631, |
| "grad_norm": 1.4667960405349731, |
| "learning_rate": 0.0001266178266178266, |
| "loss": 0.5656, |
| "step": 1693 |
| }, |
| { |
| "epoch": 1.8607716600302073, |
| "grad_norm": 0.4077359139919281, |
| "learning_rate": 0.0001264957264957265, |
| "loss": 0.5891, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.8618701084717837, |
| "grad_norm": 0.503654956817627, |
| "learning_rate": 0.00012637362637362635, |
| "loss": 0.5912, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.8629685569133598, |
| "grad_norm": 1.6315315961837769, |
| "learning_rate": 0.00012625152625152624, |
| "loss": 0.5588, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.8640670053549362, |
| "grad_norm": 0.783920407295227, |
| "learning_rate": 0.00012612942612942612, |
| "loss": 0.6585, |
| "step": 1697 |
| }, |
| { |
| "epoch": 1.8651654537965126, |
| "grad_norm": 0.7186728715896606, |
| "learning_rate": 0.000126007326007326, |
| "loss": 0.9174, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.8662639022380887, |
| "grad_norm": 0.8784156441688538, |
| "learning_rate": 0.00012588522588522587, |
| "loss": 0.5835, |
| "step": 1699 |
| }, |
| { |
| "epoch": 1.8673623506796648, |
| "grad_norm": 0.7090787887573242, |
| "learning_rate": 0.00012576312576312575, |
| "loss": 0.7555, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8684607991212414, |
| "grad_norm": 0.5508129596710205, |
| "learning_rate": 0.00012564102564102564, |
| "loss": 0.6168, |
| "step": 1701 |
| }, |
| { |
| "epoch": 1.8695592475628175, |
| "grad_norm": 0.40403681993484497, |
| "learning_rate": 0.0001255189255189255, |
| "loss": 0.4528, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.8706576960043937, |
| "grad_norm": 0.9553635716438293, |
| "learning_rate": 0.00012539682539682538, |
| "loss": 0.654, |
| "step": 1703 |
| }, |
| { |
| "epoch": 1.87175614444597, |
| "grad_norm": 1.0610092878341675, |
| "learning_rate": 0.00012527472527472527, |
| "loss": 0.6115, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.8728545928875464, |
| "grad_norm": 0.32898634672164917, |
| "learning_rate": 0.00012515262515262515, |
| "loss": 0.5651, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.8739530413291225, |
| "grad_norm": 0.4018780589103699, |
| "learning_rate": 0.000125030525030525, |
| "loss": 0.5919, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.875051489770699, |
| "grad_norm": 1.6521873474121094, |
| "learning_rate": 0.0001249084249084249, |
| "loss": 0.7137, |
| "step": 1707 |
| }, |
| { |
| "epoch": 1.8761499382122753, |
| "grad_norm": 0.5515930652618408, |
| "learning_rate": 0.00012478632478632478, |
| "loss": 0.4471, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.8772483866538514, |
| "grad_norm": 0.4156915545463562, |
| "learning_rate": 0.00012466422466422464, |
| "loss": 0.6575, |
| "step": 1709 |
| }, |
| { |
| "epoch": 1.8783468350954275, |
| "grad_norm": 0.41263312101364136, |
| "learning_rate": 0.00012454212454212453, |
| "loss": 0.542, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.8794452835370041, |
| "grad_norm": 1.0169517993927002, |
| "learning_rate": 0.0001244200244200244, |
| "loss": 1.1631, |
| "step": 1711 |
| }, |
| { |
| "epoch": 1.8805437319785803, |
| "grad_norm": 0.49169981479644775, |
| "learning_rate": 0.0001242979242979243, |
| "loss": 0.6707, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.8816421804201564, |
| "grad_norm": 0.44801297783851624, |
| "learning_rate": 0.00012417582417582416, |
| "loss": 1.0036, |
| "step": 1713 |
| }, |
| { |
| "epoch": 1.8827406288617328, |
| "grad_norm": 0.47181040048599243, |
| "learning_rate": 0.00012405372405372404, |
| "loss": 0.6693, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.8838390773033091, |
| "grad_norm": 0.39900457859039307, |
| "learning_rate": 0.00012393162393162393, |
| "loss": 0.6421, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.8849375257448853, |
| "grad_norm": 1.1160179376602173, |
| "learning_rate": 0.00012380952380952378, |
| "loss": 0.6599, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.8860359741864616, |
| "grad_norm": 0.6951555609703064, |
| "learning_rate": 0.00012368742368742367, |
| "loss": 0.743, |
| "step": 1717 |
| }, |
| { |
| "epoch": 1.887134422628038, |
| "grad_norm": 0.5381472706794739, |
| "learning_rate": 0.00012356532356532356, |
| "loss": 0.5051, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.8882328710696141, |
| "grad_norm": 0.48717793822288513, |
| "learning_rate": 0.00012344322344322341, |
| "loss": 0.7015, |
| "step": 1719 |
| }, |
| { |
| "epoch": 1.8893313195111905, |
| "grad_norm": 0.3720596432685852, |
| "learning_rate": 0.00012332112332112333, |
| "loss": 0.6743, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.8904297679527668, |
| "grad_norm": 1.1850451231002808, |
| "learning_rate": 0.00012319902319902318, |
| "loss": 0.6132, |
| "step": 1721 |
| }, |
| { |
| "epoch": 1.891528216394343, |
| "grad_norm": 0.4546525180339813, |
| "learning_rate": 0.00012307692307692307, |
| "loss": 0.5465, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.8926266648359191, |
| "grad_norm": 0.41415080428123474, |
| "learning_rate": 0.00012295482295482296, |
| "loss": 0.7259, |
| "step": 1723 |
| }, |
| { |
| "epoch": 1.8937251132774955, |
| "grad_norm": 0.44278842210769653, |
| "learning_rate": 0.00012283272283272281, |
| "loss": 0.7244, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.8948235617190718, |
| "grad_norm": 0.3887364864349365, |
| "learning_rate": 0.0001227106227106227, |
| "loss": 0.7124, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.895922010160648, |
| "grad_norm": 0.5405781269073486, |
| "learning_rate": 0.00012258852258852256, |
| "loss": 0.5153, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.8970204586022243, |
| "grad_norm": 0.3530559837818146, |
| "learning_rate": 0.00012246642246642247, |
| "loss": 0.5429, |
| "step": 1727 |
| }, |
| { |
| "epoch": 1.8981189070438007, |
| "grad_norm": 0.523621678352356, |
| "learning_rate": 0.00012234432234432233, |
| "loss": 0.5645, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.8992173554853768, |
| "grad_norm": 0.3893704116344452, |
| "learning_rate": 0.00012222222222222221, |
| "loss": 0.6419, |
| "step": 1729 |
| }, |
| { |
| "epoch": 1.9003158039269532, |
| "grad_norm": 0.7010704278945923, |
| "learning_rate": 0.0001221001221001221, |
| "loss": 0.5202, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.9014142523685296, |
| "grad_norm": 0.45551490783691406, |
| "learning_rate": 0.00012197802197802197, |
| "loss": 0.8492, |
| "step": 1731 |
| }, |
| { |
| "epoch": 1.9025127008101057, |
| "grad_norm": 1.0112484693527222, |
| "learning_rate": 0.00012185592185592184, |
| "loss": 0.8602, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.9036111492516818, |
| "grad_norm": 0.4509601294994354, |
| "learning_rate": 0.00012173382173382173, |
| "loss": 0.6138, |
| "step": 1733 |
| }, |
| { |
| "epoch": 1.9047095976932584, |
| "grad_norm": 0.4303388297557831, |
| "learning_rate": 0.0001216117216117216, |
| "loss": 0.4748, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.9058080461348346, |
| "grad_norm": 0.4452000558376312, |
| "learning_rate": 0.00012148962148962147, |
| "loss": 0.5869, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.9069064945764107, |
| "grad_norm": 0.5915077924728394, |
| "learning_rate": 0.00012136752136752136, |
| "loss": 0.8057, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.908004943017987, |
| "grad_norm": 0.38761547207832336, |
| "learning_rate": 0.00012124542124542123, |
| "loss": 0.5772, |
| "step": 1737 |
| }, |
| { |
| "epoch": 1.9091033914595634, |
| "grad_norm": 0.517752468585968, |
| "learning_rate": 0.00012112332112332112, |
| "loss": 0.7865, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.9102018399011396, |
| "grad_norm": 0.5325546860694885, |
| "learning_rate": 0.00012100122100122099, |
| "loss": 0.5934, |
| "step": 1739 |
| }, |
| { |
| "epoch": 1.911300288342716, |
| "grad_norm": 0.3930620551109314, |
| "learning_rate": 0.00012087912087912087, |
| "loss": 0.5974, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.9123987367842923, |
| "grad_norm": 1.1001818180084229, |
| "learning_rate": 0.00012075702075702075, |
| "loss": 0.6524, |
| "step": 1741 |
| }, |
| { |
| "epoch": 1.9134971852258684, |
| "grad_norm": 0.3690165877342224, |
| "learning_rate": 0.00012063492063492062, |
| "loss": 0.36, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.9145956336674448, |
| "grad_norm": 0.4403206408023834, |
| "learning_rate": 0.0001205128205128205, |
| "loss": 0.5737, |
| "step": 1743 |
| }, |
| { |
| "epoch": 1.9156940821090211, |
| "grad_norm": 0.651498019695282, |
| "learning_rate": 0.00012039072039072037, |
| "loss": 0.657, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.9167925305505973, |
| "grad_norm": 0.6880660057067871, |
| "learning_rate": 0.00012026862026862025, |
| "loss": 0.6891, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.9178909789921734, |
| "grad_norm": 0.4968664348125458, |
| "learning_rate": 0.00012014652014652015, |
| "loss": 0.841, |
| "step": 1746 |
| }, |
| { |
| "epoch": 1.9189894274337498, |
| "grad_norm": 0.4392407536506653, |
| "learning_rate": 0.00012002442002442002, |
| "loss": 0.7096, |
| "step": 1747 |
| }, |
| { |
| "epoch": 1.9200878758753261, |
| "grad_norm": 0.41028741002082825, |
| "learning_rate": 0.00011990231990231989, |
| "loss": 0.5838, |
| "step": 1748 |
| }, |
| { |
| "epoch": 1.9211863243169023, |
| "grad_norm": 0.7928158640861511, |
| "learning_rate": 0.00011978021978021978, |
| "loss": 0.6633, |
| "step": 1749 |
| }, |
| { |
| "epoch": 1.9222847727584786, |
| "grad_norm": 0.4970681071281433, |
| "learning_rate": 0.00011965811965811965, |
| "loss": 0.7764, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.923383221200055, |
| "grad_norm": 0.49581378698349, |
| "learning_rate": 0.00011953601953601952, |
| "loss": 0.7204, |
| "step": 1751 |
| }, |
| { |
| "epoch": 1.9244816696416311, |
| "grad_norm": 1.309241771697998, |
| "learning_rate": 0.00011941391941391939, |
| "loss": 0.5859, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.9255801180832075, |
| "grad_norm": 0.4651016592979431, |
| "learning_rate": 0.00011929181929181929, |
| "loss": 0.6425, |
| "step": 1753 |
| }, |
| { |
| "epoch": 1.9266785665247839, |
| "grad_norm": 0.5377634167671204, |
| "learning_rate": 0.00011916971916971916, |
| "loss": 0.8244, |
| "step": 1754 |
| }, |
| { |
| "epoch": 1.92777701496636, |
| "grad_norm": 0.6809287667274475, |
| "learning_rate": 0.00011904761904761903, |
| "loss": 0.5711, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.9288754634079361, |
| "grad_norm": 0.650701105594635, |
| "learning_rate": 0.00011892551892551892, |
| "loss": 0.8341, |
| "step": 1756 |
| }, |
| { |
| "epoch": 1.9299739118495127, |
| "grad_norm": 1.1710751056671143, |
| "learning_rate": 0.00011880341880341879, |
| "loss": 0.8093, |
| "step": 1757 |
| }, |
| { |
| "epoch": 1.9310723602910889, |
| "grad_norm": 0.4244484603404999, |
| "learning_rate": 0.00011868131868131866, |
| "loss": 0.5556, |
| "step": 1758 |
| }, |
| { |
| "epoch": 1.932170808732665, |
| "grad_norm": 0.43999040126800537, |
| "learning_rate": 0.00011855921855921855, |
| "loss": 0.4582, |
| "step": 1759 |
| }, |
| { |
| "epoch": 1.9332692571742414, |
| "grad_norm": 0.4197145700454712, |
| "learning_rate": 0.00011843711843711843, |
| "loss": 0.6475, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.9343677056158177, |
| "grad_norm": 0.36619749665260315, |
| "learning_rate": 0.0001183150183150183, |
| "loss": 0.5804, |
| "step": 1761 |
| }, |
| { |
| "epoch": 1.9354661540573939, |
| "grad_norm": 1.7230706214904785, |
| "learning_rate": 0.00011819291819291819, |
| "loss": 0.7064, |
| "step": 1762 |
| }, |
| { |
| "epoch": 1.9365646024989702, |
| "grad_norm": 0.7621874213218689, |
| "learning_rate": 0.00011807081807081806, |
| "loss": 0.6766, |
| "step": 1763 |
| }, |
| { |
| "epoch": 1.9376630509405466, |
| "grad_norm": 0.5920525789260864, |
| "learning_rate": 0.00011794871794871794, |
| "loss": 0.7092, |
| "step": 1764 |
| }, |
| { |
| "epoch": 1.9387614993821227, |
| "grad_norm": 1.5368432998657227, |
| "learning_rate": 0.00011782661782661781, |
| "loss": 0.3366, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.9398599478236989, |
| "grad_norm": 0.43197643756866455, |
| "learning_rate": 0.00011770451770451769, |
| "loss": 0.6158, |
| "step": 1766 |
| }, |
| { |
| "epoch": 1.9409583962652754, |
| "grad_norm": 0.4623143970966339, |
| "learning_rate": 0.00011758241758241756, |
| "loss": 0.6574, |
| "step": 1767 |
| }, |
| { |
| "epoch": 1.9420568447068516, |
| "grad_norm": 0.40638601779937744, |
| "learning_rate": 0.00011746031746031744, |
| "loss": 0.4385, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.9431552931484277, |
| "grad_norm": 0.5941652655601501, |
| "learning_rate": 0.00011733821733821734, |
| "loss": 0.8634, |
| "step": 1769 |
| }, |
| { |
| "epoch": 1.944253741590004, |
| "grad_norm": 0.9646288156509399, |
| "learning_rate": 0.00011721611721611721, |
| "loss": 0.7107, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.9453521900315804, |
| "grad_norm": 1.6859776973724365, |
| "learning_rate": 0.00011709401709401708, |
| "loss": 0.5544, |
| "step": 1771 |
| }, |
| { |
| "epoch": 1.9464506384731566, |
| "grad_norm": 0.4034999907016754, |
| "learning_rate": 0.00011697191697191697, |
| "loss": 0.559, |
| "step": 1772 |
| }, |
| { |
| "epoch": 1.947549086914733, |
| "grad_norm": 0.3644643723964691, |
| "learning_rate": 0.00011684981684981684, |
| "loss": 0.535, |
| "step": 1773 |
| }, |
| { |
| "epoch": 1.9486475353563093, |
| "grad_norm": 0.5826202034950256, |
| "learning_rate": 0.00011672771672771671, |
| "loss": 0.6405, |
| "step": 1774 |
| }, |
| { |
| "epoch": 1.9497459837978854, |
| "grad_norm": 0.5501505136489868, |
| "learning_rate": 0.00011660561660561661, |
| "loss": 0.5702, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.9508444322394618, |
| "grad_norm": 0.7928853631019592, |
| "learning_rate": 0.00011648351648351648, |
| "loss": 0.666, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.9519428806810382, |
| "grad_norm": 0.8168489933013916, |
| "learning_rate": 0.00011636141636141635, |
| "loss": 0.4451, |
| "step": 1777 |
| }, |
| { |
| "epoch": 1.9530413291226143, |
| "grad_norm": 0.3752410113811493, |
| "learning_rate": 0.00011623931623931622, |
| "loss": 0.6552, |
| "step": 1778 |
| }, |
| { |
| "epoch": 1.9541397775641904, |
| "grad_norm": 0.9020218849182129, |
| "learning_rate": 0.00011611721611721611, |
| "loss": 0.5994, |
| "step": 1779 |
| }, |
| { |
| "epoch": 1.9552382260057668, |
| "grad_norm": 0.7668479084968567, |
| "learning_rate": 0.00011599511599511598, |
| "loss": 0.5007, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.9563366744473432, |
| "grad_norm": 0.5034022331237793, |
| "learning_rate": 0.00011587301587301585, |
| "loss": 0.5211, |
| "step": 1781 |
| }, |
| { |
| "epoch": 1.9574351228889193, |
| "grad_norm": 1.0153850317001343, |
| "learning_rate": 0.00011575091575091575, |
| "loss": 0.5953, |
| "step": 1782 |
| }, |
| { |
| "epoch": 1.9585335713304957, |
| "grad_norm": 0.40088045597076416, |
| "learning_rate": 0.00011562881562881562, |
| "loss": 0.568, |
| "step": 1783 |
| }, |
| { |
| "epoch": 1.959632019772072, |
| "grad_norm": 1.4017099142074585, |
| "learning_rate": 0.0001155067155067155, |
| "loss": 0.7058, |
| "step": 1784 |
| }, |
| { |
| "epoch": 1.9607304682136482, |
| "grad_norm": 0.6009597778320312, |
| "learning_rate": 0.00011538461538461538, |
| "loss": 0.6239, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.9618289166552245, |
| "grad_norm": 0.5155071020126343, |
| "learning_rate": 0.00011526251526251525, |
| "loss": 0.6089, |
| "step": 1786 |
| }, |
| { |
| "epoch": 1.9629273650968009, |
| "grad_norm": 0.4248057007789612, |
| "learning_rate": 0.00011514041514041513, |
| "loss": 0.6481, |
| "step": 1787 |
| }, |
| { |
| "epoch": 1.964025813538377, |
| "grad_norm": 0.6521177887916565, |
| "learning_rate": 0.00011501831501831501, |
| "loss": 0.6598, |
| "step": 1788 |
| }, |
| { |
| "epoch": 1.9651242619799532, |
| "grad_norm": 0.44697993993759155, |
| "learning_rate": 0.00011489621489621488, |
| "loss": 0.8944, |
| "step": 1789 |
| }, |
| { |
| "epoch": 1.9662227104215297, |
| "grad_norm": 0.41537097096443176, |
| "learning_rate": 0.00011477411477411476, |
| "loss": 0.5304, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.9673211588631059, |
| "grad_norm": 0.48793885111808777, |
| "learning_rate": 0.00011465201465201464, |
| "loss": 0.7262, |
| "step": 1791 |
| }, |
| { |
| "epoch": 1.968419607304682, |
| "grad_norm": 0.8768893480300903, |
| "learning_rate": 0.00011452991452991453, |
| "loss": 0.6748, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.9695180557462584, |
| "grad_norm": 0.39224761724472046, |
| "learning_rate": 0.0001144078144078144, |
| "loss": 0.5503, |
| "step": 1793 |
| }, |
| { |
| "epoch": 1.9706165041878347, |
| "grad_norm": 0.5617446899414062, |
| "learning_rate": 0.00011428571428571427, |
| "loss": 0.7329, |
| "step": 1794 |
| }, |
| { |
| "epoch": 1.9717149526294109, |
| "grad_norm": 0.3787171542644501, |
| "learning_rate": 0.00011416361416361416, |
| "loss": 0.545, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.9728134010709872, |
| "grad_norm": 1.5167701244354248, |
| "learning_rate": 0.00011404151404151403, |
| "loss": 0.492, |
| "step": 1796 |
| }, |
| { |
| "epoch": 1.9739118495125636, |
| "grad_norm": 0.6436883807182312, |
| "learning_rate": 0.0001139194139194139, |
| "loss": 0.5644, |
| "step": 1797 |
| }, |
| { |
| "epoch": 1.9750102979541397, |
| "grad_norm": 0.7104658484458923, |
| "learning_rate": 0.0001137973137973138, |
| "loss": 0.7485, |
| "step": 1798 |
| }, |
| { |
| "epoch": 1.976108746395716, |
| "grad_norm": 0.7996894717216492, |
| "learning_rate": 0.00011367521367521367, |
| "loss": 0.6918, |
| "step": 1799 |
| }, |
| { |
| "epoch": 1.9772071948372925, |
| "grad_norm": 0.6419106721878052, |
| "learning_rate": 0.00011355311355311354, |
| "loss": 0.5945, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9783056432788686, |
| "grad_norm": 0.5158131718635559, |
| "learning_rate": 0.00011343101343101343, |
| "loss": 0.6685, |
| "step": 1801 |
| }, |
| { |
| "epoch": 1.9794040917204447, |
| "grad_norm": 1.0825144052505493, |
| "learning_rate": 0.0001133089133089133, |
| "loss": 0.6774, |
| "step": 1802 |
| }, |
| { |
| "epoch": 1.980502540162021, |
| "grad_norm": 0.3999088704586029, |
| "learning_rate": 0.00011318681318681317, |
| "loss": 0.632, |
| "step": 1803 |
| }, |
| { |
| "epoch": 1.9816009886035975, |
| "grad_norm": 0.8866069316864014, |
| "learning_rate": 0.00011306471306471304, |
| "loss": 0.6541, |
| "step": 1804 |
| }, |
| { |
| "epoch": 1.9826994370451736, |
| "grad_norm": 0.3858928978443146, |
| "learning_rate": 0.00011294261294261294, |
| "loss": 0.6608, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.98379788548675, |
| "grad_norm": 0.513117790222168, |
| "learning_rate": 0.00011282051282051281, |
| "loss": 0.7598, |
| "step": 1806 |
| }, |
| { |
| "epoch": 1.9848963339283263, |
| "grad_norm": 0.3166581392288208, |
| "learning_rate": 0.00011269841269841269, |
| "loss": 0.781, |
| "step": 1807 |
| }, |
| { |
| "epoch": 1.9859947823699025, |
| "grad_norm": 0.3982362151145935, |
| "learning_rate": 0.00011257631257631257, |
| "loss": 0.873, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.9870932308114788, |
| "grad_norm": 0.3784008026123047, |
| "learning_rate": 0.00011245421245421244, |
| "loss": 0.7286, |
| "step": 1809 |
| }, |
| { |
| "epoch": 1.9881916792530552, |
| "grad_norm": 0.7578315138816833, |
| "learning_rate": 0.00011233211233211232, |
| "loss": 0.5958, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.9892901276946313, |
| "grad_norm": 0.8509061932563782, |
| "learning_rate": 0.0001122100122100122, |
| "loss": 0.557, |
| "step": 1811 |
| }, |
| { |
| "epoch": 1.9903885761362075, |
| "grad_norm": 0.5107323527336121, |
| "learning_rate": 0.00011208791208791207, |
| "loss": 0.6994, |
| "step": 1812 |
| }, |
| { |
| "epoch": 1.991487024577784, |
| "grad_norm": 0.5421388149261475, |
| "learning_rate": 0.00011196581196581196, |
| "loss": 0.8839, |
| "step": 1813 |
| }, |
| { |
| "epoch": 1.9925854730193602, |
| "grad_norm": 0.7442356944084167, |
| "learning_rate": 0.00011184371184371184, |
| "loss": 0.6676, |
| "step": 1814 |
| }, |
| { |
| "epoch": 1.9936839214609363, |
| "grad_norm": 0.34132111072540283, |
| "learning_rate": 0.00011172161172161172, |
| "loss": 0.5714, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.9947823699025127, |
| "grad_norm": 0.3995620906352997, |
| "learning_rate": 0.00011159951159951159, |
| "loss": 0.4811, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.995880818344089, |
| "grad_norm": 0.5613861083984375, |
| "learning_rate": 0.00011147741147741146, |
| "loss": 0.7495, |
| "step": 1817 |
| }, |
| { |
| "epoch": 1.9969792667856652, |
| "grad_norm": 0.4366309642791748, |
| "learning_rate": 0.00011135531135531135, |
| "loss": 0.6512, |
| "step": 1818 |
| }, |
| { |
| "epoch": 1.9980777152272415, |
| "grad_norm": 0.889916718006134, |
| "learning_rate": 0.00011123321123321122, |
| "loss": 0.5544, |
| "step": 1819 |
| }, |
| { |
| "epoch": 1.999176163668818, |
| "grad_norm": 0.512112021446228, |
| "learning_rate": 0.00011111111111111109, |
| "loss": 1.136, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.000274612110394, |
| "grad_norm": 0.5241844654083252, |
| "learning_rate": 0.00011098901098901099, |
| "loss": 0.5898, |
| "step": 1821 |
| }, |
| { |
| "epoch": 2.00137306055197, |
| "grad_norm": 0.38159477710723877, |
| "learning_rate": 0.00011086691086691086, |
| "loss": 0.5523, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.0024715089935468, |
| "grad_norm": 1.0415009260177612, |
| "learning_rate": 0.00011074481074481073, |
| "loss": 0.6963, |
| "step": 1823 |
| }, |
| { |
| "epoch": 2.003569957435123, |
| "grad_norm": 0.5349957942962646, |
| "learning_rate": 0.00011062271062271062, |
| "loss": 0.4422, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.004668405876699, |
| "grad_norm": 0.4512043297290802, |
| "learning_rate": 0.00011050061050061049, |
| "loss": 0.5467, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.0057668543182756, |
| "grad_norm": 0.8268045783042908, |
| "learning_rate": 0.00011037851037851036, |
| "loss": 0.6931, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.0068653027598518, |
| "grad_norm": 0.47922319173812866, |
| "learning_rate": 0.00011025641025641026, |
| "loss": 0.707, |
| "step": 1827 |
| }, |
| { |
| "epoch": 2.007963751201428, |
| "grad_norm": 1.352858304977417, |
| "learning_rate": 0.00011013431013431013, |
| "loss": 0.5658, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.0090621996430045, |
| "grad_norm": 0.6304643154144287, |
| "learning_rate": 0.00011001221001221, |
| "loss": 0.6526, |
| "step": 1829 |
| }, |
| { |
| "epoch": 2.0101606480845806, |
| "grad_norm": 0.3759060502052307, |
| "learning_rate": 0.00010989010989010988, |
| "loss": 0.627, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.0112590965261568, |
| "grad_norm": 0.5676531195640564, |
| "learning_rate": 0.00010976800976800976, |
| "loss": 0.7568, |
| "step": 1831 |
| }, |
| { |
| "epoch": 2.012357544967733, |
| "grad_norm": 0.7481321692466736, |
| "learning_rate": 0.00010964590964590963, |
| "loss": 0.7304, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.0134559934093095, |
| "grad_norm": 1.0350905656814575, |
| "learning_rate": 0.0001095238095238095, |
| "loss": 0.7414, |
| "step": 1833 |
| }, |
| { |
| "epoch": 2.0145544418508856, |
| "grad_norm": 0.7817292809486389, |
| "learning_rate": 0.00010940170940170939, |
| "loss": 0.7742, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.0156528902924618, |
| "grad_norm": 0.44659602642059326, |
| "learning_rate": 0.00010927960927960928, |
| "loss": 0.7872, |
| "step": 1835 |
| }, |
| { |
| "epoch": 2.0167513387340383, |
| "grad_norm": 0.46931198239326477, |
| "learning_rate": 0.00010915750915750915, |
| "loss": 0.5596, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.0178497871756145, |
| "grad_norm": 0.34634560346603394, |
| "learning_rate": 0.00010903540903540903, |
| "loss": 0.6861, |
| "step": 1837 |
| }, |
| { |
| "epoch": 2.0189482356171906, |
| "grad_norm": 0.36579200625419617, |
| "learning_rate": 0.0001089133089133089, |
| "loss": 0.6586, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.020046684058767, |
| "grad_norm": 0.9167144894599915, |
| "learning_rate": 0.00010879120879120878, |
| "loss": 0.7125, |
| "step": 1839 |
| }, |
| { |
| "epoch": 2.0211451325003433, |
| "grad_norm": 0.4107789993286133, |
| "learning_rate": 0.00010866910866910866, |
| "loss": 0.6089, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.0222435809419195, |
| "grad_norm": 1.0845204591751099, |
| "learning_rate": 0.00010854700854700854, |
| "loss": 0.499, |
| "step": 1841 |
| }, |
| { |
| "epoch": 2.0233420293834956, |
| "grad_norm": 0.382376492023468, |
| "learning_rate": 0.00010842490842490841, |
| "loss": 0.5505, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.024440477825072, |
| "grad_norm": 0.38339781761169434, |
| "learning_rate": 0.00010830280830280828, |
| "loss": 0.4593, |
| "step": 1843 |
| }, |
| { |
| "epoch": 2.0255389262666483, |
| "grad_norm": 0.45328769087791443, |
| "learning_rate": 0.00010818070818070818, |
| "loss": 0.8437, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.0266373747082245, |
| "grad_norm": 0.3051920533180237, |
| "learning_rate": 0.00010805860805860805, |
| "loss": 0.6096, |
| "step": 1845 |
| }, |
| { |
| "epoch": 2.027735823149801, |
| "grad_norm": 0.4249560236930847, |
| "learning_rate": 0.00010793650793650792, |
| "loss": 0.6441, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.028834271591377, |
| "grad_norm": 0.6639708280563354, |
| "learning_rate": 0.00010781440781440781, |
| "loss": 0.716, |
| "step": 1847 |
| }, |
| { |
| "epoch": 2.0299327200329533, |
| "grad_norm": 0.4324635863304138, |
| "learning_rate": 0.00010769230769230768, |
| "loss": 0.5288, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.03103116847453, |
| "grad_norm": 0.46487629413604736, |
| "learning_rate": 0.00010757020757020755, |
| "loss": 0.4908, |
| "step": 1849 |
| }, |
| { |
| "epoch": 2.032129616916106, |
| "grad_norm": 0.5104641318321228, |
| "learning_rate": 0.00010744810744810745, |
| "loss": 0.6367, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.033228065357682, |
| "grad_norm": 0.4010922312736511, |
| "learning_rate": 0.00010732600732600732, |
| "loss": 0.4266, |
| "step": 1851 |
| }, |
| { |
| "epoch": 2.0343265137992583, |
| "grad_norm": 0.6835510730743408, |
| "learning_rate": 0.0001072039072039072, |
| "loss": 1.0077, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.035424962240835, |
| "grad_norm": 0.7012602686882019, |
| "learning_rate": 0.00010708180708180708, |
| "loss": 0.7656, |
| "step": 1853 |
| }, |
| { |
| "epoch": 2.036523410682411, |
| "grad_norm": 0.8202001452445984, |
| "learning_rate": 0.00010695970695970695, |
| "loss": 0.9796, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.037621859123987, |
| "grad_norm": 0.37708353996276855, |
| "learning_rate": 0.00010683760683760682, |
| "loss": 0.3664, |
| "step": 1855 |
| }, |
| { |
| "epoch": 2.0387203075655638, |
| "grad_norm": 0.34818801283836365, |
| "learning_rate": 0.0001067155067155067, |
| "loss": 0.5365, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.03981875600714, |
| "grad_norm": 0.46427440643310547, |
| "learning_rate": 0.0001065934065934066, |
| "loss": 0.7503, |
| "step": 1857 |
| }, |
| { |
| "epoch": 2.040917204448716, |
| "grad_norm": 0.4782754182815552, |
| "learning_rate": 0.00010647130647130647, |
| "loss": 0.9247, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.0420156528902926, |
| "grad_norm": 0.6814667582511902, |
| "learning_rate": 0.00010634920634920634, |
| "loss": 0.5365, |
| "step": 1859 |
| }, |
| { |
| "epoch": 2.0431141013318688, |
| "grad_norm": 0.4782056510448456, |
| "learning_rate": 0.00010622710622710622, |
| "loss": 0.7444, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.044212549773445, |
| "grad_norm": 0.768439769744873, |
| "learning_rate": 0.0001061050061050061, |
| "loss": 0.6386, |
| "step": 1861 |
| }, |
| { |
| "epoch": 2.0453109982150215, |
| "grad_norm": 0.9991740584373474, |
| "learning_rate": 0.00010598290598290597, |
| "loss": 0.4762, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.0464094466565976, |
| "grad_norm": 0.4244922995567322, |
| "learning_rate": 0.00010586080586080585, |
| "loss": 0.4469, |
| "step": 1863 |
| }, |
| { |
| "epoch": 2.0475078950981738, |
| "grad_norm": 0.4085465371608734, |
| "learning_rate": 0.00010573870573870573, |
| "loss": 0.7215, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.04860634353975, |
| "grad_norm": 1.3068008422851562, |
| "learning_rate": 0.0001056166056166056, |
| "loss": 0.7781, |
| "step": 1865 |
| }, |
| { |
| "epoch": 2.0497047919813265, |
| "grad_norm": 0.3995974659919739, |
| "learning_rate": 0.0001054945054945055, |
| "loss": 0.6114, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.0508032404229026, |
| "grad_norm": 0.47944560647010803, |
| "learning_rate": 0.00010537240537240537, |
| "loss": 0.7355, |
| "step": 1867 |
| }, |
| { |
| "epoch": 2.0519016888644788, |
| "grad_norm": 1.6718720197677612, |
| "learning_rate": 0.00010525030525030524, |
| "loss": 0.5987, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.0530001373060554, |
| "grad_norm": 0.46015220880508423, |
| "learning_rate": 0.00010512820512820511, |
| "loss": 0.481, |
| "step": 1869 |
| }, |
| { |
| "epoch": 2.0540985857476315, |
| "grad_norm": 0.4863795042037964, |
| "learning_rate": 0.000105006105006105, |
| "loss": 0.5877, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.0551970341892076, |
| "grad_norm": 0.9190402030944824, |
| "learning_rate": 0.00010488400488400487, |
| "loss": 0.7941, |
| "step": 1871 |
| }, |
| { |
| "epoch": 2.056295482630784, |
| "grad_norm": 0.6056554317474365, |
| "learning_rate": 0.00010476190476190474, |
| "loss": 0.5455, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.0573939310723603, |
| "grad_norm": 0.7070736289024353, |
| "learning_rate": 0.00010463980463980464, |
| "loss": 0.6112, |
| "step": 1873 |
| }, |
| { |
| "epoch": 2.0584923795139365, |
| "grad_norm": 0.5415268540382385, |
| "learning_rate": 0.00010451770451770451, |
| "loss": 0.7141, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.0595908279555126, |
| "grad_norm": 0.45696091651916504, |
| "learning_rate": 0.00010439560439560438, |
| "loss": 0.7825, |
| "step": 1875 |
| }, |
| { |
| "epoch": 2.060689276397089, |
| "grad_norm": 0.5728979706764221, |
| "learning_rate": 0.00010427350427350427, |
| "loss": 0.5869, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.0617877248386653, |
| "grad_norm": 0.5910143852233887, |
| "learning_rate": 0.00010415140415140414, |
| "loss": 0.728, |
| "step": 1877 |
| }, |
| { |
| "epoch": 2.0628861732802415, |
| "grad_norm": 0.530915379524231, |
| "learning_rate": 0.00010402930402930401, |
| "loss": 0.6459, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.063984621721818, |
| "grad_norm": 0.36358964443206787, |
| "learning_rate": 0.00010390720390720391, |
| "loss": 0.7536, |
| "step": 1879 |
| }, |
| { |
| "epoch": 2.065083070163394, |
| "grad_norm": 2.7523410320281982, |
| "learning_rate": 0.00010378510378510379, |
| "loss": 0.6347, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.0661815186049703, |
| "grad_norm": 0.6842527389526367, |
| "learning_rate": 0.00010366300366300366, |
| "loss": 0.4943, |
| "step": 1881 |
| }, |
| { |
| "epoch": 2.067279967046547, |
| "grad_norm": 0.5830293297767639, |
| "learning_rate": 0.00010354090354090353, |
| "loss": 0.5855, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.068378415488123, |
| "grad_norm": 0.981920599937439, |
| "learning_rate": 0.00010341880341880341, |
| "loss": 0.4425, |
| "step": 1883 |
| }, |
| { |
| "epoch": 2.069476863929699, |
| "grad_norm": 2.0826029777526855, |
| "learning_rate": 0.00010329670329670329, |
| "loss": 0.5399, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.0705753123712753, |
| "grad_norm": 0.4648442268371582, |
| "learning_rate": 0.00010317460317460316, |
| "loss": 0.6203, |
| "step": 1885 |
| }, |
| { |
| "epoch": 2.071673760812852, |
| "grad_norm": 0.5086346864700317, |
| "learning_rate": 0.00010305250305250304, |
| "loss": 0.6091, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.072772209254428, |
| "grad_norm": 0.40404266119003296, |
| "learning_rate": 0.00010293040293040292, |
| "loss": 0.5013, |
| "step": 1887 |
| }, |
| { |
| "epoch": 2.073870657696004, |
| "grad_norm": 2.0507569313049316, |
| "learning_rate": 0.0001028083028083028, |
| "loss": 0.7822, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.074969106137581, |
| "grad_norm": 0.9318211078643799, |
| "learning_rate": 0.00010268620268620269, |
| "loss": 0.6638, |
| "step": 1889 |
| }, |
| { |
| "epoch": 2.076067554579157, |
| "grad_norm": 0.7601054310798645, |
| "learning_rate": 0.00010256410256410256, |
| "loss": 0.6085, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.077166003020733, |
| "grad_norm": 1.1299306154251099, |
| "learning_rate": 0.00010244200244200243, |
| "loss": 0.682, |
| "step": 1891 |
| }, |
| { |
| "epoch": 2.0782644514623096, |
| "grad_norm": 0.5009475350379944, |
| "learning_rate": 0.0001023199023199023, |
| "loss": 0.7229, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.079362899903886, |
| "grad_norm": 0.3432561159133911, |
| "learning_rate": 0.00010219780219780219, |
| "loss": 0.5991, |
| "step": 1893 |
| }, |
| { |
| "epoch": 2.080461348345462, |
| "grad_norm": 0.5224031805992126, |
| "learning_rate": 0.00010207570207570206, |
| "loss": 0.3687, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.0815597967870385, |
| "grad_norm": 0.4849548935890198, |
| "learning_rate": 0.00010195360195360193, |
| "loss": 0.507, |
| "step": 1895 |
| }, |
| { |
| "epoch": 2.0826582452286146, |
| "grad_norm": 0.6093185544013977, |
| "learning_rate": 0.00010183150183150183, |
| "loss": 0.7019, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.083756693670191, |
| "grad_norm": 0.7408457398414612, |
| "learning_rate": 0.0001017094017094017, |
| "loss": 0.6331, |
| "step": 1897 |
| }, |
| { |
| "epoch": 2.084855142111767, |
| "grad_norm": 0.67701655626297, |
| "learning_rate": 0.00010158730158730157, |
| "loss": 0.6685, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.0859535905533435, |
| "grad_norm": 0.2880030870437622, |
| "learning_rate": 0.00010146520146520146, |
| "loss": 0.4043, |
| "step": 1899 |
| }, |
| { |
| "epoch": 2.0870520389949196, |
| "grad_norm": 0.45890796184539795, |
| "learning_rate": 0.00010134310134310133, |
| "loss": 0.3695, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.088150487436496, |
| "grad_norm": 0.7898344397544861, |
| "learning_rate": 0.0001012210012210012, |
| "loss": 0.7875, |
| "step": 1901 |
| }, |
| { |
| "epoch": 2.0892489358780724, |
| "grad_norm": 0.5648753046989441, |
| "learning_rate": 0.0001010989010989011, |
| "loss": 0.6058, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.0903473843196485, |
| "grad_norm": 0.7880465984344482, |
| "learning_rate": 0.00010097680097680098, |
| "loss": 0.6403, |
| "step": 1903 |
| }, |
| { |
| "epoch": 2.0914458327612246, |
| "grad_norm": 0.4169737696647644, |
| "learning_rate": 0.00010085470085470085, |
| "loss": 0.71, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.0925442812028012, |
| "grad_norm": 0.33653560280799866, |
| "learning_rate": 0.00010073260073260072, |
| "loss": 0.6278, |
| "step": 1905 |
| }, |
| { |
| "epoch": 2.0936427296443774, |
| "grad_norm": 0.6861558556556702, |
| "learning_rate": 0.0001006105006105006, |
| "loss": 0.8463, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.0947411780859535, |
| "grad_norm": 0.29407018423080444, |
| "learning_rate": 0.00010048840048840048, |
| "loss": 0.5644, |
| "step": 1907 |
| }, |
| { |
| "epoch": 2.09583962652753, |
| "grad_norm": 0.673083484172821, |
| "learning_rate": 0.00010036630036630035, |
| "loss": 0.8353, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.0969380749691062, |
| "grad_norm": 0.429061621427536, |
| "learning_rate": 0.00010024420024420023, |
| "loss": 0.6381, |
| "step": 1909 |
| }, |
| { |
| "epoch": 2.0980365234106824, |
| "grad_norm": 0.5113368630409241, |
| "learning_rate": 0.00010012210012210012, |
| "loss": 0.7603, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.0991349718522585, |
| "grad_norm": 0.9005820751190186, |
| "learning_rate": 9.999999999999999e-05, |
| "loss": 0.6331, |
| "step": 1911 |
| }, |
| { |
| "epoch": 2.100233420293835, |
| "grad_norm": 0.489851176738739, |
| "learning_rate": 9.987789987789988e-05, |
| "loss": 0.8564, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.1013318687354112, |
| "grad_norm": 0.42647236585617065, |
| "learning_rate": 9.975579975579975e-05, |
| "loss": 0.5496, |
| "step": 1913 |
| }, |
| { |
| "epoch": 2.1024303171769874, |
| "grad_norm": 0.9061693549156189, |
| "learning_rate": 9.963369963369962e-05, |
| "loss": 0.4478, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.103528765618564, |
| "grad_norm": 0.4721933901309967, |
| "learning_rate": 9.95115995115995e-05, |
| "loss": 0.6066, |
| "step": 1915 |
| }, |
| { |
| "epoch": 2.10462721406014, |
| "grad_norm": 0.7265921831130981, |
| "learning_rate": 9.938949938949938e-05, |
| "loss": 0.7195, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.1057256625017162, |
| "grad_norm": 0.4521386921405792, |
| "learning_rate": 9.926739926739925e-05, |
| "loss": 0.6476, |
| "step": 1917 |
| }, |
| { |
| "epoch": 2.106824110943293, |
| "grad_norm": 0.42982912063598633, |
| "learning_rate": 9.914529914529912e-05, |
| "loss": 0.535, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.107922559384869, |
| "grad_norm": 0.4758259952068329, |
| "learning_rate": 9.902319902319902e-05, |
| "loss": 0.8106, |
| "step": 1919 |
| }, |
| { |
| "epoch": 2.109021007826445, |
| "grad_norm": 0.69195157289505, |
| "learning_rate": 9.890109890109889e-05, |
| "loss": 0.6643, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.110119456268021, |
| "grad_norm": 0.8207395672798157, |
| "learning_rate": 9.877899877899876e-05, |
| "loss": 0.7535, |
| "step": 1921 |
| }, |
| { |
| "epoch": 2.111217904709598, |
| "grad_norm": 1.4245035648345947, |
| "learning_rate": 9.865689865689865e-05, |
| "loss": 0.6721, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.112316353151174, |
| "grad_norm": 0.5496362447738647, |
| "learning_rate": 9.853479853479852e-05, |
| "loss": 0.5367, |
| "step": 1923 |
| }, |
| { |
| "epoch": 2.11341480159275, |
| "grad_norm": 0.5466665625572205, |
| "learning_rate": 9.84126984126984e-05, |
| "loss": 0.6083, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.1145132500343267, |
| "grad_norm": 0.7750464677810669, |
| "learning_rate": 9.829059829059829e-05, |
| "loss": 0.663, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.115611698475903, |
| "grad_norm": 0.4978208541870117, |
| "learning_rate": 9.816849816849817e-05, |
| "loss": 0.6334, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.116710146917479, |
| "grad_norm": 0.6415550708770752, |
| "learning_rate": 9.804639804639804e-05, |
| "loss": 0.6477, |
| "step": 1927 |
| }, |
| { |
| "epoch": 2.1178085953590555, |
| "grad_norm": 0.644123911857605, |
| "learning_rate": 9.792429792429792e-05, |
| "loss": 0.668, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.1189070438006317, |
| "grad_norm": 0.39706236124038696, |
| "learning_rate": 9.78021978021978e-05, |
| "loss": 0.5875, |
| "step": 1929 |
| }, |
| { |
| "epoch": 2.120005492242208, |
| "grad_norm": 1.3733233213424683, |
| "learning_rate": 9.768009768009767e-05, |
| "loss": 0.6023, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.121103940683784, |
| "grad_norm": 0.48839983344078064, |
| "learning_rate": 9.755799755799754e-05, |
| "loss": 0.5693, |
| "step": 1931 |
| }, |
| { |
| "epoch": 2.1222023891253605, |
| "grad_norm": 0.3107692301273346, |
| "learning_rate": 9.743589743589744e-05, |
| "loss": 0.5822, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.1233008375669367, |
| "grad_norm": 0.3988654911518097, |
| "learning_rate": 9.731379731379731e-05, |
| "loss": 0.5989, |
| "step": 1933 |
| }, |
| { |
| "epoch": 2.124399286008513, |
| "grad_norm": 1.1887754201889038, |
| "learning_rate": 9.719169719169718e-05, |
| "loss": 0.6382, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.1254977344500894, |
| "grad_norm": 0.43282651901245117, |
| "learning_rate": 9.706959706959707e-05, |
| "loss": 0.5649, |
| "step": 1935 |
| }, |
| { |
| "epoch": 2.1265961828916655, |
| "grad_norm": 0.39243975281715393, |
| "learning_rate": 9.694749694749694e-05, |
| "loss": 0.7005, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.1276946313332417, |
| "grad_norm": 0.7401454448699951, |
| "learning_rate": 9.682539682539681e-05, |
| "loss": 1.0632, |
| "step": 1937 |
| }, |
| { |
| "epoch": 2.1287930797748182, |
| "grad_norm": 0.6976983547210693, |
| "learning_rate": 9.67032967032967e-05, |
| "loss": 0.562, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.1298915282163944, |
| "grad_norm": 0.9784336686134338, |
| "learning_rate": 9.658119658119657e-05, |
| "loss": 0.8115, |
| "step": 1939 |
| }, |
| { |
| "epoch": 2.1309899766579705, |
| "grad_norm": 0.5289125442504883, |
| "learning_rate": 9.645909645909644e-05, |
| "loss": 0.6161, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.132088425099547, |
| "grad_norm": 1.414559006690979, |
| "learning_rate": 9.633699633699634e-05, |
| "loss": 0.7115, |
| "step": 1941 |
| }, |
| { |
| "epoch": 2.1331868735411232, |
| "grad_norm": 0.5444177389144897, |
| "learning_rate": 9.621489621489621e-05, |
| "loss": 0.6211, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.1342853219826994, |
| "grad_norm": 0.637030839920044, |
| "learning_rate": 9.609279609279608e-05, |
| "loss": 0.8747, |
| "step": 1943 |
| }, |
| { |
| "epoch": 2.1353837704242755, |
| "grad_norm": 0.5926198363304138, |
| "learning_rate": 9.597069597069595e-05, |
| "loss": 0.8673, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.136482218865852, |
| "grad_norm": 0.3638801872730255, |
| "learning_rate": 9.584859584859584e-05, |
| "loss": 0.4698, |
| "step": 1945 |
| }, |
| { |
| "epoch": 2.1375806673074282, |
| "grad_norm": 0.5823031067848206, |
| "learning_rate": 9.572649572649571e-05, |
| "loss": 0.6988, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.1386791157490044, |
| "grad_norm": 0.44348934292793274, |
| "learning_rate": 9.560439560439558e-05, |
| "loss": 0.6667, |
| "step": 1947 |
| }, |
| { |
| "epoch": 2.139777564190581, |
| "grad_norm": 3.177112579345703, |
| "learning_rate": 9.548229548229548e-05, |
| "loss": 0.8738, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.140876012632157, |
| "grad_norm": 1.3834997415542603, |
| "learning_rate": 9.536019536019536e-05, |
| "loss": 0.528, |
| "step": 1949 |
| }, |
| { |
| "epoch": 2.1419744610737332, |
| "grad_norm": 0.5514722466468811, |
| "learning_rate": 9.523809523809523e-05, |
| "loss": 0.5058, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.14307290951531, |
| "grad_norm": 0.8795000314712524, |
| "learning_rate": 9.511599511599511e-05, |
| "loss": 0.6368, |
| "step": 1951 |
| }, |
| { |
| "epoch": 2.144171357956886, |
| "grad_norm": 1.0043178796768188, |
| "learning_rate": 9.499389499389498e-05, |
| "loss": 0.5701, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.145269806398462, |
| "grad_norm": 1.8537780046463013, |
| "learning_rate": 9.487179487179486e-05, |
| "loss": 0.6978, |
| "step": 1953 |
| }, |
| { |
| "epoch": 2.1463682548400387, |
| "grad_norm": 0.5239475965499878, |
| "learning_rate": 9.474969474969476e-05, |
| "loss": 0.7093, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.147466703281615, |
| "grad_norm": 0.7944377064704895, |
| "learning_rate": 9.462759462759463e-05, |
| "loss": 0.7625, |
| "step": 1955 |
| }, |
| { |
| "epoch": 2.148565151723191, |
| "grad_norm": 0.7356003522872925, |
| "learning_rate": 9.45054945054945e-05, |
| "loss": 0.6845, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.149663600164767, |
| "grad_norm": 1.3590694665908813, |
| "learning_rate": 9.438339438339437e-05, |
| "loss": 0.6964, |
| "step": 1957 |
| }, |
| { |
| "epoch": 2.1507620486063437, |
| "grad_norm": 0.40889453887939453, |
| "learning_rate": 9.426129426129426e-05, |
| "loss": 0.6643, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.15186049704792, |
| "grad_norm": 0.6347643136978149, |
| "learning_rate": 9.413919413919413e-05, |
| "loss": 1.0002, |
| "step": 1959 |
| }, |
| { |
| "epoch": 2.152958945489496, |
| "grad_norm": 0.3661377429962158, |
| "learning_rate": 9.4017094017094e-05, |
| "loss": 0.5084, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.1540573939310725, |
| "grad_norm": 0.8262574672698975, |
| "learning_rate": 9.389499389499389e-05, |
| "loss": 0.5658, |
| "step": 1961 |
| }, |
| { |
| "epoch": 2.1551558423726487, |
| "grad_norm": 0.6054818034172058, |
| "learning_rate": 9.377289377289376e-05, |
| "loss": 0.6349, |
| "step": 1962 |
| }, |
| { |
| "epoch": 2.156254290814225, |
| "grad_norm": 0.3696078658103943, |
| "learning_rate": 9.365079365079364e-05, |
| "loss": 0.5746, |
| "step": 1963 |
| }, |
| { |
| "epoch": 2.157352739255801, |
| "grad_norm": 0.7613049745559692, |
| "learning_rate": 9.352869352869353e-05, |
| "loss": 0.5204, |
| "step": 1964 |
| }, |
| { |
| "epoch": 2.1584511876973775, |
| "grad_norm": 0.6841816306114197, |
| "learning_rate": 9.34065934065934e-05, |
| "loss": 0.813, |
| "step": 1965 |
| }, |
| { |
| "epoch": 2.1595496361389537, |
| "grad_norm": 0.902998685836792, |
| "learning_rate": 9.328449328449327e-05, |
| "loss": 0.6288, |
| "step": 1966 |
| }, |
| { |
| "epoch": 2.16064808458053, |
| "grad_norm": 0.5367470979690552, |
| "learning_rate": 9.316239316239316e-05, |
| "loss": 0.6689, |
| "step": 1967 |
| }, |
| { |
| "epoch": 2.1617465330221064, |
| "grad_norm": 0.9443572163581848, |
| "learning_rate": 9.304029304029303e-05, |
| "loss": 0.6864, |
| "step": 1968 |
| }, |
| { |
| "epoch": 2.1628449814636825, |
| "grad_norm": 0.42191457748413086, |
| "learning_rate": 9.29181929181929e-05, |
| "loss": 0.6509, |
| "step": 1969 |
| }, |
| { |
| "epoch": 2.1639434299052587, |
| "grad_norm": 0.6019404530525208, |
| "learning_rate": 9.279609279609277e-05, |
| "loss": 0.5252, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.1650418783468353, |
| "grad_norm": 1.9933907985687256, |
| "learning_rate": 9.267399267399267e-05, |
| "loss": 0.6042, |
| "step": 1971 |
| }, |
| { |
| "epoch": 2.1661403267884114, |
| "grad_norm": 0.33075836300849915, |
| "learning_rate": 9.255189255189255e-05, |
| "loss": 0.579, |
| "step": 1972 |
| }, |
| { |
| "epoch": 2.1672387752299875, |
| "grad_norm": 0.37899547815322876, |
| "learning_rate": 9.242979242979242e-05, |
| "loss": 0.5006, |
| "step": 1973 |
| }, |
| { |
| "epoch": 2.168337223671564, |
| "grad_norm": 0.6482734680175781, |
| "learning_rate": 9.23076923076923e-05, |
| "loss": 0.4844, |
| "step": 1974 |
| }, |
| { |
| "epoch": 2.1694356721131403, |
| "grad_norm": 0.47632062435150146, |
| "learning_rate": 9.218559218559217e-05, |
| "loss": 0.5844, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.1705341205547164, |
| "grad_norm": 0.3402813971042633, |
| "learning_rate": 9.206349206349205e-05, |
| "loss": 0.6397, |
| "step": 1976 |
| }, |
| { |
| "epoch": 2.1716325689962925, |
| "grad_norm": 0.47405871748924255, |
| "learning_rate": 9.194139194139195e-05, |
| "loss": 0.6436, |
| "step": 1977 |
| }, |
| { |
| "epoch": 2.172731017437869, |
| "grad_norm": 0.5474234223365784, |
| "learning_rate": 9.181929181929182e-05, |
| "loss": 0.5758, |
| "step": 1978 |
| }, |
| { |
| "epoch": 2.1738294658794453, |
| "grad_norm": 0.5423378348350525, |
| "learning_rate": 9.169719169719169e-05, |
| "loss": 0.5882, |
| "step": 1979 |
| }, |
| { |
| "epoch": 2.1749279143210214, |
| "grad_norm": 0.32848963141441345, |
| "learning_rate": 9.157509157509158e-05, |
| "loss": 0.5828, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.176026362762598, |
| "grad_norm": 0.6646802425384521, |
| "learning_rate": 9.145299145299145e-05, |
| "loss": 0.551, |
| "step": 1981 |
| }, |
| { |
| "epoch": 2.177124811204174, |
| "grad_norm": 0.4560980200767517, |
| "learning_rate": 9.133089133089132e-05, |
| "loss": 0.705, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.1782232596457503, |
| "grad_norm": 0.4531053304672241, |
| "learning_rate": 9.120879120879119e-05, |
| "loss": 0.7471, |
| "step": 1983 |
| }, |
| { |
| "epoch": 2.179321708087327, |
| "grad_norm": 0.5881507992744446, |
| "learning_rate": 9.108669108669108e-05, |
| "loss": 0.7559, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.180420156528903, |
| "grad_norm": 0.41462886333465576, |
| "learning_rate": 9.096459096459096e-05, |
| "loss": 0.5674, |
| "step": 1985 |
| }, |
| { |
| "epoch": 2.181518604970479, |
| "grad_norm": 0.46718108654022217, |
| "learning_rate": 9.084249084249083e-05, |
| "loss": 0.7149, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.1826170534120557, |
| "grad_norm": 0.49290111660957336, |
| "learning_rate": 9.072039072039072e-05, |
| "loss": 0.5641, |
| "step": 1987 |
| }, |
| { |
| "epoch": 2.183715501853632, |
| "grad_norm": 0.398296594619751, |
| "learning_rate": 9.059829059829059e-05, |
| "loss": 0.5177, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.184813950295208, |
| "grad_norm": 0.8241115212440491, |
| "learning_rate": 9.047619047619046e-05, |
| "loss": 0.7864, |
| "step": 1989 |
| }, |
| { |
| "epoch": 2.185912398736784, |
| "grad_norm": 1.1335865259170532, |
| "learning_rate": 9.035409035409035e-05, |
| "loss": 0.6167, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.1870108471783607, |
| "grad_norm": 0.4479789435863495, |
| "learning_rate": 9.023199023199022e-05, |
| "loss": 0.6365, |
| "step": 1991 |
| }, |
| { |
| "epoch": 2.188109295619937, |
| "grad_norm": 0.4892582297325134, |
| "learning_rate": 9.010989010989009e-05, |
| "loss": 0.6283, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.189207744061513, |
| "grad_norm": 0.8397974371910095, |
| "learning_rate": 8.998778998778999e-05, |
| "loss": 0.7123, |
| "step": 1993 |
| }, |
| { |
| "epoch": 2.1903061925030896, |
| "grad_norm": 0.5295377969741821, |
| "learning_rate": 8.986568986568986e-05, |
| "loss": 0.4033, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.1914046409446657, |
| "grad_norm": 0.464832067489624, |
| "learning_rate": 8.974358974358974e-05, |
| "loss": 0.8228, |
| "step": 1995 |
| }, |
| { |
| "epoch": 2.192503089386242, |
| "grad_norm": 0.381369024515152, |
| "learning_rate": 8.962148962148961e-05, |
| "loss": 0.6267, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.193601537827818, |
| "grad_norm": 0.7176710963249207, |
| "learning_rate": 8.949938949938949e-05, |
| "loss": 0.7008, |
| "step": 1997 |
| }, |
| { |
| "epoch": 2.1946999862693946, |
| "grad_norm": 2.569753885269165, |
| "learning_rate": 8.937728937728936e-05, |
| "loss": 0.6899, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.1957984347109707, |
| "grad_norm": 0.5020056962966919, |
| "learning_rate": 8.925518925518924e-05, |
| "loss": 0.527, |
| "step": 1999 |
| }, |
| { |
| "epoch": 2.196896883152547, |
| "grad_norm": 1.7054524421691895, |
| "learning_rate": 8.913308913308914e-05, |
| "loss": 0.5455, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.1979953315941234, |
| "grad_norm": 0.5037225484848022, |
| "learning_rate": 8.901098901098901e-05, |
| "loss": 0.7445, |
| "step": 2001 |
| }, |
| { |
| "epoch": 2.1990937800356996, |
| "grad_norm": 0.8109555840492249, |
| "learning_rate": 8.888888888888888e-05, |
| "loss": 0.624, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.2001922284772757, |
| "grad_norm": 0.47120043635368347, |
| "learning_rate": 8.876678876678877e-05, |
| "loss": 0.6858, |
| "step": 2003 |
| }, |
| { |
| "epoch": 2.2012906769188523, |
| "grad_norm": 0.6166191101074219, |
| "learning_rate": 8.864468864468864e-05, |
| "loss": 0.4528, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.2023891253604284, |
| "grad_norm": 0.4999128580093384, |
| "learning_rate": 8.852258852258851e-05, |
| "loss": 0.712, |
| "step": 2005 |
| }, |
| { |
| "epoch": 2.2034875738020046, |
| "grad_norm": 1.1858354806900024, |
| "learning_rate": 8.84004884004884e-05, |
| "loss": 0.7647, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.204586022243581, |
| "grad_norm": 0.4223528206348419, |
| "learning_rate": 8.827838827838828e-05, |
| "loss": 0.6553, |
| "step": 2007 |
| }, |
| { |
| "epoch": 2.2056844706851573, |
| "grad_norm": 0.41678956151008606, |
| "learning_rate": 8.815628815628815e-05, |
| "loss": 0.6033, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.2067829191267334, |
| "grad_norm": 0.5812666416168213, |
| "learning_rate": 8.803418803418802e-05, |
| "loss": 0.6016, |
| "step": 2009 |
| }, |
| { |
| "epoch": 2.2078813675683095, |
| "grad_norm": 0.5553560256958008, |
| "learning_rate": 8.791208791208791e-05, |
| "loss": 0.7621, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.208979816009886, |
| "grad_norm": 0.6392796635627747, |
| "learning_rate": 8.778998778998778e-05, |
| "loss": 0.567, |
| "step": 2011 |
| }, |
| { |
| "epoch": 2.2100782644514623, |
| "grad_norm": 1.0086902379989624, |
| "learning_rate": 8.766788766788765e-05, |
| "loss": 0.9432, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.2111767128930384, |
| "grad_norm": 1.3578602075576782, |
| "learning_rate": 8.754578754578754e-05, |
| "loss": 0.5107, |
| "step": 2013 |
| }, |
| { |
| "epoch": 2.212275161334615, |
| "grad_norm": 0.5530524849891663, |
| "learning_rate": 8.742368742368741e-05, |
| "loss": 0.6078, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.213373609776191, |
| "grad_norm": 0.3795104920864105, |
| "learning_rate": 8.730158730158728e-05, |
| "loss": 0.4889, |
| "step": 2015 |
| }, |
| { |
| "epoch": 2.2144720582177673, |
| "grad_norm": 0.40977227687835693, |
| "learning_rate": 8.717948717948718e-05, |
| "loss": 0.6295, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.215570506659344, |
| "grad_norm": 0.4882934093475342, |
| "learning_rate": 8.705738705738705e-05, |
| "loss": 0.7219, |
| "step": 2017 |
| }, |
| { |
| "epoch": 2.21666895510092, |
| "grad_norm": 0.7966530919075012, |
| "learning_rate": 8.693528693528693e-05, |
| "loss": 0.5342, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.217767403542496, |
| "grad_norm": 0.6992311477661133, |
| "learning_rate": 8.681318681318681e-05, |
| "loss": 0.5932, |
| "step": 2019 |
| }, |
| { |
| "epoch": 2.2188658519840727, |
| "grad_norm": 0.396427720785141, |
| "learning_rate": 8.669108669108668e-05, |
| "loss": 0.5838, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.219964300425649, |
| "grad_norm": 0.5625690817832947, |
| "learning_rate": 8.656898656898655e-05, |
| "loss": 0.7605, |
| "step": 2021 |
| }, |
| { |
| "epoch": 2.221062748867225, |
| "grad_norm": 0.6052583456039429, |
| "learning_rate": 8.644688644688643e-05, |
| "loss": 0.6572, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.222161197308801, |
| "grad_norm": 0.7201973795890808, |
| "learning_rate": 8.632478632478633e-05, |
| "loss": 0.4924, |
| "step": 2023 |
| }, |
| { |
| "epoch": 2.2232596457503777, |
| "grad_norm": 0.4222647249698639, |
| "learning_rate": 8.62026862026862e-05, |
| "loss": 0.7764, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.224358094191954, |
| "grad_norm": 0.5168121457099915, |
| "learning_rate": 8.608058608058607e-05, |
| "loss": 0.5766, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.22545654263353, |
| "grad_norm": 0.886203408241272, |
| "learning_rate": 8.595848595848596e-05, |
| "loss": 0.3804, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.2265549910751066, |
| "grad_norm": 1.7365875244140625, |
| "learning_rate": 8.583638583638583e-05, |
| "loss": 0.6583, |
| "step": 2027 |
| }, |
| { |
| "epoch": 2.2276534395166827, |
| "grad_norm": 0.44519639015197754, |
| "learning_rate": 8.57142857142857e-05, |
| "loss": 0.7322, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.228751887958259, |
| "grad_norm": 0.4888206422328949, |
| "learning_rate": 8.55921855921856e-05, |
| "loss": 0.6645, |
| "step": 2029 |
| }, |
| { |
| "epoch": 2.2298503363998354, |
| "grad_norm": 0.598225474357605, |
| "learning_rate": 8.547008547008547e-05, |
| "loss": 0.7903, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.2309487848414116, |
| "grad_norm": 0.8521910905838013, |
| "learning_rate": 8.534798534798534e-05, |
| "loss": 0.8573, |
| "step": 2031 |
| }, |
| { |
| "epoch": 2.2320472332829877, |
| "grad_norm": 1.6346311569213867, |
| "learning_rate": 8.522588522588523e-05, |
| "loss": 0.5653, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.233145681724564, |
| "grad_norm": 0.6574315428733826, |
| "learning_rate": 8.51037851037851e-05, |
| "loss": 0.5289, |
| "step": 2033 |
| }, |
| { |
| "epoch": 2.2342441301661404, |
| "grad_norm": 0.3821216821670532, |
| "learning_rate": 8.498168498168497e-05, |
| "loss": 0.4627, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.2353425786077166, |
| "grad_norm": 0.28965023159980774, |
| "learning_rate": 8.485958485958484e-05, |
| "loss": 0.3696, |
| "step": 2035 |
| }, |
| { |
| "epoch": 2.2364410270492927, |
| "grad_norm": 0.8256242275238037, |
| "learning_rate": 8.473748473748473e-05, |
| "loss": 0.6305, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.2375394754908693, |
| "grad_norm": 0.8374451398849487, |
| "learning_rate": 8.46153846153846e-05, |
| "loss": 0.5038, |
| "step": 2037 |
| }, |
| { |
| "epoch": 2.2386379239324454, |
| "grad_norm": 0.5931464433670044, |
| "learning_rate": 8.449328449328449e-05, |
| "loss": 0.6928, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.2397363723740216, |
| "grad_norm": 0.5120035409927368, |
| "learning_rate": 8.437118437118437e-05, |
| "loss": 0.6004, |
| "step": 2039 |
| }, |
| { |
| "epoch": 2.240834820815598, |
| "grad_norm": 0.6345282196998596, |
| "learning_rate": 8.424908424908424e-05, |
| "loss": 0.866, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.2419332692571743, |
| "grad_norm": 0.5632284283638, |
| "learning_rate": 8.412698412698412e-05, |
| "loss": 0.406, |
| "step": 2041 |
| }, |
| { |
| "epoch": 2.2430317176987504, |
| "grad_norm": 0.4784685969352722, |
| "learning_rate": 8.4004884004884e-05, |
| "loss": 0.4732, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.2441301661403266, |
| "grad_norm": 0.47678086161613464, |
| "learning_rate": 8.388278388278387e-05, |
| "loss": 0.502, |
| "step": 2043 |
| }, |
| { |
| "epoch": 2.245228614581903, |
| "grad_norm": 0.6543307304382324, |
| "learning_rate": 8.376068376068374e-05, |
| "loss": 0.7183, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.2463270630234793, |
| "grad_norm": 0.6147063374519348, |
| "learning_rate": 8.363858363858364e-05, |
| "loss": 0.618, |
| "step": 2045 |
| }, |
| { |
| "epoch": 2.2474255114650554, |
| "grad_norm": 0.5867168307304382, |
| "learning_rate": 8.351648351648352e-05, |
| "loss": 0.7749, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.248523959906632, |
| "grad_norm": 1.164838433265686, |
| "learning_rate": 8.339438339438339e-05, |
| "loss": 0.6261, |
| "step": 2047 |
| }, |
| { |
| "epoch": 2.249622408348208, |
| "grad_norm": 0.6695102453231812, |
| "learning_rate": 8.327228327228326e-05, |
| "loss": 0.6172, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.2507208567897843, |
| "grad_norm": 0.43873751163482666, |
| "learning_rate": 8.315018315018315e-05, |
| "loss": 0.7032, |
| "step": 2049 |
| }, |
| { |
| "epoch": 2.251819305231361, |
| "grad_norm": 0.439897745847702, |
| "learning_rate": 8.302808302808302e-05, |
| "loss": 0.7744, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.252917753672937, |
| "grad_norm": 0.6671053767204285, |
| "learning_rate": 8.290598290598289e-05, |
| "loss": 0.6877, |
| "step": 2051 |
| }, |
| { |
| "epoch": 2.254016202114513, |
| "grad_norm": 0.37354105710983276, |
| "learning_rate": 8.278388278388279e-05, |
| "loss": 0.5653, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.2551146505560897, |
| "grad_norm": 0.5615684390068054, |
| "learning_rate": 8.266178266178266e-05, |
| "loss": 0.5961, |
| "step": 2053 |
| }, |
| { |
| "epoch": 2.256213098997666, |
| "grad_norm": 2.0932323932647705, |
| "learning_rate": 8.253968253968253e-05, |
| "loss": 0.6139, |
| "step": 2054 |
| }, |
| { |
| "epoch": 2.257311547439242, |
| "grad_norm": 0.5486952066421509, |
| "learning_rate": 8.241758241758242e-05, |
| "loss": 0.7816, |
| "step": 2055 |
| }, |
| { |
| "epoch": 2.258409995880818, |
| "grad_norm": 0.7377699017524719, |
| "learning_rate": 8.229548229548229e-05, |
| "loss": 0.5036, |
| "step": 2056 |
| }, |
| { |
| "epoch": 2.2595084443223947, |
| "grad_norm": 0.7057545781135559, |
| "learning_rate": 8.217338217338216e-05, |
| "loss": 0.5788, |
| "step": 2057 |
| }, |
| { |
| "epoch": 2.260606892763971, |
| "grad_norm": 0.5388674736022949, |
| "learning_rate": 8.205128205128205e-05, |
| "loss": 0.7079, |
| "step": 2058 |
| }, |
| { |
| "epoch": 2.261705341205547, |
| "grad_norm": 0.620943546295166, |
| "learning_rate": 8.192918192918192e-05, |
| "loss": 0.6223, |
| "step": 2059 |
| }, |
| { |
| "epoch": 2.2628037896471236, |
| "grad_norm": 0.6159489154815674, |
| "learning_rate": 8.18070818070818e-05, |
| "loss": 0.7277, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.2639022380886997, |
| "grad_norm": 0.5745131373405457, |
| "learning_rate": 8.168498168498168e-05, |
| "loss": 0.6356, |
| "step": 2061 |
| }, |
| { |
| "epoch": 2.265000686530276, |
| "grad_norm": 0.4925720989704132, |
| "learning_rate": 8.156288156288156e-05, |
| "loss": 0.6342, |
| "step": 2062 |
| }, |
| { |
| "epoch": 2.2660991349718524, |
| "grad_norm": 0.410692036151886, |
| "learning_rate": 8.144078144078143e-05, |
| "loss": 0.5903, |
| "step": 2063 |
| }, |
| { |
| "epoch": 2.2671975834134286, |
| "grad_norm": 0.8246005177497864, |
| "learning_rate": 8.13186813186813e-05, |
| "loss": 0.4048, |
| "step": 2064 |
| }, |
| { |
| "epoch": 2.2682960318550047, |
| "grad_norm": 0.5054492950439453, |
| "learning_rate": 8.119658119658119e-05, |
| "loss": 0.5797, |
| "step": 2065 |
| }, |
| { |
| "epoch": 2.2693944802965813, |
| "grad_norm": 0.6249692440032959, |
| "learning_rate": 8.107448107448106e-05, |
| "loss": 0.5434, |
| "step": 2066 |
| }, |
| { |
| "epoch": 2.2704929287381574, |
| "grad_norm": 0.5582659244537354, |
| "learning_rate": 8.095238095238093e-05, |
| "loss": 0.5925, |
| "step": 2067 |
| }, |
| { |
| "epoch": 2.2715913771797336, |
| "grad_norm": 0.38472238183021545, |
| "learning_rate": 8.083028083028083e-05, |
| "loss": 0.7325, |
| "step": 2068 |
| }, |
| { |
| "epoch": 2.2726898256213097, |
| "grad_norm": 0.4649077355861664, |
| "learning_rate": 8.07081807081807e-05, |
| "loss": 0.6244, |
| "step": 2069 |
| }, |
| { |
| "epoch": 2.2737882740628863, |
| "grad_norm": 0.38582849502563477, |
| "learning_rate": 8.058608058608058e-05, |
| "loss": 0.7696, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.2748867225044624, |
| "grad_norm": 0.4612105190753937, |
| "learning_rate": 8.046398046398045e-05, |
| "loss": 0.6453, |
| "step": 2071 |
| }, |
| { |
| "epoch": 2.2759851709460386, |
| "grad_norm": 0.6572852730751038, |
| "learning_rate": 8.034188034188034e-05, |
| "loss": 0.7417, |
| "step": 2072 |
| }, |
| { |
| "epoch": 2.277083619387615, |
| "grad_norm": 0.6322109699249268, |
| "learning_rate": 8.021978021978021e-05, |
| "loss": 0.2827, |
| "step": 2073 |
| }, |
| { |
| "epoch": 2.2781820678291913, |
| "grad_norm": 1.2452771663665771, |
| "learning_rate": 8.009768009768008e-05, |
| "loss": 0.7441, |
| "step": 2074 |
| }, |
| { |
| "epoch": 2.2792805162707674, |
| "grad_norm": 0.32154834270477295, |
| "learning_rate": 7.997557997557998e-05, |
| "loss": 0.4606, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.2803789647123436, |
| "grad_norm": 1.0170034170150757, |
| "learning_rate": 7.985347985347985e-05, |
| "loss": 0.7003, |
| "step": 2076 |
| }, |
| { |
| "epoch": 2.28147741315392, |
| "grad_norm": 0.7780435085296631, |
| "learning_rate": 7.973137973137972e-05, |
| "loss": 0.5847, |
| "step": 2077 |
| }, |
| { |
| "epoch": 2.2825758615954963, |
| "grad_norm": 0.6422854661941528, |
| "learning_rate": 7.960927960927961e-05, |
| "loss": 0.6278, |
| "step": 2078 |
| }, |
| { |
| "epoch": 2.2836743100370724, |
| "grad_norm": 0.5440393090248108, |
| "learning_rate": 7.948717948717948e-05, |
| "loss": 0.6313, |
| "step": 2079 |
| }, |
| { |
| "epoch": 2.284772758478649, |
| "grad_norm": 0.5774940848350525, |
| "learning_rate": 7.936507936507935e-05, |
| "loss": 0.7504, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.285871206920225, |
| "grad_norm": 0.44180789589881897, |
| "learning_rate": 7.924297924297924e-05, |
| "loss": 0.5806, |
| "step": 2081 |
| }, |
| { |
| "epoch": 2.2869696553618013, |
| "grad_norm": 0.8452728390693665, |
| "learning_rate": 7.912087912087912e-05, |
| "loss": 0.5753, |
| "step": 2082 |
| }, |
| { |
| "epoch": 2.288068103803378, |
| "grad_norm": 0.40172943472862244, |
| "learning_rate": 7.8998778998779e-05, |
| "loss": 0.5565, |
| "step": 2083 |
| }, |
| { |
| "epoch": 2.289166552244954, |
| "grad_norm": 0.3919180929660797, |
| "learning_rate": 7.887667887667887e-05, |
| "loss": 0.4951, |
| "step": 2084 |
| }, |
| { |
| "epoch": 2.29026500068653, |
| "grad_norm": 1.0796260833740234, |
| "learning_rate": 7.875457875457875e-05, |
| "loss": 0.733, |
| "step": 2085 |
| }, |
| { |
| "epoch": 2.2913634491281067, |
| "grad_norm": 0.5640047788619995, |
| "learning_rate": 7.863247863247862e-05, |
| "loss": 0.4625, |
| "step": 2086 |
| }, |
| { |
| "epoch": 2.292461897569683, |
| "grad_norm": 0.8736083507537842, |
| "learning_rate": 7.85103785103785e-05, |
| "loss": 0.5532, |
| "step": 2087 |
| }, |
| { |
| "epoch": 2.293560346011259, |
| "grad_norm": 0.5358221530914307, |
| "learning_rate": 7.838827838827838e-05, |
| "loss": 0.6397, |
| "step": 2088 |
| }, |
| { |
| "epoch": 2.294658794452835, |
| "grad_norm": 5.207391262054443, |
| "learning_rate": 7.826617826617825e-05, |
| "loss": 0.6402, |
| "step": 2089 |
| }, |
| { |
| "epoch": 2.2957572428944117, |
| "grad_norm": 0.4122523069381714, |
| "learning_rate": 7.814407814407813e-05, |
| "loss": 0.474, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.296855691335988, |
| "grad_norm": 2.8296186923980713, |
| "learning_rate": 7.802197802197802e-05, |
| "loss": 0.5197, |
| "step": 2091 |
| }, |
| { |
| "epoch": 2.297954139777564, |
| "grad_norm": 0.6898410320281982, |
| "learning_rate": 7.78998778998779e-05, |
| "loss": 0.782, |
| "step": 2092 |
| }, |
| { |
| "epoch": 2.2990525882191406, |
| "grad_norm": 0.37363025546073914, |
| "learning_rate": 7.777777777777777e-05, |
| "loss": 0.5824, |
| "step": 2093 |
| }, |
| { |
| "epoch": 2.3001510366607167, |
| "grad_norm": 0.5120764374732971, |
| "learning_rate": 7.765567765567765e-05, |
| "loss": 0.7326, |
| "step": 2094 |
| }, |
| { |
| "epoch": 2.301249485102293, |
| "grad_norm": 0.6517985463142395, |
| "learning_rate": 7.753357753357753e-05, |
| "loss": 0.6274, |
| "step": 2095 |
| }, |
| { |
| "epoch": 2.3023479335438695, |
| "grad_norm": 0.8033846020698547, |
| "learning_rate": 7.74114774114774e-05, |
| "loss": 0.7093, |
| "step": 2096 |
| }, |
| { |
| "epoch": 2.3034463819854456, |
| "grad_norm": 0.896397590637207, |
| "learning_rate": 7.728937728937727e-05, |
| "loss": 0.6685, |
| "step": 2097 |
| }, |
| { |
| "epoch": 2.3045448304270217, |
| "grad_norm": 0.4606597423553467, |
| "learning_rate": 7.716727716727717e-05, |
| "loss": 0.5821, |
| "step": 2098 |
| }, |
| { |
| "epoch": 2.3056432788685983, |
| "grad_norm": 0.9286845922470093, |
| "learning_rate": 7.704517704517704e-05, |
| "loss": 0.7537, |
| "step": 2099 |
| }, |
| { |
| "epoch": 2.3067417273101745, |
| "grad_norm": 0.6514043211936951, |
| "learning_rate": 7.692307692307691e-05, |
| "loss": 0.5644, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.3078401757517506, |
| "grad_norm": 0.4881083369255066, |
| "learning_rate": 7.68009768009768e-05, |
| "loss": 0.5348, |
| "step": 2101 |
| }, |
| { |
| "epoch": 2.3089386241933267, |
| "grad_norm": 2.688716173171997, |
| "learning_rate": 7.667887667887667e-05, |
| "loss": 0.6732, |
| "step": 2102 |
| }, |
| { |
| "epoch": 2.3100370726349033, |
| "grad_norm": 0.4597708582878113, |
| "learning_rate": 7.655677655677654e-05, |
| "loss": 0.6166, |
| "step": 2103 |
| }, |
| { |
| "epoch": 2.3111355210764795, |
| "grad_norm": 0.7629315853118896, |
| "learning_rate": 7.643467643467644e-05, |
| "loss": 0.4677, |
| "step": 2104 |
| }, |
| { |
| "epoch": 2.3122339695180556, |
| "grad_norm": 0.7282788753509521, |
| "learning_rate": 7.631257631257631e-05, |
| "loss": 0.6841, |
| "step": 2105 |
| }, |
| { |
| "epoch": 2.313332417959632, |
| "grad_norm": 0.5421862006187439, |
| "learning_rate": 7.619047619047618e-05, |
| "loss": 0.7274, |
| "step": 2106 |
| }, |
| { |
| "epoch": 2.3144308664012083, |
| "grad_norm": 0.7396867871284485, |
| "learning_rate": 7.606837606837607e-05, |
| "loss": 0.6546, |
| "step": 2107 |
| }, |
| { |
| "epoch": 2.3155293148427845, |
| "grad_norm": 0.34731313586235046, |
| "learning_rate": 7.594627594627594e-05, |
| "loss": 0.72, |
| "step": 2108 |
| }, |
| { |
| "epoch": 2.3166277632843606, |
| "grad_norm": 1.1024978160858154, |
| "learning_rate": 7.582417582417581e-05, |
| "loss": 0.7304, |
| "step": 2109 |
| }, |
| { |
| "epoch": 2.317726211725937, |
| "grad_norm": 0.5866183638572693, |
| "learning_rate": 7.570207570207569e-05, |
| "loss": 0.4912, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.3188246601675133, |
| "grad_norm": 0.8068836331367493, |
| "learning_rate": 7.557997557997557e-05, |
| "loss": 0.5342, |
| "step": 2111 |
| }, |
| { |
| "epoch": 2.31992310860909, |
| "grad_norm": 0.6417646408081055, |
| "learning_rate": 7.545787545787544e-05, |
| "loss": 0.7642, |
| "step": 2112 |
| }, |
| { |
| "epoch": 2.321021557050666, |
| "grad_norm": 0.4545808434486389, |
| "learning_rate": 7.533577533577533e-05, |
| "loss": 0.5681, |
| "step": 2113 |
| }, |
| { |
| "epoch": 2.322120005492242, |
| "grad_norm": 0.3567211329936981, |
| "learning_rate": 7.521367521367521e-05, |
| "loss": 0.6368, |
| "step": 2114 |
| }, |
| { |
| "epoch": 2.3232184539338183, |
| "grad_norm": 0.5747010707855225, |
| "learning_rate": 7.509157509157509e-05, |
| "loss": 0.5848, |
| "step": 2115 |
| }, |
| { |
| "epoch": 2.324316902375395, |
| "grad_norm": 0.46303555369377136, |
| "learning_rate": 7.496947496947497e-05, |
| "loss": 0.6577, |
| "step": 2116 |
| }, |
| { |
| "epoch": 2.325415350816971, |
| "grad_norm": 0.5343080759048462, |
| "learning_rate": 7.484737484737484e-05, |
| "loss": 0.8531, |
| "step": 2117 |
| }, |
| { |
| "epoch": 2.326513799258547, |
| "grad_norm": 0.9027140736579895, |
| "learning_rate": 7.472527472527472e-05, |
| "loss": 0.6271, |
| "step": 2118 |
| }, |
| { |
| "epoch": 2.3276122477001238, |
| "grad_norm": 0.6390063166618347, |
| "learning_rate": 7.460317460317459e-05, |
| "loss": 0.5669, |
| "step": 2119 |
| }, |
| { |
| "epoch": 2.3287106961417, |
| "grad_norm": 0.4965013563632965, |
| "learning_rate": 7.448107448107447e-05, |
| "loss": 0.6362, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.329809144583276, |
| "grad_norm": 0.49252766370773315, |
| "learning_rate": 7.435897435897436e-05, |
| "loss": 0.6703, |
| "step": 2121 |
| }, |
| { |
| "epoch": 2.330907593024852, |
| "grad_norm": 0.7043023705482483, |
| "learning_rate": 7.423687423687423e-05, |
| "loss": 0.7114, |
| "step": 2122 |
| }, |
| { |
| "epoch": 2.3320060414664288, |
| "grad_norm": 0.4373185634613037, |
| "learning_rate": 7.41147741147741e-05, |
| "loss": 0.5656, |
| "step": 2123 |
| }, |
| { |
| "epoch": 2.333104489908005, |
| "grad_norm": 1.0036537647247314, |
| "learning_rate": 7.399267399267399e-05, |
| "loss": 0.6652, |
| "step": 2124 |
| }, |
| { |
| "epoch": 2.334202938349581, |
| "grad_norm": 2.06589937210083, |
| "learning_rate": 7.387057387057386e-05, |
| "loss": 0.6502, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.3353013867911576, |
| "grad_norm": 1.1616554260253906, |
| "learning_rate": 7.374847374847375e-05, |
| "loss": 0.7288, |
| "step": 2126 |
| }, |
| { |
| "epoch": 2.3363998352327338, |
| "grad_norm": 0.4532950520515442, |
| "learning_rate": 7.362637362637362e-05, |
| "loss": 0.7696, |
| "step": 2127 |
| }, |
| { |
| "epoch": 2.33749828367431, |
| "grad_norm": 1.0143449306488037, |
| "learning_rate": 7.35042735042735e-05, |
| "loss": 1.0185, |
| "step": 2128 |
| }, |
| { |
| "epoch": 2.3385967321158865, |
| "grad_norm": 2.2059850692749023, |
| "learning_rate": 7.338217338217337e-05, |
| "loss": 0.6267, |
| "step": 2129 |
| }, |
| { |
| "epoch": 2.3396951805574626, |
| "grad_norm": 0.4883456826210022, |
| "learning_rate": 7.326007326007325e-05, |
| "loss": 0.6081, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.3407936289990388, |
| "grad_norm": 0.42373138666152954, |
| "learning_rate": 7.313797313797313e-05, |
| "loss": 0.6204, |
| "step": 2131 |
| }, |
| { |
| "epoch": 2.3418920774406153, |
| "grad_norm": 0.43958979845046997, |
| "learning_rate": 7.3015873015873e-05, |
| "loss": 0.7608, |
| "step": 2132 |
| }, |
| { |
| "epoch": 2.3429905258821915, |
| "grad_norm": 0.4493010342121124, |
| "learning_rate": 7.289377289377289e-05, |
| "loss": 0.5985, |
| "step": 2133 |
| }, |
| { |
| "epoch": 2.3440889743237676, |
| "grad_norm": 0.38533085584640503, |
| "learning_rate": 7.277167277167276e-05, |
| "loss": 0.445, |
| "step": 2134 |
| }, |
| { |
| "epoch": 2.3451874227653438, |
| "grad_norm": 0.37900710105895996, |
| "learning_rate": 7.264957264957265e-05, |
| "loss": 0.8466, |
| "step": 2135 |
| }, |
| { |
| "epoch": 2.3462858712069203, |
| "grad_norm": 1.7598285675048828, |
| "learning_rate": 7.252747252747252e-05, |
| "loss": 0.6881, |
| "step": 2136 |
| }, |
| { |
| "epoch": 2.3473843196484965, |
| "grad_norm": 0.5551338791847229, |
| "learning_rate": 7.24053724053724e-05, |
| "loss": 0.5908, |
| "step": 2137 |
| }, |
| { |
| "epoch": 2.3484827680900726, |
| "grad_norm": 0.42995861172676086, |
| "learning_rate": 7.228327228327228e-05, |
| "loss": 0.689, |
| "step": 2138 |
| }, |
| { |
| "epoch": 2.349581216531649, |
| "grad_norm": 0.6428760290145874, |
| "learning_rate": 7.216117216117216e-05, |
| "loss": 0.5879, |
| "step": 2139 |
| }, |
| { |
| "epoch": 2.3506796649732253, |
| "grad_norm": 0.6199445724487305, |
| "learning_rate": 7.203907203907203e-05, |
| "loss": 0.5275, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.3517781134148015, |
| "grad_norm": 0.4687311053276062, |
| "learning_rate": 7.19169719169719e-05, |
| "loss": 0.7046, |
| "step": 2141 |
| }, |
| { |
| "epoch": 2.352876561856378, |
| "grad_norm": 0.47645121812820435, |
| "learning_rate": 7.179487179487179e-05, |
| "loss": 0.4787, |
| "step": 2142 |
| }, |
| { |
| "epoch": 2.353975010297954, |
| "grad_norm": 1.3774843215942383, |
| "learning_rate": 7.167277167277166e-05, |
| "loss": 0.565, |
| "step": 2143 |
| }, |
| { |
| "epoch": 2.3550734587395303, |
| "grad_norm": 0.9585548043251038, |
| "learning_rate": 7.155067155067155e-05, |
| "loss": 0.7496, |
| "step": 2144 |
| }, |
| { |
| "epoch": 2.356171907181107, |
| "grad_norm": 0.9073938131332397, |
| "learning_rate": 7.142857142857142e-05, |
| "loss": 0.6785, |
| "step": 2145 |
| }, |
| { |
| "epoch": 2.357270355622683, |
| "grad_norm": 1.4543087482452393, |
| "learning_rate": 7.13064713064713e-05, |
| "loss": 0.4827, |
| "step": 2146 |
| }, |
| { |
| "epoch": 2.358368804064259, |
| "grad_norm": 0.49685895442962646, |
| "learning_rate": 7.118437118437118e-05, |
| "loss": 0.5624, |
| "step": 2147 |
| }, |
| { |
| "epoch": 2.3594672525058353, |
| "grad_norm": 0.3820716142654419, |
| "learning_rate": 7.106227106227105e-05, |
| "loss": 0.5326, |
| "step": 2148 |
| }, |
| { |
| "epoch": 2.360565700947412, |
| "grad_norm": 0.6018278002738953, |
| "learning_rate": 7.094017094017094e-05, |
| "loss": 0.7372, |
| "step": 2149 |
| }, |
| { |
| "epoch": 2.361664149388988, |
| "grad_norm": 0.49245381355285645, |
| "learning_rate": 7.081807081807082e-05, |
| "loss": 0.714, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.362762597830564, |
| "grad_norm": 0.5913417339324951, |
| "learning_rate": 7.069597069597069e-05, |
| "loss": 0.6395, |
| "step": 2151 |
| }, |
| { |
| "epoch": 2.3638610462721408, |
| "grad_norm": 0.3142958879470825, |
| "learning_rate": 7.057387057387056e-05, |
| "loss": 0.4363, |
| "step": 2152 |
| }, |
| { |
| "epoch": 2.364959494713717, |
| "grad_norm": 0.44251006841659546, |
| "learning_rate": 7.045177045177044e-05, |
| "loss": 0.5751, |
| "step": 2153 |
| }, |
| { |
| "epoch": 2.366057943155293, |
| "grad_norm": 0.7642143964767456, |
| "learning_rate": 7.032967032967032e-05, |
| "loss": 0.9707, |
| "step": 2154 |
| }, |
| { |
| "epoch": 2.367156391596869, |
| "grad_norm": 0.3676380217075348, |
| "learning_rate": 7.020757020757021e-05, |
| "loss": 0.6142, |
| "step": 2155 |
| }, |
| { |
| "epoch": 2.3682548400384458, |
| "grad_norm": 0.43112027645111084, |
| "learning_rate": 7.008547008547008e-05, |
| "loss": 0.6194, |
| "step": 2156 |
| }, |
| { |
| "epoch": 2.369353288480022, |
| "grad_norm": 0.5463792681694031, |
| "learning_rate": 6.996336996336996e-05, |
| "loss": 0.5478, |
| "step": 2157 |
| }, |
| { |
| "epoch": 2.370451736921598, |
| "grad_norm": 0.5498053431510925, |
| "learning_rate": 6.984126984126984e-05, |
| "loss": 0.8373, |
| "step": 2158 |
| }, |
| { |
| "epoch": 2.3715501853631746, |
| "grad_norm": 0.5144299268722534, |
| "learning_rate": 6.971916971916971e-05, |
| "loss": 0.7033, |
| "step": 2159 |
| }, |
| { |
| "epoch": 2.3726486338047508, |
| "grad_norm": 0.4049033522605896, |
| "learning_rate": 6.95970695970696e-05, |
| "loss": 0.6257, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.373747082246327, |
| "grad_norm": 0.8007866740226746, |
| "learning_rate": 6.947496947496947e-05, |
| "loss": 1.1859, |
| "step": 2161 |
| }, |
| { |
| "epoch": 2.3748455306879035, |
| "grad_norm": 0.6302816867828369, |
| "learning_rate": 6.935286935286935e-05, |
| "loss": 0.4972, |
| "step": 2162 |
| }, |
| { |
| "epoch": 2.3759439791294796, |
| "grad_norm": 0.4181542694568634, |
| "learning_rate": 6.923076923076922e-05, |
| "loss": 0.5543, |
| "step": 2163 |
| }, |
| { |
| "epoch": 2.3770424275710558, |
| "grad_norm": 0.45409703254699707, |
| "learning_rate": 6.91086691086691e-05, |
| "loss": 0.6237, |
| "step": 2164 |
| }, |
| { |
| "epoch": 2.3781408760126324, |
| "grad_norm": 0.5172666907310486, |
| "learning_rate": 6.898656898656898e-05, |
| "loss": 0.5798, |
| "step": 2165 |
| }, |
| { |
| "epoch": 2.3792393244542085, |
| "grad_norm": 0.7849127054214478, |
| "learning_rate": 6.886446886446885e-05, |
| "loss": 0.8282, |
| "step": 2166 |
| }, |
| { |
| "epoch": 2.3803377728957846, |
| "grad_norm": 0.4041041135787964, |
| "learning_rate": 6.874236874236874e-05, |
| "loss": 0.5046, |
| "step": 2167 |
| }, |
| { |
| "epoch": 2.3814362213373608, |
| "grad_norm": 0.35880064964294434, |
| "learning_rate": 6.862026862026862e-05, |
| "loss": 0.4096, |
| "step": 2168 |
| }, |
| { |
| "epoch": 2.3825346697789374, |
| "grad_norm": 0.5949457883834839, |
| "learning_rate": 6.84981684981685e-05, |
| "loss": 0.6666, |
| "step": 2169 |
| }, |
| { |
| "epoch": 2.3836331182205135, |
| "grad_norm": 0.6332186460494995, |
| "learning_rate": 6.837606837606837e-05, |
| "loss": 0.9715, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.3847315666620896, |
| "grad_norm": 0.3173432946205139, |
| "learning_rate": 6.825396825396824e-05, |
| "loss": 0.6792, |
| "step": 2171 |
| }, |
| { |
| "epoch": 2.385830015103666, |
| "grad_norm": 0.7556782364845276, |
| "learning_rate": 6.813186813186813e-05, |
| "loss": 0.7267, |
| "step": 2172 |
| }, |
| { |
| "epoch": 2.3869284635452424, |
| "grad_norm": 0.43191683292388916, |
| "learning_rate": 6.800976800976801e-05, |
| "loss": 0.5841, |
| "step": 2173 |
| }, |
| { |
| "epoch": 2.3880269119868185, |
| "grad_norm": 0.4010660946369171, |
| "learning_rate": 6.788766788766788e-05, |
| "loss": 0.7491, |
| "step": 2174 |
| }, |
| { |
| "epoch": 2.389125360428395, |
| "grad_norm": 0.6889204382896423, |
| "learning_rate": 6.776556776556775e-05, |
| "loss": 0.4539, |
| "step": 2175 |
| }, |
| { |
| "epoch": 2.390223808869971, |
| "grad_norm": 0.4509136974811554, |
| "learning_rate": 6.764346764346764e-05, |
| "loss": 0.7066, |
| "step": 2176 |
| }, |
| { |
| "epoch": 2.3913222573115474, |
| "grad_norm": 0.4313298463821411, |
| "learning_rate": 6.752136752136751e-05, |
| "loss": 0.6292, |
| "step": 2177 |
| }, |
| { |
| "epoch": 2.392420705753124, |
| "grad_norm": 0.7713265419006348, |
| "learning_rate": 6.73992673992674e-05, |
| "loss": 0.8392, |
| "step": 2178 |
| }, |
| { |
| "epoch": 2.3935191541947, |
| "grad_norm": 0.5283428430557251, |
| "learning_rate": 6.727716727716727e-05, |
| "loss": 0.6912, |
| "step": 2179 |
| }, |
| { |
| "epoch": 2.394617602636276, |
| "grad_norm": 0.40429314970970154, |
| "learning_rate": 6.715506715506716e-05, |
| "loss": 0.4335, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.3957160510778523, |
| "grad_norm": 0.6888754367828369, |
| "learning_rate": 6.703296703296703e-05, |
| "loss": 0.6276, |
| "step": 2181 |
| }, |
| { |
| "epoch": 2.396814499519429, |
| "grad_norm": 0.5595026612281799, |
| "learning_rate": 6.69108669108669e-05, |
| "loss": 0.7806, |
| "step": 2182 |
| }, |
| { |
| "epoch": 2.397912947961005, |
| "grad_norm": 0.32394587993621826, |
| "learning_rate": 6.678876678876678e-05, |
| "loss": 0.5531, |
| "step": 2183 |
| }, |
| { |
| "epoch": 2.399011396402581, |
| "grad_norm": 0.5909039974212646, |
| "learning_rate": 6.666666666666666e-05, |
| "loss": 0.4932, |
| "step": 2184 |
| }, |
| { |
| "epoch": 2.400109844844158, |
| "grad_norm": 0.4148501455783844, |
| "learning_rate": 6.654456654456654e-05, |
| "loss": 0.5637, |
| "step": 2185 |
| }, |
| { |
| "epoch": 2.401208293285734, |
| "grad_norm": 0.558403491973877, |
| "learning_rate": 6.642246642246641e-05, |
| "loss": 0.5733, |
| "step": 2186 |
| }, |
| { |
| "epoch": 2.40230674172731, |
| "grad_norm": 0.5171149373054504, |
| "learning_rate": 6.630036630036629e-05, |
| "loss": 0.6931, |
| "step": 2187 |
| }, |
| { |
| "epoch": 2.403405190168886, |
| "grad_norm": 0.44966164231300354, |
| "learning_rate": 6.617826617826617e-05, |
| "loss": 0.5061, |
| "step": 2188 |
| }, |
| { |
| "epoch": 2.404503638610463, |
| "grad_norm": 0.45499417185783386, |
| "learning_rate": 6.605616605616606e-05, |
| "loss": 0.3726, |
| "step": 2189 |
| }, |
| { |
| "epoch": 2.405602087052039, |
| "grad_norm": 0.5790139436721802, |
| "learning_rate": 6.593406593406593e-05, |
| "loss": 0.6647, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.4067005354936155, |
| "grad_norm": 0.5948793292045593, |
| "learning_rate": 6.581196581196581e-05, |
| "loss": 0.765, |
| "step": 2191 |
| }, |
| { |
| "epoch": 2.4077989839351917, |
| "grad_norm": 0.5925643444061279, |
| "learning_rate": 6.568986568986569e-05, |
| "loss": 0.889, |
| "step": 2192 |
| }, |
| { |
| "epoch": 2.408897432376768, |
| "grad_norm": 0.5776219964027405, |
| "learning_rate": 6.556776556776556e-05, |
| "loss": 0.5506, |
| "step": 2193 |
| }, |
| { |
| "epoch": 2.409995880818344, |
| "grad_norm": 0.44397997856140137, |
| "learning_rate": 6.544566544566544e-05, |
| "loss": 0.5372, |
| "step": 2194 |
| }, |
| { |
| "epoch": 2.4110943292599205, |
| "grad_norm": 0.45733606815338135, |
| "learning_rate": 6.532356532356532e-05, |
| "loss": 0.7207, |
| "step": 2195 |
| }, |
| { |
| "epoch": 2.4121927777014966, |
| "grad_norm": 0.38223645091056824, |
| "learning_rate": 6.52014652014652e-05, |
| "loss": 0.5888, |
| "step": 2196 |
| }, |
| { |
| "epoch": 2.413291226143073, |
| "grad_norm": 0.3642580211162567, |
| "learning_rate": 6.507936507936507e-05, |
| "loss": 0.5687, |
| "step": 2197 |
| }, |
| { |
| "epoch": 2.4143896745846494, |
| "grad_norm": 0.42435723543167114, |
| "learning_rate": 6.495726495726494e-05, |
| "loss": 0.6056, |
| "step": 2198 |
| }, |
| { |
| "epoch": 2.4154881230262255, |
| "grad_norm": 0.4998740255832672, |
| "learning_rate": 6.483516483516483e-05, |
| "loss": 0.6813, |
| "step": 2199 |
| }, |
| { |
| "epoch": 2.4165865714678016, |
| "grad_norm": 0.47158849239349365, |
| "learning_rate": 6.47130647130647e-05, |
| "loss": 0.5585, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.417685019909378, |
| "grad_norm": 0.4780612289905548, |
| "learning_rate": 6.459096459096459e-05, |
| "loss": 0.4941, |
| "step": 2201 |
| }, |
| { |
| "epoch": 2.4187834683509544, |
| "grad_norm": 0.5073630809783936, |
| "learning_rate": 6.446886446886447e-05, |
| "loss": 0.4549, |
| "step": 2202 |
| }, |
| { |
| "epoch": 2.4198819167925305, |
| "grad_norm": 0.4311310052871704, |
| "learning_rate": 6.434676434676435e-05, |
| "loss": 0.4419, |
| "step": 2203 |
| }, |
| { |
| "epoch": 2.4209803652341066, |
| "grad_norm": 0.3557896316051483, |
| "learning_rate": 6.422466422466422e-05, |
| "loss": 0.6973, |
| "step": 2204 |
| }, |
| { |
| "epoch": 2.4220788136756832, |
| "grad_norm": 0.6171516180038452, |
| "learning_rate": 6.410256410256409e-05, |
| "loss": 0.7554, |
| "step": 2205 |
| }, |
| { |
| "epoch": 2.4231772621172594, |
| "grad_norm": 0.4687957465648651, |
| "learning_rate": 6.398046398046397e-05, |
| "loss": 0.7429, |
| "step": 2206 |
| }, |
| { |
| "epoch": 2.4242757105588355, |
| "grad_norm": 0.8685696125030518, |
| "learning_rate": 6.385836385836386e-05, |
| "loss": 0.5896, |
| "step": 2207 |
| }, |
| { |
| "epoch": 2.425374159000412, |
| "grad_norm": 0.39599040150642395, |
| "learning_rate": 6.373626373626373e-05, |
| "loss": 0.4744, |
| "step": 2208 |
| }, |
| { |
| "epoch": 2.4264726074419882, |
| "grad_norm": 0.9079630970954895, |
| "learning_rate": 6.36141636141636e-05, |
| "loss": 0.6067, |
| "step": 2209 |
| }, |
| { |
| "epoch": 2.4275710558835644, |
| "grad_norm": 0.5051462054252625, |
| "learning_rate": 6.349206349206349e-05, |
| "loss": 0.7314, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.428669504325141, |
| "grad_norm": 0.4899844825267792, |
| "learning_rate": 6.336996336996336e-05, |
| "loss": 0.7086, |
| "step": 2211 |
| }, |
| { |
| "epoch": 2.429767952766717, |
| "grad_norm": 0.5135432481765747, |
| "learning_rate": 6.324786324786325e-05, |
| "loss": 0.5261, |
| "step": 2212 |
| }, |
| { |
| "epoch": 2.4308664012082932, |
| "grad_norm": 0.6025048494338989, |
| "learning_rate": 6.312576312576312e-05, |
| "loss": 0.5276, |
| "step": 2213 |
| }, |
| { |
| "epoch": 2.4319648496498694, |
| "grad_norm": 0.6931442022323608, |
| "learning_rate": 6.3003663003663e-05, |
| "loss": 0.6535, |
| "step": 2214 |
| }, |
| { |
| "epoch": 2.433063298091446, |
| "grad_norm": 0.695106565952301, |
| "learning_rate": 6.288156288156288e-05, |
| "loss": 0.9183, |
| "step": 2215 |
| }, |
| { |
| "epoch": 2.434161746533022, |
| "grad_norm": 0.450100302696228, |
| "learning_rate": 6.275946275946275e-05, |
| "loss": 0.5049, |
| "step": 2216 |
| }, |
| { |
| "epoch": 2.4352601949745982, |
| "grad_norm": 0.5539785623550415, |
| "learning_rate": 6.263736263736263e-05, |
| "loss": 0.5735, |
| "step": 2217 |
| }, |
| { |
| "epoch": 2.436358643416175, |
| "grad_norm": 0.5560977458953857, |
| "learning_rate": 6.25152625152625e-05, |
| "loss": 0.7364, |
| "step": 2218 |
| }, |
| { |
| "epoch": 2.437457091857751, |
| "grad_norm": 0.740195095539093, |
| "learning_rate": 6.239316239316239e-05, |
| "loss": 0.7839, |
| "step": 2219 |
| }, |
| { |
| "epoch": 2.438555540299327, |
| "grad_norm": 0.9324271082878113, |
| "learning_rate": 6.227106227106226e-05, |
| "loss": 0.6365, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.4396539887409037, |
| "grad_norm": 0.5540104508399963, |
| "learning_rate": 6.214896214896215e-05, |
| "loss": 0.6586, |
| "step": 2221 |
| }, |
| { |
| "epoch": 2.44075243718248, |
| "grad_norm": 0.5028054714202881, |
| "learning_rate": 6.202686202686202e-05, |
| "loss": 0.4422, |
| "step": 2222 |
| }, |
| { |
| "epoch": 2.441850885624056, |
| "grad_norm": 0.7052125930786133, |
| "learning_rate": 6.190476190476189e-05, |
| "loss": 0.7248, |
| "step": 2223 |
| }, |
| { |
| "epoch": 2.4429493340656325, |
| "grad_norm": 0.6705207824707031, |
| "learning_rate": 6.178266178266178e-05, |
| "loss": 0.81, |
| "step": 2224 |
| }, |
| { |
| "epoch": 2.4440477825072087, |
| "grad_norm": 0.7996514439582825, |
| "learning_rate": 6.166056166056166e-05, |
| "loss": 0.382, |
| "step": 2225 |
| }, |
| { |
| "epoch": 2.445146230948785, |
| "grad_norm": 1.5169689655303955, |
| "learning_rate": 6.153846153846154e-05, |
| "loss": 0.7373, |
| "step": 2226 |
| }, |
| { |
| "epoch": 2.446244679390361, |
| "grad_norm": 0.8039339780807495, |
| "learning_rate": 6.141636141636141e-05, |
| "loss": 0.8609, |
| "step": 2227 |
| }, |
| { |
| "epoch": 2.4473431278319375, |
| "grad_norm": 0.6489125490188599, |
| "learning_rate": 6.129426129426128e-05, |
| "loss": 0.6309, |
| "step": 2228 |
| }, |
| { |
| "epoch": 2.4484415762735137, |
| "grad_norm": 0.533184826374054, |
| "learning_rate": 6.117216117216116e-05, |
| "loss": 0.5166, |
| "step": 2229 |
| }, |
| { |
| "epoch": 2.44954002471509, |
| "grad_norm": 0.5699225068092346, |
| "learning_rate": 6.105006105006105e-05, |
| "loss": 0.7276, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.4506384731566664, |
| "grad_norm": 0.5552012324333191, |
| "learning_rate": 6.092796092796092e-05, |
| "loss": 0.636, |
| "step": 2231 |
| }, |
| { |
| "epoch": 2.4517369215982425, |
| "grad_norm": 0.4785599112510681, |
| "learning_rate": 6.08058608058608e-05, |
| "loss": 0.6362, |
| "step": 2232 |
| }, |
| { |
| "epoch": 2.4528353700398187, |
| "grad_norm": 0.740872859954834, |
| "learning_rate": 6.068376068376068e-05, |
| "loss": 0.5603, |
| "step": 2233 |
| }, |
| { |
| "epoch": 2.453933818481395, |
| "grad_norm": 0.5217441916465759, |
| "learning_rate": 6.056166056166056e-05, |
| "loss": 0.6306, |
| "step": 2234 |
| }, |
| { |
| "epoch": 2.4550322669229714, |
| "grad_norm": 0.446481853723526, |
| "learning_rate": 6.043956043956044e-05, |
| "loss": 0.8156, |
| "step": 2235 |
| }, |
| { |
| "epoch": 2.4561307153645475, |
| "grad_norm": 0.6527410745620728, |
| "learning_rate": 6.031746031746031e-05, |
| "loss": 0.7057, |
| "step": 2236 |
| }, |
| { |
| "epoch": 2.4572291638061237, |
| "grad_norm": 0.6801958680152893, |
| "learning_rate": 6.019536019536019e-05, |
| "loss": 0.7718, |
| "step": 2237 |
| }, |
| { |
| "epoch": 2.4583276122477002, |
| "grad_norm": 1.0723007917404175, |
| "learning_rate": 6.007326007326007e-05, |
| "loss": 0.5552, |
| "step": 2238 |
| }, |
| { |
| "epoch": 2.4594260606892764, |
| "grad_norm": 0.4058208763599396, |
| "learning_rate": 5.9951159951159945e-05, |
| "loss": 0.5035, |
| "step": 2239 |
| }, |
| { |
| "epoch": 2.4605245091308525, |
| "grad_norm": 0.5384330153465271, |
| "learning_rate": 5.9829059829059824e-05, |
| "loss": 0.5059, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.461622957572429, |
| "grad_norm": 0.7797716856002808, |
| "learning_rate": 5.9706959706959696e-05, |
| "loss": 0.5613, |
| "step": 2241 |
| }, |
| { |
| "epoch": 2.4627214060140052, |
| "grad_norm": 2.9689226150512695, |
| "learning_rate": 5.958485958485958e-05, |
| "loss": 0.6219, |
| "step": 2242 |
| }, |
| { |
| "epoch": 2.4638198544555814, |
| "grad_norm": 0.47863152623176575, |
| "learning_rate": 5.946275946275946e-05, |
| "loss": 0.5498, |
| "step": 2243 |
| }, |
| { |
| "epoch": 2.464918302897158, |
| "grad_norm": 0.49707144498825073, |
| "learning_rate": 5.934065934065933e-05, |
| "loss": 0.775, |
| "step": 2244 |
| }, |
| { |
| "epoch": 2.466016751338734, |
| "grad_norm": 0.3437495529651642, |
| "learning_rate": 5.921855921855922e-05, |
| "loss": 0.4592, |
| "step": 2245 |
| }, |
| { |
| "epoch": 2.4671151997803102, |
| "grad_norm": 0.7298309206962585, |
| "learning_rate": 5.9096459096459096e-05, |
| "loss": 0.5374, |
| "step": 2246 |
| }, |
| { |
| "epoch": 2.4682136482218864, |
| "grad_norm": 0.6666691303253174, |
| "learning_rate": 5.897435897435897e-05, |
| "loss": 0.424, |
| "step": 2247 |
| }, |
| { |
| "epoch": 2.469312096663463, |
| "grad_norm": 0.5841661691665649, |
| "learning_rate": 5.8852258852258847e-05, |
| "loss": 0.5316, |
| "step": 2248 |
| }, |
| { |
| "epoch": 2.470410545105039, |
| "grad_norm": 0.4921081066131592, |
| "learning_rate": 5.873015873015872e-05, |
| "loss": 0.6901, |
| "step": 2249 |
| }, |
| { |
| "epoch": 2.4715089935466152, |
| "grad_norm": 0.4779987633228302, |
| "learning_rate": 5.8608058608058604e-05, |
| "loss": 0.8976, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.472607441988192, |
| "grad_norm": 0.43142780661582947, |
| "learning_rate": 5.848595848595848e-05, |
| "loss": 0.4915, |
| "step": 2251 |
| }, |
| { |
| "epoch": 2.473705890429768, |
| "grad_norm": 1.132870078086853, |
| "learning_rate": 5.8363858363858355e-05, |
| "loss": 0.6633, |
| "step": 2252 |
| }, |
| { |
| "epoch": 2.474804338871344, |
| "grad_norm": 0.5674893856048584, |
| "learning_rate": 5.824175824175824e-05, |
| "loss": 0.5023, |
| "step": 2253 |
| }, |
| { |
| "epoch": 2.4759027873129207, |
| "grad_norm": 0.42495957016944885, |
| "learning_rate": 5.811965811965811e-05, |
| "loss": 0.6544, |
| "step": 2254 |
| }, |
| { |
| "epoch": 2.477001235754497, |
| "grad_norm": 0.8031434416770935, |
| "learning_rate": 5.799755799755799e-05, |
| "loss": 0.892, |
| "step": 2255 |
| }, |
| { |
| "epoch": 2.478099684196073, |
| "grad_norm": 0.7715115547180176, |
| "learning_rate": 5.7875457875457876e-05, |
| "loss": 0.5659, |
| "step": 2256 |
| }, |
| { |
| "epoch": 2.4791981326376495, |
| "grad_norm": 0.6882114410400391, |
| "learning_rate": 5.775335775335775e-05, |
| "loss": 0.5154, |
| "step": 2257 |
| }, |
| { |
| "epoch": 2.4802965810792257, |
| "grad_norm": 0.4994114935398102, |
| "learning_rate": 5.763125763125763e-05, |
| "loss": 0.6001, |
| "step": 2258 |
| }, |
| { |
| "epoch": 2.481395029520802, |
| "grad_norm": 0.45008450746536255, |
| "learning_rate": 5.7509157509157506e-05, |
| "loss": 0.7076, |
| "step": 2259 |
| }, |
| { |
| "epoch": 2.482493477962378, |
| "grad_norm": 0.654270350933075, |
| "learning_rate": 5.738705738705738e-05, |
| "loss": 0.5809, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.4835919264039545, |
| "grad_norm": 0.6344896554946899, |
| "learning_rate": 5.726495726495726e-05, |
| "loss": 0.6059, |
| "step": 2261 |
| }, |
| { |
| "epoch": 2.4846903748455307, |
| "grad_norm": 0.44090238213539124, |
| "learning_rate": 5.7142857142857135e-05, |
| "loss": 0.7953, |
| "step": 2262 |
| }, |
| { |
| "epoch": 2.485788823287107, |
| "grad_norm": 0.47564128041267395, |
| "learning_rate": 5.7020757020757014e-05, |
| "loss": 0.5062, |
| "step": 2263 |
| }, |
| { |
| "epoch": 2.4868872717286834, |
| "grad_norm": 0.3644583225250244, |
| "learning_rate": 5.68986568986569e-05, |
| "loss": 0.6417, |
| "step": 2264 |
| }, |
| { |
| "epoch": 2.4879857201702595, |
| "grad_norm": 0.5264548659324646, |
| "learning_rate": 5.677655677655677e-05, |
| "loss": 0.5971, |
| "step": 2265 |
| }, |
| { |
| "epoch": 2.4890841686118357, |
| "grad_norm": 0.7300589680671692, |
| "learning_rate": 5.665445665445665e-05, |
| "loss": 0.6249, |
| "step": 2266 |
| }, |
| { |
| "epoch": 2.490182617053412, |
| "grad_norm": 0.9016311764717102, |
| "learning_rate": 5.653235653235652e-05, |
| "loss": 0.5761, |
| "step": 2267 |
| }, |
| { |
| "epoch": 2.4912810654949884, |
| "grad_norm": 0.7480237483978271, |
| "learning_rate": 5.641025641025641e-05, |
| "loss": 0.4026, |
| "step": 2268 |
| }, |
| { |
| "epoch": 2.4923795139365645, |
| "grad_norm": 0.5738864541053772, |
| "learning_rate": 5.6288156288156286e-05, |
| "loss": 0.8657, |
| "step": 2269 |
| }, |
| { |
| "epoch": 2.493477962378141, |
| "grad_norm": 0.7320820093154907, |
| "learning_rate": 5.616605616605616e-05, |
| "loss": 0.7341, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.4945764108197173, |
| "grad_norm": 0.7029497623443604, |
| "learning_rate": 5.6043956043956037e-05, |
| "loss": 0.7597, |
| "step": 2271 |
| }, |
| { |
| "epoch": 2.4956748592612934, |
| "grad_norm": 0.5160001516342163, |
| "learning_rate": 5.592185592185592e-05, |
| "loss": 0.6488, |
| "step": 2272 |
| }, |
| { |
| "epoch": 2.4967733077028695, |
| "grad_norm": 0.5425933003425598, |
| "learning_rate": 5.5799755799755794e-05, |
| "loss": 0.7102, |
| "step": 2273 |
| }, |
| { |
| "epoch": 2.497871756144446, |
| "grad_norm": 0.5881295204162598, |
| "learning_rate": 5.567765567765567e-05, |
| "loss": 0.8123, |
| "step": 2274 |
| }, |
| { |
| "epoch": 2.4989702045860223, |
| "grad_norm": 0.6021397113800049, |
| "learning_rate": 5.5555555555555545e-05, |
| "loss": 0.8887, |
| "step": 2275 |
| }, |
| { |
| "epoch": 2.5000686530275984, |
| "grad_norm": 0.4754411578178406, |
| "learning_rate": 5.543345543345543e-05, |
| "loss": 0.8162, |
| "step": 2276 |
| }, |
| { |
| "epoch": 2.501167101469175, |
| "grad_norm": 0.46976983547210693, |
| "learning_rate": 5.531135531135531e-05, |
| "loss": 0.4177, |
| "step": 2277 |
| }, |
| { |
| "epoch": 2.502265549910751, |
| "grad_norm": 0.4946482181549072, |
| "learning_rate": 5.518925518925518e-05, |
| "loss": 0.6997, |
| "step": 2278 |
| }, |
| { |
| "epoch": 2.5033639983523273, |
| "grad_norm": 0.49166280031204224, |
| "learning_rate": 5.5067155067155066e-05, |
| "loss": 0.6436, |
| "step": 2279 |
| }, |
| { |
| "epoch": 2.5044624467939034, |
| "grad_norm": 0.40157628059387207, |
| "learning_rate": 5.494505494505494e-05, |
| "loss": 0.6998, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.50556089523548, |
| "grad_norm": 0.4139937162399292, |
| "learning_rate": 5.482295482295482e-05, |
| "loss": 0.4021, |
| "step": 2281 |
| }, |
| { |
| "epoch": 2.506659343677056, |
| "grad_norm": 3.6814892292022705, |
| "learning_rate": 5.4700854700854696e-05, |
| "loss": 0.6402, |
| "step": 2282 |
| }, |
| { |
| "epoch": 2.5077577921186327, |
| "grad_norm": 0.3136257529258728, |
| "learning_rate": 5.4578754578754574e-05, |
| "loss": 0.5364, |
| "step": 2283 |
| }, |
| { |
| "epoch": 2.508856240560209, |
| "grad_norm": 0.42901432514190674, |
| "learning_rate": 5.445665445665445e-05, |
| "loss": 0.6838, |
| "step": 2284 |
| }, |
| { |
| "epoch": 2.509954689001785, |
| "grad_norm": 0.8462406992912292, |
| "learning_rate": 5.433455433455433e-05, |
| "loss": 0.4232, |
| "step": 2285 |
| }, |
| { |
| "epoch": 2.511053137443361, |
| "grad_norm": 1.244150996208191, |
| "learning_rate": 5.4212454212454204e-05, |
| "loss": 0.6192, |
| "step": 2286 |
| }, |
| { |
| "epoch": 2.5121515858849373, |
| "grad_norm": 0.834296703338623, |
| "learning_rate": 5.409035409035409e-05, |
| "loss": 0.548, |
| "step": 2287 |
| }, |
| { |
| "epoch": 2.513250034326514, |
| "grad_norm": 0.4279276430606842, |
| "learning_rate": 5.396825396825396e-05, |
| "loss": 0.7549, |
| "step": 2288 |
| }, |
| { |
| "epoch": 2.51434848276809, |
| "grad_norm": 0.5770757794380188, |
| "learning_rate": 5.384615384615384e-05, |
| "loss": 0.6156, |
| "step": 2289 |
| }, |
| { |
| "epoch": 2.5154469312096666, |
| "grad_norm": 0.41763821244239807, |
| "learning_rate": 5.3724053724053725e-05, |
| "loss": 0.5019, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.5165453796512427, |
| "grad_norm": 0.5212944746017456, |
| "learning_rate": 5.36019536019536e-05, |
| "loss": 0.6132, |
| "step": 2291 |
| }, |
| { |
| "epoch": 2.517643828092819, |
| "grad_norm": 0.44493457674980164, |
| "learning_rate": 5.3479853479853476e-05, |
| "loss": 0.4162, |
| "step": 2292 |
| }, |
| { |
| "epoch": 2.518742276534395, |
| "grad_norm": 0.46922022104263306, |
| "learning_rate": 5.335775335775335e-05, |
| "loss": 0.4624, |
| "step": 2293 |
| }, |
| { |
| "epoch": 2.5198407249759716, |
| "grad_norm": 0.41906213760375977, |
| "learning_rate": 5.3235653235653233e-05, |
| "loss": 0.612, |
| "step": 2294 |
| }, |
| { |
| "epoch": 2.5209391734175477, |
| "grad_norm": 0.620276153087616, |
| "learning_rate": 5.311355311355311e-05, |
| "loss": 0.6322, |
| "step": 2295 |
| }, |
| { |
| "epoch": 2.522037621859124, |
| "grad_norm": 0.6597051620483398, |
| "learning_rate": 5.2991452991452984e-05, |
| "loss": 0.7659, |
| "step": 2296 |
| }, |
| { |
| "epoch": 2.5231360703007004, |
| "grad_norm": 4.377660274505615, |
| "learning_rate": 5.286935286935286e-05, |
| "loss": 0.8294, |
| "step": 2297 |
| }, |
| { |
| "epoch": 2.5242345187422766, |
| "grad_norm": 0.6086331009864807, |
| "learning_rate": 5.274725274725275e-05, |
| "loss": 0.5164, |
| "step": 2298 |
| }, |
| { |
| "epoch": 2.5253329671838527, |
| "grad_norm": 0.5100352168083191, |
| "learning_rate": 5.262515262515262e-05, |
| "loss": 0.6319, |
| "step": 2299 |
| }, |
| { |
| "epoch": 2.526431415625429, |
| "grad_norm": 0.6642487049102783, |
| "learning_rate": 5.25030525030525e-05, |
| "loss": 0.533, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.5275298640670054, |
| "grad_norm": 0.5834927558898926, |
| "learning_rate": 5.238095238095237e-05, |
| "loss": 0.5669, |
| "step": 2301 |
| }, |
| { |
| "epoch": 2.5286283125085816, |
| "grad_norm": 0.530815064907074, |
| "learning_rate": 5.2258852258852256e-05, |
| "loss": 0.6189, |
| "step": 2302 |
| }, |
| { |
| "epoch": 2.529726760950158, |
| "grad_norm": 0.6275864243507385, |
| "learning_rate": 5.2136752136752135e-05, |
| "loss": 0.8403, |
| "step": 2303 |
| }, |
| { |
| "epoch": 2.5308252093917343, |
| "grad_norm": 0.5878366827964783, |
| "learning_rate": 5.201465201465201e-05, |
| "loss": 0.6176, |
| "step": 2304 |
| }, |
| { |
| "epoch": 2.5319236578333104, |
| "grad_norm": 0.37410980463027954, |
| "learning_rate": 5.189255189255189e-05, |
| "loss": 0.6337, |
| "step": 2305 |
| }, |
| { |
| "epoch": 2.5330221062748866, |
| "grad_norm": 0.43912917375564575, |
| "learning_rate": 5.1770451770451764e-05, |
| "loss": 0.5348, |
| "step": 2306 |
| }, |
| { |
| "epoch": 2.534120554716463, |
| "grad_norm": 1.4737471342086792, |
| "learning_rate": 5.164835164835164e-05, |
| "loss": 0.4862, |
| "step": 2307 |
| }, |
| { |
| "epoch": 2.5352190031580393, |
| "grad_norm": 0.3978705108165741, |
| "learning_rate": 5.152625152625152e-05, |
| "loss": 0.7929, |
| "step": 2308 |
| }, |
| { |
| "epoch": 2.5363174515996154, |
| "grad_norm": 0.3852058947086334, |
| "learning_rate": 5.14041514041514e-05, |
| "loss": 0.5895, |
| "step": 2309 |
| }, |
| { |
| "epoch": 2.537415900041192, |
| "grad_norm": 17.968448638916016, |
| "learning_rate": 5.128205128205128e-05, |
| "loss": 0.4661, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.538514348482768, |
| "grad_norm": 0.9369175434112549, |
| "learning_rate": 5.115995115995115e-05, |
| "loss": 0.5957, |
| "step": 2311 |
| }, |
| { |
| "epoch": 2.5396127969243443, |
| "grad_norm": 0.612750768661499, |
| "learning_rate": 5.103785103785103e-05, |
| "loss": 0.6786, |
| "step": 2312 |
| }, |
| { |
| "epoch": 2.5407112453659204, |
| "grad_norm": 0.588512659072876, |
| "learning_rate": 5.0915750915750915e-05, |
| "loss": 1.0482, |
| "step": 2313 |
| }, |
| { |
| "epoch": 2.541809693807497, |
| "grad_norm": 0.4964143633842468, |
| "learning_rate": 5.079365079365079e-05, |
| "loss": 0.5673, |
| "step": 2314 |
| }, |
| { |
| "epoch": 2.542908142249073, |
| "grad_norm": 0.5807982683181763, |
| "learning_rate": 5.0671550671550666e-05, |
| "loss": 0.5493, |
| "step": 2315 |
| }, |
| { |
| "epoch": 2.5440065906906497, |
| "grad_norm": 0.5131386518478394, |
| "learning_rate": 5.054945054945055e-05, |
| "loss": 0.5947, |
| "step": 2316 |
| }, |
| { |
| "epoch": 2.545105039132226, |
| "grad_norm": 0.4521124064922333, |
| "learning_rate": 5.0427350427350424e-05, |
| "loss": 0.5554, |
| "step": 2317 |
| }, |
| { |
| "epoch": 2.546203487573802, |
| "grad_norm": 0.9441378712654114, |
| "learning_rate": 5.03052503052503e-05, |
| "loss": 0.6991, |
| "step": 2318 |
| }, |
| { |
| "epoch": 2.547301936015378, |
| "grad_norm": 0.6353013515472412, |
| "learning_rate": 5.0183150183150174e-05, |
| "loss": 0.5308, |
| "step": 2319 |
| }, |
| { |
| "epoch": 2.5484003844569547, |
| "grad_norm": 0.5940631628036499, |
| "learning_rate": 5.006105006105006e-05, |
| "loss": 0.6536, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.549498832898531, |
| "grad_norm": 0.5457591414451599, |
| "learning_rate": 4.993894993894994e-05, |
| "loss": 0.6927, |
| "step": 2321 |
| }, |
| { |
| "epoch": 2.550597281340107, |
| "grad_norm": 0.6265937685966492, |
| "learning_rate": 4.981684981684981e-05, |
| "loss": 0.6341, |
| "step": 2322 |
| }, |
| { |
| "epoch": 2.5516957297816836, |
| "grad_norm": 0.5842925310134888, |
| "learning_rate": 4.969474969474969e-05, |
| "loss": 0.4583, |
| "step": 2323 |
| }, |
| { |
| "epoch": 2.5527941782232597, |
| "grad_norm": 0.5363351106643677, |
| "learning_rate": 4.957264957264956e-05, |
| "loss": 0.6882, |
| "step": 2324 |
| }, |
| { |
| "epoch": 2.553892626664836, |
| "grad_norm": 0.3677682876586914, |
| "learning_rate": 4.9450549450549446e-05, |
| "loss": 0.5671, |
| "step": 2325 |
| }, |
| { |
| "epoch": 2.554991075106412, |
| "grad_norm": 1.222985863685608, |
| "learning_rate": 4.9328449328449325e-05, |
| "loss": 0.4936, |
| "step": 2326 |
| }, |
| { |
| "epoch": 2.5560895235479886, |
| "grad_norm": 1.187898874282837, |
| "learning_rate": 4.92063492063492e-05, |
| "loss": 0.4893, |
| "step": 2327 |
| }, |
| { |
| "epoch": 2.5571879719895647, |
| "grad_norm": 0.38843801617622375, |
| "learning_rate": 4.908424908424908e-05, |
| "loss": 0.6512, |
| "step": 2328 |
| }, |
| { |
| "epoch": 2.558286420431141, |
| "grad_norm": 0.9550036191940308, |
| "learning_rate": 4.896214896214896e-05, |
| "loss": 0.6055, |
| "step": 2329 |
| }, |
| { |
| "epoch": 2.5593848688727174, |
| "grad_norm": 0.80762779712677, |
| "learning_rate": 4.884004884004883e-05, |
| "loss": 0.8852, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.5604833173142936, |
| "grad_norm": 0.7496643662452698, |
| "learning_rate": 4.871794871794872e-05, |
| "loss": 0.6535, |
| "step": 2331 |
| }, |
| { |
| "epoch": 2.5615817657558697, |
| "grad_norm": 0.5532578825950623, |
| "learning_rate": 4.859584859584859e-05, |
| "loss": 0.6336, |
| "step": 2332 |
| }, |
| { |
| "epoch": 2.562680214197446, |
| "grad_norm": 0.4058012366294861, |
| "learning_rate": 4.847374847374847e-05, |
| "loss": 0.6529, |
| "step": 2333 |
| }, |
| { |
| "epoch": 2.5637786626390224, |
| "grad_norm": 3.1913115978240967, |
| "learning_rate": 4.835164835164835e-05, |
| "loss": 0.548, |
| "step": 2334 |
| }, |
| { |
| "epoch": 2.5648771110805986, |
| "grad_norm": 0.47375988960266113, |
| "learning_rate": 4.822954822954822e-05, |
| "loss": 0.7567, |
| "step": 2335 |
| }, |
| { |
| "epoch": 2.565975559522175, |
| "grad_norm": 0.5287726521492004, |
| "learning_rate": 4.8107448107448106e-05, |
| "loss": 0.6009, |
| "step": 2336 |
| }, |
| { |
| "epoch": 2.5670740079637513, |
| "grad_norm": 0.43966931104660034, |
| "learning_rate": 4.798534798534798e-05, |
| "loss": 0.5538, |
| "step": 2337 |
| }, |
| { |
| "epoch": 2.5681724564053274, |
| "grad_norm": 0.6683239340782166, |
| "learning_rate": 4.7863247863247856e-05, |
| "loss": 0.3999, |
| "step": 2338 |
| }, |
| { |
| "epoch": 2.5692709048469036, |
| "grad_norm": 0.5260687470436096, |
| "learning_rate": 4.774114774114774e-05, |
| "loss": 0.7212, |
| "step": 2339 |
| }, |
| { |
| "epoch": 2.57036935328848, |
| "grad_norm": 1.086850881576538, |
| "learning_rate": 4.7619047619047614e-05, |
| "loss": 0.7439, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.5714678017300563, |
| "grad_norm": 0.9744517207145691, |
| "learning_rate": 4.749694749694749e-05, |
| "loss": 0.5625, |
| "step": 2341 |
| }, |
| { |
| "epoch": 2.5725662501716324, |
| "grad_norm": 0.6829352974891663, |
| "learning_rate": 4.737484737484738e-05, |
| "loss": 0.5241, |
| "step": 2342 |
| }, |
| { |
| "epoch": 2.573664698613209, |
| "grad_norm": 0.9441612958908081, |
| "learning_rate": 4.725274725274725e-05, |
| "loss": 0.8815, |
| "step": 2343 |
| }, |
| { |
| "epoch": 2.574763147054785, |
| "grad_norm": 0.9406607151031494, |
| "learning_rate": 4.713064713064713e-05, |
| "loss": 0.7176, |
| "step": 2344 |
| }, |
| { |
| "epoch": 2.5758615954963613, |
| "grad_norm": 0.6601364016532898, |
| "learning_rate": 4.7008547008547e-05, |
| "loss": 0.7713, |
| "step": 2345 |
| }, |
| { |
| "epoch": 2.5769600439379374, |
| "grad_norm": 2.5189599990844727, |
| "learning_rate": 4.688644688644688e-05, |
| "loss": 0.5572, |
| "step": 2346 |
| }, |
| { |
| "epoch": 2.578058492379514, |
| "grad_norm": 0.7295210957527161, |
| "learning_rate": 4.6764346764346765e-05, |
| "loss": 0.4431, |
| "step": 2347 |
| }, |
| { |
| "epoch": 2.57915694082109, |
| "grad_norm": 0.5053385496139526, |
| "learning_rate": 4.6642246642246637e-05, |
| "loss": 0.4881, |
| "step": 2348 |
| }, |
| { |
| "epoch": 2.5802553892626667, |
| "grad_norm": 0.6556063890457153, |
| "learning_rate": 4.6520146520146515e-05, |
| "loss": 0.5168, |
| "step": 2349 |
| }, |
| { |
| "epoch": 2.581353837704243, |
| "grad_norm": 0.37052014470100403, |
| "learning_rate": 4.639804639804639e-05, |
| "loss": 0.3954, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.582452286145819, |
| "grad_norm": 0.5975561738014221, |
| "learning_rate": 4.627594627594627e-05, |
| "loss": 0.5714, |
| "step": 2351 |
| }, |
| { |
| "epoch": 2.583550734587395, |
| "grad_norm": 0.7273014187812805, |
| "learning_rate": 4.615384615384615e-05, |
| "loss": 0.7287, |
| "step": 2352 |
| }, |
| { |
| "epoch": 2.5846491830289717, |
| "grad_norm": 0.566586971282959, |
| "learning_rate": 4.603174603174602e-05, |
| "loss": 0.5589, |
| "step": 2353 |
| }, |
| { |
| "epoch": 2.585747631470548, |
| "grad_norm": 0.5846517086029053, |
| "learning_rate": 4.590964590964591e-05, |
| "loss": 0.5061, |
| "step": 2354 |
| }, |
| { |
| "epoch": 2.586846079912124, |
| "grad_norm": 0.7470859885215759, |
| "learning_rate": 4.578754578754579e-05, |
| "loss": 0.5433, |
| "step": 2355 |
| }, |
| { |
| "epoch": 2.5879445283537006, |
| "grad_norm": 0.5419175624847412, |
| "learning_rate": 4.566544566544566e-05, |
| "loss": 0.5502, |
| "step": 2356 |
| }, |
| { |
| "epoch": 2.5890429767952767, |
| "grad_norm": 1.507851004600525, |
| "learning_rate": 4.554334554334554e-05, |
| "loss": 0.7399, |
| "step": 2357 |
| }, |
| { |
| "epoch": 2.590141425236853, |
| "grad_norm": 1.4420006275177002, |
| "learning_rate": 4.542124542124542e-05, |
| "loss": 0.4233, |
| "step": 2358 |
| }, |
| { |
| "epoch": 2.591239873678429, |
| "grad_norm": 0.6471789479255676, |
| "learning_rate": 4.5299145299145296e-05, |
| "loss": 0.4052, |
| "step": 2359 |
| }, |
| { |
| "epoch": 2.5923383221200056, |
| "grad_norm": 0.5886567831039429, |
| "learning_rate": 4.5177045177045174e-05, |
| "loss": 0.7197, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.5934367705615817, |
| "grad_norm": 0.843024492263794, |
| "learning_rate": 4.5054945054945046e-05, |
| "loss": 0.7636, |
| "step": 2361 |
| }, |
| { |
| "epoch": 2.5945352190031583, |
| "grad_norm": 0.8689064979553223, |
| "learning_rate": 4.493284493284493e-05, |
| "loss": 0.6694, |
| "step": 2362 |
| }, |
| { |
| "epoch": 2.5956336674447344, |
| "grad_norm": 0.5112485289573669, |
| "learning_rate": 4.4810744810744804e-05, |
| "loss": 0.5338, |
| "step": 2363 |
| }, |
| { |
| "epoch": 2.5967321158863106, |
| "grad_norm": 0.4828614294528961, |
| "learning_rate": 4.468864468864468e-05, |
| "loss": 0.8519, |
| "step": 2364 |
| }, |
| { |
| "epoch": 2.5978305643278867, |
| "grad_norm": 0.5644575357437134, |
| "learning_rate": 4.456654456654457e-05, |
| "loss": 0.5605, |
| "step": 2365 |
| }, |
| { |
| "epoch": 2.598929012769463, |
| "grad_norm": 0.7749584913253784, |
| "learning_rate": 4.444444444444444e-05, |
| "loss": 0.6697, |
| "step": 2366 |
| }, |
| { |
| "epoch": 2.6000274612110394, |
| "grad_norm": 0.9038271307945251, |
| "learning_rate": 4.432234432234432e-05, |
| "loss": 0.7242, |
| "step": 2367 |
| }, |
| { |
| "epoch": 2.6011259096526156, |
| "grad_norm": 0.5102944374084473, |
| "learning_rate": 4.42002442002442e-05, |
| "loss": 0.5841, |
| "step": 2368 |
| }, |
| { |
| "epoch": 2.602224358094192, |
| "grad_norm": 0.5072823762893677, |
| "learning_rate": 4.4078144078144076e-05, |
| "loss": 0.4927, |
| "step": 2369 |
| }, |
| { |
| "epoch": 2.6033228065357683, |
| "grad_norm": 0.3654184341430664, |
| "learning_rate": 4.3956043956043955e-05, |
| "loss": 0.6449, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.6044212549773444, |
| "grad_norm": 1.7309939861297607, |
| "learning_rate": 4.3833943833943827e-05, |
| "loss": 0.6979, |
| "step": 2371 |
| }, |
| { |
| "epoch": 2.6055197034189206, |
| "grad_norm": 0.7982075214385986, |
| "learning_rate": 4.3711843711843705e-05, |
| "loss": 0.6589, |
| "step": 2372 |
| }, |
| { |
| "epoch": 2.606618151860497, |
| "grad_norm": 0.6989462375640869, |
| "learning_rate": 4.358974358974359e-05, |
| "loss": 0.7104, |
| "step": 2373 |
| }, |
| { |
| "epoch": 2.6077166003020733, |
| "grad_norm": 0.7331676483154297, |
| "learning_rate": 4.346764346764346e-05, |
| "loss": 0.7565, |
| "step": 2374 |
| }, |
| { |
| "epoch": 2.6088150487436494, |
| "grad_norm": 1.0566400289535522, |
| "learning_rate": 4.334554334554334e-05, |
| "loss": 0.6967, |
| "step": 2375 |
| }, |
| { |
| "epoch": 2.609913497185226, |
| "grad_norm": 0.5988017320632935, |
| "learning_rate": 4.322344322344321e-05, |
| "loss": 0.7871, |
| "step": 2376 |
| }, |
| { |
| "epoch": 2.611011945626802, |
| "grad_norm": 0.4248102307319641, |
| "learning_rate": 4.31013431013431e-05, |
| "loss": 0.6891, |
| "step": 2377 |
| }, |
| { |
| "epoch": 2.6121103940683783, |
| "grad_norm": 1.9839611053466797, |
| "learning_rate": 4.297924297924298e-05, |
| "loss": 0.6647, |
| "step": 2378 |
| }, |
| { |
| "epoch": 2.6132088425099544, |
| "grad_norm": 0.4382665455341339, |
| "learning_rate": 4.285714285714285e-05, |
| "loss": 0.5969, |
| "step": 2379 |
| }, |
| { |
| "epoch": 2.614307290951531, |
| "grad_norm": 1.1918715238571167, |
| "learning_rate": 4.2735042735042735e-05, |
| "loss": 0.7788, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.615405739393107, |
| "grad_norm": 0.38117820024490356, |
| "learning_rate": 4.2612942612942614e-05, |
| "loss": 0.4967, |
| "step": 2381 |
| }, |
| { |
| "epoch": 2.6165041878346837, |
| "grad_norm": 0.6454489827156067, |
| "learning_rate": 4.2490842490842486e-05, |
| "loss": 0.7724, |
| "step": 2382 |
| }, |
| { |
| "epoch": 2.61760263627626, |
| "grad_norm": 1.0696319341659546, |
| "learning_rate": 4.2368742368742364e-05, |
| "loss": 0.5292, |
| "step": 2383 |
| }, |
| { |
| "epoch": 2.618701084717836, |
| "grad_norm": 0.5887579321861267, |
| "learning_rate": 4.224664224664224e-05, |
| "loss": 0.5317, |
| "step": 2384 |
| }, |
| { |
| "epoch": 2.619799533159412, |
| "grad_norm": 0.557188093662262, |
| "learning_rate": 4.212454212454212e-05, |
| "loss": 0.7172, |
| "step": 2385 |
| }, |
| { |
| "epoch": 2.6208979816009887, |
| "grad_norm": 0.5122195482254028, |
| "learning_rate": 4.2002442002442e-05, |
| "loss": 0.6398, |
| "step": 2386 |
| }, |
| { |
| "epoch": 2.621996430042565, |
| "grad_norm": 0.520722508430481, |
| "learning_rate": 4.188034188034187e-05, |
| "loss": 0.3984, |
| "step": 2387 |
| }, |
| { |
| "epoch": 2.623094878484141, |
| "grad_norm": 1.2077422142028809, |
| "learning_rate": 4.175824175824176e-05, |
| "loss": 0.6686, |
| "step": 2388 |
| }, |
| { |
| "epoch": 2.6241933269257176, |
| "grad_norm": 1.1437829732894897, |
| "learning_rate": 4.163614163614163e-05, |
| "loss": 0.6653, |
| "step": 2389 |
| }, |
| { |
| "epoch": 2.6252917753672937, |
| "grad_norm": 0.6157158017158508, |
| "learning_rate": 4.151404151404151e-05, |
| "loss": 0.7074, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.62639022380887, |
| "grad_norm": 1.8944931030273438, |
| "learning_rate": 4.1391941391941394e-05, |
| "loss": 0.5991, |
| "step": 2391 |
| }, |
| { |
| "epoch": 2.627488672250446, |
| "grad_norm": 0.6598528623580933, |
| "learning_rate": 4.1269841269841266e-05, |
| "loss": 0.6051, |
| "step": 2392 |
| }, |
| { |
| "epoch": 2.6285871206920226, |
| "grad_norm": 0.9341129660606384, |
| "learning_rate": 4.1147741147741145e-05, |
| "loss": 0.3795, |
| "step": 2393 |
| }, |
| { |
| "epoch": 2.6296855691335987, |
| "grad_norm": 0.4246079921722412, |
| "learning_rate": 4.1025641025641023e-05, |
| "loss": 0.4603, |
| "step": 2394 |
| }, |
| { |
| "epoch": 2.6307840175751753, |
| "grad_norm": 0.6639881134033203, |
| "learning_rate": 4.09035409035409e-05, |
| "loss": 0.5862, |
| "step": 2395 |
| }, |
| { |
| "epoch": 2.6318824660167515, |
| "grad_norm": 1.297917366027832, |
| "learning_rate": 4.078144078144078e-05, |
| "loss": 0.6175, |
| "step": 2396 |
| }, |
| { |
| "epoch": 2.6329809144583276, |
| "grad_norm": 0.7880698442459106, |
| "learning_rate": 4.065934065934065e-05, |
| "loss": 0.7034, |
| "step": 2397 |
| }, |
| { |
| "epoch": 2.6340793628999037, |
| "grad_norm": 0.6197066903114319, |
| "learning_rate": 4.053724053724053e-05, |
| "loss": 0.659, |
| "step": 2398 |
| }, |
| { |
| "epoch": 2.6351778113414803, |
| "grad_norm": 0.7560408711433411, |
| "learning_rate": 4.041514041514042e-05, |
| "loss": 0.5543, |
| "step": 2399 |
| }, |
| { |
| "epoch": 2.6362762597830565, |
| "grad_norm": 2.2571635246276855, |
| "learning_rate": 4.029304029304029e-05, |
| "loss": 0.712, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.6373747082246326, |
| "grad_norm": 0.8119613528251648, |
| "learning_rate": 4.017094017094017e-05, |
| "loss": 0.6407, |
| "step": 2401 |
| }, |
| { |
| "epoch": 2.638473156666209, |
| "grad_norm": 3.9773592948913574, |
| "learning_rate": 4.004884004884004e-05, |
| "loss": 0.6434, |
| "step": 2402 |
| }, |
| { |
| "epoch": 2.6395716051077853, |
| "grad_norm": 1.2648125886917114, |
| "learning_rate": 3.9926739926739925e-05, |
| "loss": 0.689, |
| "step": 2403 |
| }, |
| { |
| "epoch": 2.6406700535493615, |
| "grad_norm": 0.7015364170074463, |
| "learning_rate": 3.9804639804639804e-05, |
| "loss": 0.4175, |
| "step": 2404 |
| }, |
| { |
| "epoch": 2.6417685019909376, |
| "grad_norm": 0.941303551197052, |
| "learning_rate": 3.9682539682539676e-05, |
| "loss": 0.4126, |
| "step": 2405 |
| }, |
| { |
| "epoch": 2.642866950432514, |
| "grad_norm": 0.7533726096153259, |
| "learning_rate": 3.956043956043956e-05, |
| "loss": 0.7401, |
| "step": 2406 |
| }, |
| { |
| "epoch": 2.6439653988740903, |
| "grad_norm": 0.5480525493621826, |
| "learning_rate": 3.943833943833943e-05, |
| "loss": 0.5567, |
| "step": 2407 |
| }, |
| { |
| "epoch": 2.6450638473156665, |
| "grad_norm": 0.6171422004699707, |
| "learning_rate": 3.931623931623931e-05, |
| "loss": 0.721, |
| "step": 2408 |
| }, |
| { |
| "epoch": 2.646162295757243, |
| "grad_norm": 0.6719728708267212, |
| "learning_rate": 3.919413919413919e-05, |
| "loss": 0.5015, |
| "step": 2409 |
| }, |
| { |
| "epoch": 2.647260744198819, |
| "grad_norm": 1.8106555938720703, |
| "learning_rate": 3.907203907203906e-05, |
| "loss": 0.6954, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.6483591926403953, |
| "grad_norm": 0.42534878849983215, |
| "learning_rate": 3.894993894993895e-05, |
| "loss": 0.5241, |
| "step": 2411 |
| }, |
| { |
| "epoch": 2.6494576410819715, |
| "grad_norm": 0.8733202219009399, |
| "learning_rate": 3.882783882783883e-05, |
| "loss": 0.4485, |
| "step": 2412 |
| }, |
| { |
| "epoch": 2.650556089523548, |
| "grad_norm": 0.9050257802009583, |
| "learning_rate": 3.87057387057387e-05, |
| "loss": 0.6202, |
| "step": 2413 |
| }, |
| { |
| "epoch": 2.651654537965124, |
| "grad_norm": 0.650347888469696, |
| "learning_rate": 3.8583638583638584e-05, |
| "loss": 0.621, |
| "step": 2414 |
| }, |
| { |
| "epoch": 2.6527529864067008, |
| "grad_norm": 6.092042446136475, |
| "learning_rate": 3.8461538461538456e-05, |
| "loss": 0.5143, |
| "step": 2415 |
| }, |
| { |
| "epoch": 2.653851434848277, |
| "grad_norm": 0.7801241874694824, |
| "learning_rate": 3.8339438339438335e-05, |
| "loss": 0.5424, |
| "step": 2416 |
| }, |
| { |
| "epoch": 2.654949883289853, |
| "grad_norm": 0.5492686629295349, |
| "learning_rate": 3.821733821733822e-05, |
| "loss": 0.642, |
| "step": 2417 |
| }, |
| { |
| "epoch": 2.656048331731429, |
| "grad_norm": 0.4257514774799347, |
| "learning_rate": 3.809523809523809e-05, |
| "loss": 0.8273, |
| "step": 2418 |
| }, |
| { |
| "epoch": 2.6571467801730058, |
| "grad_norm": 1.0180964469909668, |
| "learning_rate": 3.797313797313797e-05, |
| "loss": 0.6962, |
| "step": 2419 |
| }, |
| { |
| "epoch": 2.658245228614582, |
| "grad_norm": 0.3844882547855377, |
| "learning_rate": 3.785103785103784e-05, |
| "loss": 0.7315, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.659343677056158, |
| "grad_norm": 0.46182385087013245, |
| "learning_rate": 3.772893772893772e-05, |
| "loss": 0.3889, |
| "step": 2421 |
| }, |
| { |
| "epoch": 2.6604421254977346, |
| "grad_norm": 0.562627375125885, |
| "learning_rate": 3.760683760683761e-05, |
| "loss": 0.6415, |
| "step": 2422 |
| }, |
| { |
| "epoch": 2.6615405739393108, |
| "grad_norm": 0.3234645128250122, |
| "learning_rate": 3.7484737484737486e-05, |
| "loss": 0.4819, |
| "step": 2423 |
| }, |
| { |
| "epoch": 2.662639022380887, |
| "grad_norm": 0.6804205775260925, |
| "learning_rate": 3.736263736263736e-05, |
| "loss": 0.4248, |
| "step": 2424 |
| }, |
| { |
| "epoch": 2.663737470822463, |
| "grad_norm": 0.5543864369392395, |
| "learning_rate": 3.7240537240537236e-05, |
| "loss": 0.5259, |
| "step": 2425 |
| }, |
| { |
| "epoch": 2.6648359192640396, |
| "grad_norm": 0.8411497473716736, |
| "learning_rate": 3.7118437118437115e-05, |
| "loss": 0.5448, |
| "step": 2426 |
| }, |
| { |
| "epoch": 2.6659343677056158, |
| "grad_norm": 0.4386245608329773, |
| "learning_rate": 3.6996336996336994e-05, |
| "loss": 0.9601, |
| "step": 2427 |
| }, |
| { |
| "epoch": 2.6670328161471923, |
| "grad_norm": 0.773210346698761, |
| "learning_rate": 3.687423687423687e-05, |
| "loss": 0.8601, |
| "step": 2428 |
| }, |
| { |
| "epoch": 2.6681312645887685, |
| "grad_norm": 0.4636232852935791, |
| "learning_rate": 3.675213675213675e-05, |
| "loss": 0.6322, |
| "step": 2429 |
| }, |
| { |
| "epoch": 2.6692297130303446, |
| "grad_norm": 1.6318496465682983, |
| "learning_rate": 3.663003663003662e-05, |
| "loss": 0.4402, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.6703281614719208, |
| "grad_norm": 0.5299782156944275, |
| "learning_rate": 3.65079365079365e-05, |
| "loss": 0.5622, |
| "step": 2431 |
| }, |
| { |
| "epoch": 2.6714266099134973, |
| "grad_norm": 1.1223825216293335, |
| "learning_rate": 3.638583638583638e-05, |
| "loss": 0.5994, |
| "step": 2432 |
| }, |
| { |
| "epoch": 2.6725250583550735, |
| "grad_norm": 1.8495402336120605, |
| "learning_rate": 3.626373626373626e-05, |
| "loss": 0.669, |
| "step": 2433 |
| }, |
| { |
| "epoch": 2.6736235067966496, |
| "grad_norm": 0.4963383972644806, |
| "learning_rate": 3.614163614163614e-05, |
| "loss": 0.5412, |
| "step": 2434 |
| }, |
| { |
| "epoch": 2.674721955238226, |
| "grad_norm": 0.5644822716712952, |
| "learning_rate": 3.601953601953602e-05, |
| "loss": 0.5768, |
| "step": 2435 |
| }, |
| { |
| "epoch": 2.6758204036798023, |
| "grad_norm": 0.5272318720817566, |
| "learning_rate": 3.5897435897435896e-05, |
| "loss": 0.5909, |
| "step": 2436 |
| }, |
| { |
| "epoch": 2.6769188521213785, |
| "grad_norm": 0.29838863015174866, |
| "learning_rate": 3.5775335775335774e-05, |
| "loss": 0.5625, |
| "step": 2437 |
| }, |
| { |
| "epoch": 2.6780173005629546, |
| "grad_norm": 0.5375344157218933, |
| "learning_rate": 3.565323565323565e-05, |
| "loss": 0.5932, |
| "step": 2438 |
| }, |
| { |
| "epoch": 2.679115749004531, |
| "grad_norm": 0.7850833535194397, |
| "learning_rate": 3.5531135531135525e-05, |
| "loss": 0.6706, |
| "step": 2439 |
| }, |
| { |
| "epoch": 2.6802141974461073, |
| "grad_norm": 0.5286651253700256, |
| "learning_rate": 3.540903540903541e-05, |
| "loss": 0.6865, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.681312645887684, |
| "grad_norm": 0.9832364320755005, |
| "learning_rate": 3.528693528693528e-05, |
| "loss": 0.7941, |
| "step": 2441 |
| }, |
| { |
| "epoch": 2.68241109432926, |
| "grad_norm": 0.4431805908679962, |
| "learning_rate": 3.516483516483516e-05, |
| "loss": 0.4706, |
| "step": 2442 |
| }, |
| { |
| "epoch": 2.683509542770836, |
| "grad_norm": 1.7264482975006104, |
| "learning_rate": 3.504273504273504e-05, |
| "loss": 0.6308, |
| "step": 2443 |
| }, |
| { |
| "epoch": 2.6846079912124123, |
| "grad_norm": 0.6196084022521973, |
| "learning_rate": 3.492063492063492e-05, |
| "loss": 1.0233, |
| "step": 2444 |
| }, |
| { |
| "epoch": 2.6857064396539885, |
| "grad_norm": 0.855876088142395, |
| "learning_rate": 3.47985347985348e-05, |
| "loss": 0.5522, |
| "step": 2445 |
| }, |
| { |
| "epoch": 2.686804888095565, |
| "grad_norm": 0.45323798060417175, |
| "learning_rate": 3.4676434676434676e-05, |
| "loss": 0.6232, |
| "step": 2446 |
| }, |
| { |
| "epoch": 2.687903336537141, |
| "grad_norm": 0.577273964881897, |
| "learning_rate": 3.455433455433455e-05, |
| "loss": 0.5051, |
| "step": 2447 |
| }, |
| { |
| "epoch": 2.689001784978718, |
| "grad_norm": 0.4999620020389557, |
| "learning_rate": 3.4432234432234427e-05, |
| "loss": 0.4881, |
| "step": 2448 |
| }, |
| { |
| "epoch": 2.690100233420294, |
| "grad_norm": 0.5028046369552612, |
| "learning_rate": 3.431013431013431e-05, |
| "loss": 0.6575, |
| "step": 2449 |
| }, |
| { |
| "epoch": 2.69119868186187, |
| "grad_norm": 2.122028350830078, |
| "learning_rate": 3.4188034188034184e-05, |
| "loss": 0.7226, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.692297130303446, |
| "grad_norm": 0.4979703426361084, |
| "learning_rate": 3.406593406593406e-05, |
| "loss": 0.5768, |
| "step": 2451 |
| }, |
| { |
| "epoch": 2.693395578745023, |
| "grad_norm": 0.9270527958869934, |
| "learning_rate": 3.394383394383394e-05, |
| "loss": 0.6464, |
| "step": 2452 |
| }, |
| { |
| "epoch": 2.694494027186599, |
| "grad_norm": 1.0739809274673462, |
| "learning_rate": 3.382173382173382e-05, |
| "loss": 0.753, |
| "step": 2453 |
| }, |
| { |
| "epoch": 2.695592475628175, |
| "grad_norm": 0.6039335131645203, |
| "learning_rate": 3.36996336996337e-05, |
| "loss": 0.7909, |
| "step": 2454 |
| }, |
| { |
| "epoch": 2.6966909240697516, |
| "grad_norm": 0.49040424823760986, |
| "learning_rate": 3.357753357753358e-05, |
| "loss": 0.6112, |
| "step": 2455 |
| }, |
| { |
| "epoch": 2.6977893725113278, |
| "grad_norm": 0.6890440583229065, |
| "learning_rate": 3.345543345543345e-05, |
| "loss": 0.6849, |
| "step": 2456 |
| }, |
| { |
| "epoch": 2.698887820952904, |
| "grad_norm": 0.7819212675094604, |
| "learning_rate": 3.333333333333333e-05, |
| "loss": 0.6797, |
| "step": 2457 |
| }, |
| { |
| "epoch": 2.69998626939448, |
| "grad_norm": 1.0147050619125366, |
| "learning_rate": 3.321123321123321e-05, |
| "loss": 0.6867, |
| "step": 2458 |
| }, |
| { |
| "epoch": 2.7010847178360566, |
| "grad_norm": 1.3562036752700806, |
| "learning_rate": 3.3089133089133086e-05, |
| "loss": 0.7811, |
| "step": 2459 |
| }, |
| { |
| "epoch": 2.7021831662776328, |
| "grad_norm": 0.5813838839530945, |
| "learning_rate": 3.2967032967032964e-05, |
| "loss": 0.5405, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.7032816147192094, |
| "grad_norm": 0.6152640581130981, |
| "learning_rate": 3.284493284493284e-05, |
| "loss": 0.425, |
| "step": 2461 |
| }, |
| { |
| "epoch": 2.7043800631607855, |
| "grad_norm": 1.1984590291976929, |
| "learning_rate": 3.272283272283272e-05, |
| "loss": 0.592, |
| "step": 2462 |
| }, |
| { |
| "epoch": 2.7054785116023616, |
| "grad_norm": 0.48487693071365356, |
| "learning_rate": 3.26007326007326e-05, |
| "loss": 0.5223, |
| "step": 2463 |
| }, |
| { |
| "epoch": 2.7065769600439378, |
| "grad_norm": 0.47191065549850464, |
| "learning_rate": 3.247863247863247e-05, |
| "loss": 0.6479, |
| "step": 2464 |
| }, |
| { |
| "epoch": 2.7076754084855144, |
| "grad_norm": 1.3167297840118408, |
| "learning_rate": 3.235653235653235e-05, |
| "loss": 0.4552, |
| "step": 2465 |
| }, |
| { |
| "epoch": 2.7087738569270905, |
| "grad_norm": 1.3219714164733887, |
| "learning_rate": 3.2234432234432237e-05, |
| "loss": 0.5839, |
| "step": 2466 |
| }, |
| { |
| "epoch": 2.7098723053686666, |
| "grad_norm": 0.8047394752502441, |
| "learning_rate": 3.211233211233211e-05, |
| "loss": 0.795, |
| "step": 2467 |
| }, |
| { |
| "epoch": 2.710970753810243, |
| "grad_norm": 0.6053475737571716, |
| "learning_rate": 3.199023199023199e-05, |
| "loss": 0.743, |
| "step": 2468 |
| }, |
| { |
| "epoch": 2.7120692022518194, |
| "grad_norm": 0.4619985818862915, |
| "learning_rate": 3.1868131868131866e-05, |
| "loss": 0.642, |
| "step": 2469 |
| }, |
| { |
| "epoch": 2.7131676506933955, |
| "grad_norm": 0.8241426944732666, |
| "learning_rate": 3.1746031746031745e-05, |
| "loss": 0.521, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.7142660991349716, |
| "grad_norm": 0.4344565272331238, |
| "learning_rate": 3.162393162393162e-05, |
| "loss": 0.4615, |
| "step": 2471 |
| }, |
| { |
| "epoch": 2.715364547576548, |
| "grad_norm": 0.9640605449676514, |
| "learning_rate": 3.15018315018315e-05, |
| "loss": 0.4735, |
| "step": 2472 |
| }, |
| { |
| "epoch": 2.7164629960181244, |
| "grad_norm": 0.49423810839653015, |
| "learning_rate": 3.1379731379731374e-05, |
| "loss": 0.7547, |
| "step": 2473 |
| }, |
| { |
| "epoch": 2.717561444459701, |
| "grad_norm": 0.7234408855438232, |
| "learning_rate": 3.125763125763125e-05, |
| "loss": 0.464, |
| "step": 2474 |
| }, |
| { |
| "epoch": 2.718659892901277, |
| "grad_norm": 0.542647123336792, |
| "learning_rate": 3.113553113553113e-05, |
| "loss": 0.5563, |
| "step": 2475 |
| }, |
| { |
| "epoch": 2.719758341342853, |
| "grad_norm": 0.555722177028656, |
| "learning_rate": 3.101343101343101e-05, |
| "loss": 0.6899, |
| "step": 2476 |
| }, |
| { |
| "epoch": 2.7208567897844294, |
| "grad_norm": 0.6171600222587585, |
| "learning_rate": 3.089133089133089e-05, |
| "loss": 0.6088, |
| "step": 2477 |
| }, |
| { |
| "epoch": 2.7219552382260055, |
| "grad_norm": 0.9118738770484924, |
| "learning_rate": 3.076923076923077e-05, |
| "loss": 0.7778, |
| "step": 2478 |
| }, |
| { |
| "epoch": 2.723053686667582, |
| "grad_norm": 0.6610655784606934, |
| "learning_rate": 3.064713064713064e-05, |
| "loss": 0.6935, |
| "step": 2479 |
| }, |
| { |
| "epoch": 2.724152135109158, |
| "grad_norm": 0.6729289889335632, |
| "learning_rate": 3.0525030525030525e-05, |
| "loss": 0.792, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.725250583550735, |
| "grad_norm": 0.4955647587776184, |
| "learning_rate": 3.04029304029304e-05, |
| "loss": 0.6746, |
| "step": 2481 |
| }, |
| { |
| "epoch": 2.726349031992311, |
| "grad_norm": 0.42975953221321106, |
| "learning_rate": 3.028083028083028e-05, |
| "loss": 0.5318, |
| "step": 2482 |
| }, |
| { |
| "epoch": 2.727447480433887, |
| "grad_norm": 0.3555055856704712, |
| "learning_rate": 3.0158730158730154e-05, |
| "loss": 0.6377, |
| "step": 2483 |
| }, |
| { |
| "epoch": 2.728545928875463, |
| "grad_norm": 3.138209342956543, |
| "learning_rate": 3.0036630036630036e-05, |
| "loss": 0.6296, |
| "step": 2484 |
| }, |
| { |
| "epoch": 2.72964437731704, |
| "grad_norm": 0.5710242390632629, |
| "learning_rate": 2.9914529914529912e-05, |
| "loss": 0.8987, |
| "step": 2485 |
| }, |
| { |
| "epoch": 2.730742825758616, |
| "grad_norm": 0.5200769305229187, |
| "learning_rate": 2.979242979242979e-05, |
| "loss": 0.5154, |
| "step": 2486 |
| }, |
| { |
| "epoch": 2.731841274200192, |
| "grad_norm": 0.797572910785675, |
| "learning_rate": 2.9670329670329666e-05, |
| "loss": 0.8039, |
| "step": 2487 |
| }, |
| { |
| "epoch": 2.7329397226417687, |
| "grad_norm": 0.4667447805404663, |
| "learning_rate": 2.9548229548229548e-05, |
| "loss": 0.586, |
| "step": 2488 |
| }, |
| { |
| "epoch": 2.734038171083345, |
| "grad_norm": 0.5500869154930115, |
| "learning_rate": 2.9426129426129423e-05, |
| "loss": 0.7007, |
| "step": 2489 |
| }, |
| { |
| "epoch": 2.735136619524921, |
| "grad_norm": 0.5311625003814697, |
| "learning_rate": 2.9304029304029302e-05, |
| "loss": 0.4257, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.736235067966497, |
| "grad_norm": 0.6474941968917847, |
| "learning_rate": 2.9181929181929177e-05, |
| "loss": 0.4747, |
| "step": 2491 |
| }, |
| { |
| "epoch": 2.7373335164080737, |
| "grad_norm": 1.1186646223068237, |
| "learning_rate": 2.9059829059829056e-05, |
| "loss": 0.8177, |
| "step": 2492 |
| }, |
| { |
| "epoch": 2.73843196484965, |
| "grad_norm": 2.455371379852295, |
| "learning_rate": 2.8937728937728938e-05, |
| "loss": 0.6535, |
| "step": 2493 |
| }, |
| { |
| "epoch": 2.7395304132912264, |
| "grad_norm": 0.5033484101295471, |
| "learning_rate": 2.8815628815628813e-05, |
| "loss": 0.525, |
| "step": 2494 |
| }, |
| { |
| "epoch": 2.7406288617328025, |
| "grad_norm": 0.5826357007026672, |
| "learning_rate": 2.869352869352869e-05, |
| "loss": 0.476, |
| "step": 2495 |
| }, |
| { |
| "epoch": 2.7417273101743787, |
| "grad_norm": 0.5875104665756226, |
| "learning_rate": 2.8571428571428567e-05, |
| "loss": 0.6903, |
| "step": 2496 |
| }, |
| { |
| "epoch": 2.742825758615955, |
| "grad_norm": 0.6006028056144714, |
| "learning_rate": 2.844932844932845e-05, |
| "loss": 0.8522, |
| "step": 2497 |
| }, |
| { |
| "epoch": 2.7439242070575314, |
| "grad_norm": 0.5605003833770752, |
| "learning_rate": 2.8327228327228325e-05, |
| "loss": 0.5312, |
| "step": 2498 |
| }, |
| { |
| "epoch": 2.7450226554991075, |
| "grad_norm": 0.7641153931617737, |
| "learning_rate": 2.8205128205128204e-05, |
| "loss": 0.6841, |
| "step": 2499 |
| }, |
| { |
| "epoch": 2.7461211039406836, |
| "grad_norm": 0.5523414015769958, |
| "learning_rate": 2.808302808302808e-05, |
| "loss": 0.6582, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.7472195523822602, |
| "grad_norm": 0.40714672207832336, |
| "learning_rate": 2.796092796092796e-05, |
| "loss": 0.7493, |
| "step": 2501 |
| }, |
| { |
| "epoch": 2.7483180008238364, |
| "grad_norm": 0.6960926651954651, |
| "learning_rate": 2.7838827838827836e-05, |
| "loss": 0.7104, |
| "step": 2502 |
| }, |
| { |
| "epoch": 2.7494164492654125, |
| "grad_norm": 0.42409783601760864, |
| "learning_rate": 2.7716727716727715e-05, |
| "loss": 0.5643, |
| "step": 2503 |
| }, |
| { |
| "epoch": 2.7505148977069886, |
| "grad_norm": 0.5174455046653748, |
| "learning_rate": 2.759462759462759e-05, |
| "loss": 0.4545, |
| "step": 2504 |
| }, |
| { |
| "epoch": 2.7516133461485652, |
| "grad_norm": 0.6353528499603271, |
| "learning_rate": 2.747252747252747e-05, |
| "loss": 0.5068, |
| "step": 2505 |
| }, |
| { |
| "epoch": 2.7527117945901414, |
| "grad_norm": 0.46814125776290894, |
| "learning_rate": 2.7350427350427348e-05, |
| "loss": 0.7979, |
| "step": 2506 |
| }, |
| { |
| "epoch": 2.753810243031718, |
| "grad_norm": 0.7229417562484741, |
| "learning_rate": 2.7228327228327227e-05, |
| "loss": 0.6212, |
| "step": 2507 |
| }, |
| { |
| "epoch": 2.754908691473294, |
| "grad_norm": 1.2155603170394897, |
| "learning_rate": 2.7106227106227102e-05, |
| "loss": 0.8444, |
| "step": 2508 |
| }, |
| { |
| "epoch": 2.7560071399148702, |
| "grad_norm": 0.462703138589859, |
| "learning_rate": 2.698412698412698e-05, |
| "loss": 0.8263, |
| "step": 2509 |
| }, |
| { |
| "epoch": 2.7571055883564464, |
| "grad_norm": 0.9474642872810364, |
| "learning_rate": 2.6862026862026863e-05, |
| "loss": 0.7586, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.758204036798023, |
| "grad_norm": 4.502622127532959, |
| "learning_rate": 2.6739926739926738e-05, |
| "loss": 0.5806, |
| "step": 2511 |
| }, |
| { |
| "epoch": 2.759302485239599, |
| "grad_norm": 1.1251213550567627, |
| "learning_rate": 2.6617826617826617e-05, |
| "loss": 0.6333, |
| "step": 2512 |
| }, |
| { |
| "epoch": 2.7604009336811752, |
| "grad_norm": 0.7035579681396484, |
| "learning_rate": 2.6495726495726492e-05, |
| "loss": 0.4739, |
| "step": 2513 |
| }, |
| { |
| "epoch": 2.761499382122752, |
| "grad_norm": 0.5279493927955627, |
| "learning_rate": 2.6373626373626374e-05, |
| "loss": 0.597, |
| "step": 2514 |
| }, |
| { |
| "epoch": 2.762597830564328, |
| "grad_norm": 0.5512554049491882, |
| "learning_rate": 2.625152625152625e-05, |
| "loss": 0.6471, |
| "step": 2515 |
| }, |
| { |
| "epoch": 2.763696279005904, |
| "grad_norm": 0.857778012752533, |
| "learning_rate": 2.6129426129426128e-05, |
| "loss": 0.6172, |
| "step": 2516 |
| }, |
| { |
| "epoch": 2.7647947274474802, |
| "grad_norm": 0.5348466634750366, |
| "learning_rate": 2.6007326007326004e-05, |
| "loss": 0.8074, |
| "step": 2517 |
| }, |
| { |
| "epoch": 2.765893175889057, |
| "grad_norm": 0.5413629412651062, |
| "learning_rate": 2.5885225885225882e-05, |
| "loss": 0.3879, |
| "step": 2518 |
| }, |
| { |
| "epoch": 2.766991624330633, |
| "grad_norm": 0.569411039352417, |
| "learning_rate": 2.576312576312576e-05, |
| "loss": 0.4392, |
| "step": 2519 |
| }, |
| { |
| "epoch": 2.7680900727722095, |
| "grad_norm": 0.5127429962158203, |
| "learning_rate": 2.564102564102564e-05, |
| "loss": 0.6566, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.7691885212137857, |
| "grad_norm": 0.7328614592552185, |
| "learning_rate": 2.5518925518925515e-05, |
| "loss": 0.6801, |
| "step": 2521 |
| }, |
| { |
| "epoch": 2.770286969655362, |
| "grad_norm": 0.615686297416687, |
| "learning_rate": 2.5396825396825394e-05, |
| "loss": 0.6366, |
| "step": 2522 |
| }, |
| { |
| "epoch": 2.771385418096938, |
| "grad_norm": 0.5250161290168762, |
| "learning_rate": 2.5274725274725276e-05, |
| "loss": 0.5737, |
| "step": 2523 |
| }, |
| { |
| "epoch": 2.772483866538514, |
| "grad_norm": 0.6708832383155823, |
| "learning_rate": 2.515262515262515e-05, |
| "loss": 0.6681, |
| "step": 2524 |
| }, |
| { |
| "epoch": 2.7735823149800907, |
| "grad_norm": 0.6120278835296631, |
| "learning_rate": 2.503052503052503e-05, |
| "loss": 0.4964, |
| "step": 2525 |
| }, |
| { |
| "epoch": 2.774680763421667, |
| "grad_norm": 0.7024976015090942, |
| "learning_rate": 2.4908424908424905e-05, |
| "loss": 0.7984, |
| "step": 2526 |
| }, |
| { |
| "epoch": 2.7757792118632434, |
| "grad_norm": 7.281716823577881, |
| "learning_rate": 2.478632478632478e-05, |
| "loss": 0.7191, |
| "step": 2527 |
| }, |
| { |
| "epoch": 2.7768776603048195, |
| "grad_norm": 0.7347024083137512, |
| "learning_rate": 2.4664224664224663e-05, |
| "loss": 0.8684, |
| "step": 2528 |
| }, |
| { |
| "epoch": 2.7779761087463957, |
| "grad_norm": 1.1338274478912354, |
| "learning_rate": 2.454212454212454e-05, |
| "loss": 0.5936, |
| "step": 2529 |
| }, |
| { |
| "epoch": 2.779074557187972, |
| "grad_norm": 0.4176536202430725, |
| "learning_rate": 2.4420024420024417e-05, |
| "loss": 0.445, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.7801730056295484, |
| "grad_norm": 0.9390072822570801, |
| "learning_rate": 2.4297924297924295e-05, |
| "loss": 0.5821, |
| "step": 2531 |
| }, |
| { |
| "epoch": 2.7812714540711245, |
| "grad_norm": 1.1045840978622437, |
| "learning_rate": 2.4175824175824174e-05, |
| "loss": 0.7372, |
| "step": 2532 |
| }, |
| { |
| "epoch": 2.7823699025127007, |
| "grad_norm": 0.5568689703941345, |
| "learning_rate": 2.4053724053724053e-05, |
| "loss": 0.5005, |
| "step": 2533 |
| }, |
| { |
| "epoch": 2.7834683509542772, |
| "grad_norm": 0.2747582793235779, |
| "learning_rate": 2.3931623931623928e-05, |
| "loss": 0.5778, |
| "step": 2534 |
| }, |
| { |
| "epoch": 2.7845667993958534, |
| "grad_norm": 1.4027804136276245, |
| "learning_rate": 2.3809523809523807e-05, |
| "loss": 0.5368, |
| "step": 2535 |
| }, |
| { |
| "epoch": 2.7856652478374295, |
| "grad_norm": 0.7523220777511597, |
| "learning_rate": 2.368742368742369e-05, |
| "loss": 0.58, |
| "step": 2536 |
| }, |
| { |
| "epoch": 2.7867636962790057, |
| "grad_norm": 0.33777353167533875, |
| "learning_rate": 2.3565323565323564e-05, |
| "loss": 0.5269, |
| "step": 2537 |
| }, |
| { |
| "epoch": 2.7878621447205822, |
| "grad_norm": 0.5818787217140198, |
| "learning_rate": 2.344322344322344e-05, |
| "loss": 0.4459, |
| "step": 2538 |
| }, |
| { |
| "epoch": 2.7889605931621584, |
| "grad_norm": 0.36858034133911133, |
| "learning_rate": 2.3321123321123318e-05, |
| "loss": 0.712, |
| "step": 2539 |
| }, |
| { |
| "epoch": 2.790059041603735, |
| "grad_norm": 0.5299241542816162, |
| "learning_rate": 2.3199023199023194e-05, |
| "loss": 0.6086, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.791157490045311, |
| "grad_norm": 2.432325601577759, |
| "learning_rate": 2.3076923076923076e-05, |
| "loss": 1.0386, |
| "step": 2541 |
| }, |
| { |
| "epoch": 2.7922559384868872, |
| "grad_norm": 0.746638834476471, |
| "learning_rate": 2.2954822954822954e-05, |
| "loss": 0.7372, |
| "step": 2542 |
| }, |
| { |
| "epoch": 2.7933543869284634, |
| "grad_norm": 0.6017647981643677, |
| "learning_rate": 2.283272283272283e-05, |
| "loss": 0.9134, |
| "step": 2543 |
| }, |
| { |
| "epoch": 2.79445283537004, |
| "grad_norm": 0.7385385036468506, |
| "learning_rate": 2.271062271062271e-05, |
| "loss": 0.6827, |
| "step": 2544 |
| }, |
| { |
| "epoch": 2.795551283811616, |
| "grad_norm": 0.6607246994972229, |
| "learning_rate": 2.2588522588522587e-05, |
| "loss": 0.6333, |
| "step": 2545 |
| }, |
| { |
| "epoch": 2.7966497322531922, |
| "grad_norm": 0.40185117721557617, |
| "learning_rate": 2.2466422466422466e-05, |
| "loss": 0.6589, |
| "step": 2546 |
| }, |
| { |
| "epoch": 2.797748180694769, |
| "grad_norm": 0.48225662112236023, |
| "learning_rate": 2.234432234432234e-05, |
| "loss": 0.6571, |
| "step": 2547 |
| }, |
| { |
| "epoch": 2.798846629136345, |
| "grad_norm": 0.8996065855026245, |
| "learning_rate": 2.222222222222222e-05, |
| "loss": 0.7518, |
| "step": 2548 |
| }, |
| { |
| "epoch": 2.799945077577921, |
| "grad_norm": 0.7139112949371338, |
| "learning_rate": 2.21001221001221e-05, |
| "loss": 0.6517, |
| "step": 2549 |
| }, |
| { |
| "epoch": 2.8010435260194972, |
| "grad_norm": 0.5433416366577148, |
| "learning_rate": 2.1978021978021977e-05, |
| "loss": 0.3799, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.802141974461074, |
| "grad_norm": 0.3883088231086731, |
| "learning_rate": 2.1855921855921853e-05, |
| "loss": 0.9269, |
| "step": 2551 |
| }, |
| { |
| "epoch": 2.80324042290265, |
| "grad_norm": 0.5275357961654663, |
| "learning_rate": 2.173382173382173e-05, |
| "loss": 0.6606, |
| "step": 2552 |
| }, |
| { |
| "epoch": 2.8043388713442265, |
| "grad_norm": 0.4666341543197632, |
| "learning_rate": 2.1611721611721607e-05, |
| "loss": 0.6982, |
| "step": 2553 |
| }, |
| { |
| "epoch": 2.8054373197858027, |
| "grad_norm": 0.9221529364585876, |
| "learning_rate": 2.148962148962149e-05, |
| "loss": 0.4769, |
| "step": 2554 |
| }, |
| { |
| "epoch": 2.806535768227379, |
| "grad_norm": 0.7469640374183655, |
| "learning_rate": 2.1367521367521368e-05, |
| "loss": 0.6985, |
| "step": 2555 |
| }, |
| { |
| "epoch": 2.807634216668955, |
| "grad_norm": 0.6858775615692139, |
| "learning_rate": 2.1245421245421243e-05, |
| "loss": 0.4511, |
| "step": 2556 |
| }, |
| { |
| "epoch": 2.808732665110531, |
| "grad_norm": 1.266801357269287, |
| "learning_rate": 2.112332112332112e-05, |
| "loss": 0.421, |
| "step": 2557 |
| }, |
| { |
| "epoch": 2.8098311135521077, |
| "grad_norm": 0.5506262183189392, |
| "learning_rate": 2.1001221001221e-05, |
| "loss": 0.6082, |
| "step": 2558 |
| }, |
| { |
| "epoch": 2.810929561993684, |
| "grad_norm": 0.5359029173851013, |
| "learning_rate": 2.087912087912088e-05, |
| "loss": 0.8111, |
| "step": 2559 |
| }, |
| { |
| "epoch": 2.8120280104352604, |
| "grad_norm": 0.6969206929206848, |
| "learning_rate": 2.0757020757020754e-05, |
| "loss": 0.8331, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.8131264588768365, |
| "grad_norm": 0.6040379405021667, |
| "learning_rate": 2.0634920634920633e-05, |
| "loss": 0.575, |
| "step": 2561 |
| }, |
| { |
| "epoch": 2.8142249073184127, |
| "grad_norm": 1.3847273588180542, |
| "learning_rate": 2.0512820512820512e-05, |
| "loss": 0.5442, |
| "step": 2562 |
| }, |
| { |
| "epoch": 2.815323355759989, |
| "grad_norm": 0.8050490617752075, |
| "learning_rate": 2.039072039072039e-05, |
| "loss": 0.6267, |
| "step": 2563 |
| }, |
| { |
| "epoch": 2.8164218042015654, |
| "grad_norm": 0.5663136839866638, |
| "learning_rate": 2.0268620268620266e-05, |
| "loss": 0.5246, |
| "step": 2564 |
| }, |
| { |
| "epoch": 2.8175202526431415, |
| "grad_norm": 0.3316130042076111, |
| "learning_rate": 2.0146520146520144e-05, |
| "loss": 0.5175, |
| "step": 2565 |
| }, |
| { |
| "epoch": 2.8186187010847177, |
| "grad_norm": 0.4782855808734894, |
| "learning_rate": 2.002442002442002e-05, |
| "loss": 0.5111, |
| "step": 2566 |
| }, |
| { |
| "epoch": 2.8197171495262943, |
| "grad_norm": 0.44766396284103394, |
| "learning_rate": 1.9902319902319902e-05, |
| "loss": 0.5825, |
| "step": 2567 |
| }, |
| { |
| "epoch": 2.8208155979678704, |
| "grad_norm": 0.6830618977546692, |
| "learning_rate": 1.978021978021978e-05, |
| "loss": 0.5685, |
| "step": 2568 |
| }, |
| { |
| "epoch": 2.8219140464094465, |
| "grad_norm": 0.5860748887062073, |
| "learning_rate": 1.9658119658119656e-05, |
| "loss": 0.7557, |
| "step": 2569 |
| }, |
| { |
| "epoch": 2.8230124948510227, |
| "grad_norm": 0.49533459544181824, |
| "learning_rate": 1.953601953601953e-05, |
| "loss": 0.7326, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.8241109432925993, |
| "grad_norm": 0.4989941418170929, |
| "learning_rate": 1.9413919413919413e-05, |
| "loss": 0.5757, |
| "step": 2571 |
| }, |
| { |
| "epoch": 2.8252093917341754, |
| "grad_norm": 0.4973461627960205, |
| "learning_rate": 1.9291819291819292e-05, |
| "loss": 0.5357, |
| "step": 2572 |
| }, |
| { |
| "epoch": 2.826307840175752, |
| "grad_norm": 0.7442370057106018, |
| "learning_rate": 1.9169719169719167e-05, |
| "loss": 0.7283, |
| "step": 2573 |
| }, |
| { |
| "epoch": 2.827406288617328, |
| "grad_norm": 1.3321865797042847, |
| "learning_rate": 1.9047619047619046e-05, |
| "loss": 0.5107, |
| "step": 2574 |
| }, |
| { |
| "epoch": 2.8285047370589043, |
| "grad_norm": 0.47394871711730957, |
| "learning_rate": 1.892551892551892e-05, |
| "loss": 0.5495, |
| "step": 2575 |
| }, |
| { |
| "epoch": 2.8296031855004804, |
| "grad_norm": 0.6102151274681091, |
| "learning_rate": 1.8803418803418804e-05, |
| "loss": 0.5983, |
| "step": 2576 |
| }, |
| { |
| "epoch": 2.830701633942057, |
| "grad_norm": 0.4657471179962158, |
| "learning_rate": 1.868131868131868e-05, |
| "loss": 0.5937, |
| "step": 2577 |
| }, |
| { |
| "epoch": 2.831800082383633, |
| "grad_norm": 0.41180238127708435, |
| "learning_rate": 1.8559218559218558e-05, |
| "loss": 0.7775, |
| "step": 2578 |
| }, |
| { |
| "epoch": 2.8328985308252093, |
| "grad_norm": 3.5043845176696777, |
| "learning_rate": 1.8437118437118436e-05, |
| "loss": 0.5304, |
| "step": 2579 |
| }, |
| { |
| "epoch": 2.833996979266786, |
| "grad_norm": 0.4502231776714325, |
| "learning_rate": 1.831501831501831e-05, |
| "loss": 0.6556, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.835095427708362, |
| "grad_norm": 0.6165898442268372, |
| "learning_rate": 1.819291819291819e-05, |
| "loss": 0.8434, |
| "step": 2581 |
| }, |
| { |
| "epoch": 2.836193876149938, |
| "grad_norm": 0.5112649202346802, |
| "learning_rate": 1.807081807081807e-05, |
| "loss": 0.7429, |
| "step": 2582 |
| }, |
| { |
| "epoch": 2.8372923245915143, |
| "grad_norm": 0.4834790527820587, |
| "learning_rate": 1.7948717948717948e-05, |
| "loss": 0.5772, |
| "step": 2583 |
| }, |
| { |
| "epoch": 2.838390773033091, |
| "grad_norm": 0.4251219630241394, |
| "learning_rate": 1.7826617826617826e-05, |
| "loss": 0.5192, |
| "step": 2584 |
| }, |
| { |
| "epoch": 2.839489221474667, |
| "grad_norm": 0.7645363807678223, |
| "learning_rate": 1.7704517704517705e-05, |
| "loss": 0.6624, |
| "step": 2585 |
| }, |
| { |
| "epoch": 2.8405876699162436, |
| "grad_norm": 0.5651314854621887, |
| "learning_rate": 1.758241758241758e-05, |
| "loss": 0.5829, |
| "step": 2586 |
| }, |
| { |
| "epoch": 2.8416861183578197, |
| "grad_norm": 1.059164047241211, |
| "learning_rate": 1.746031746031746e-05, |
| "loss": 0.6688, |
| "step": 2587 |
| }, |
| { |
| "epoch": 2.842784566799396, |
| "grad_norm": 2.2424001693725586, |
| "learning_rate": 1.7338217338217338e-05, |
| "loss": 0.4515, |
| "step": 2588 |
| }, |
| { |
| "epoch": 2.843883015240972, |
| "grad_norm": 0.6211466789245605, |
| "learning_rate": 1.7216117216117213e-05, |
| "loss": 0.836, |
| "step": 2589 |
| }, |
| { |
| "epoch": 2.8449814636825486, |
| "grad_norm": 0.4224345088005066, |
| "learning_rate": 1.7094017094017092e-05, |
| "loss": 0.536, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.8460799121241247, |
| "grad_norm": 0.7985780239105225, |
| "learning_rate": 1.697191697191697e-05, |
| "loss": 0.7433, |
| "step": 2591 |
| }, |
| { |
| "epoch": 2.847178360565701, |
| "grad_norm": 1.4033039808273315, |
| "learning_rate": 1.684981684981685e-05, |
| "loss": 0.7479, |
| "step": 2592 |
| }, |
| { |
| "epoch": 2.8482768090072774, |
| "grad_norm": 1.1432255506515503, |
| "learning_rate": 1.6727716727716725e-05, |
| "loss": 0.652, |
| "step": 2593 |
| }, |
| { |
| "epoch": 2.8493752574488536, |
| "grad_norm": 0.9324535727500916, |
| "learning_rate": 1.6605616605616603e-05, |
| "loss": 0.5225, |
| "step": 2594 |
| }, |
| { |
| "epoch": 2.8504737058904297, |
| "grad_norm": 0.5573447942733765, |
| "learning_rate": 1.6483516483516482e-05, |
| "loss": 0.6649, |
| "step": 2595 |
| }, |
| { |
| "epoch": 2.851572154332006, |
| "grad_norm": 0.6875207424163818, |
| "learning_rate": 1.636141636141636e-05, |
| "loss": 0.7334, |
| "step": 2596 |
| }, |
| { |
| "epoch": 2.8526706027735824, |
| "grad_norm": 0.32099124789237976, |
| "learning_rate": 1.6239316239316236e-05, |
| "loss": 0.5732, |
| "step": 2597 |
| }, |
| { |
| "epoch": 2.8537690512151586, |
| "grad_norm": 0.4142940938472748, |
| "learning_rate": 1.6117216117216118e-05, |
| "loss": 0.6605, |
| "step": 2598 |
| }, |
| { |
| "epoch": 2.8548674996567347, |
| "grad_norm": 0.5377205610275269, |
| "learning_rate": 1.5995115995115994e-05, |
| "loss": 0.5556, |
| "step": 2599 |
| }, |
| { |
| "epoch": 2.8559659480983113, |
| "grad_norm": 0.43509960174560547, |
| "learning_rate": 1.5873015873015872e-05, |
| "loss": 0.8321, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.8570643965398874, |
| "grad_norm": 0.4376494586467743, |
| "learning_rate": 1.575091575091575e-05, |
| "loss": 0.6392, |
| "step": 2601 |
| }, |
| { |
| "epoch": 2.8581628449814636, |
| "grad_norm": 0.507837176322937, |
| "learning_rate": 1.5628815628815626e-05, |
| "loss": 0.5326, |
| "step": 2602 |
| }, |
| { |
| "epoch": 2.8592612934230397, |
| "grad_norm": 29.0502986907959, |
| "learning_rate": 1.5506715506715505e-05, |
| "loss": 0.5478, |
| "step": 2603 |
| }, |
| { |
| "epoch": 2.8603597418646163, |
| "grad_norm": 0.6940420866012573, |
| "learning_rate": 1.5384615384615384e-05, |
| "loss": 1.3063, |
| "step": 2604 |
| }, |
| { |
| "epoch": 2.8614581903061924, |
| "grad_norm": 0.7178813219070435, |
| "learning_rate": 1.5262515262515263e-05, |
| "loss": 0.7447, |
| "step": 2605 |
| }, |
| { |
| "epoch": 2.862556638747769, |
| "grad_norm": 0.6209506392478943, |
| "learning_rate": 1.514041514041514e-05, |
| "loss": 0.5496, |
| "step": 2606 |
| }, |
| { |
| "epoch": 2.863655087189345, |
| "grad_norm": 0.5526819825172424, |
| "learning_rate": 1.5018315018315018e-05, |
| "loss": 0.4224, |
| "step": 2607 |
| }, |
| { |
| "epoch": 2.8647535356309213, |
| "grad_norm": 0.5056405663490295, |
| "learning_rate": 1.4896214896214895e-05, |
| "loss": 0.6248, |
| "step": 2608 |
| }, |
| { |
| "epoch": 2.8658519840724974, |
| "grad_norm": 2.416952610015869, |
| "learning_rate": 1.4774114774114774e-05, |
| "loss": 0.7551, |
| "step": 2609 |
| }, |
| { |
| "epoch": 2.866950432514074, |
| "grad_norm": 0.52223140001297, |
| "learning_rate": 1.4652014652014651e-05, |
| "loss": 1.1146, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.86804888095565, |
| "grad_norm": 0.685767650604248, |
| "learning_rate": 1.4529914529914528e-05, |
| "loss": 0.715, |
| "step": 2611 |
| }, |
| { |
| "epoch": 2.8691473293972263, |
| "grad_norm": 0.650374174118042, |
| "learning_rate": 1.4407814407814407e-05, |
| "loss": 0.8844, |
| "step": 2612 |
| }, |
| { |
| "epoch": 2.870245777838803, |
| "grad_norm": 0.46946465969085693, |
| "learning_rate": 1.4285714285714284e-05, |
| "loss": 0.9545, |
| "step": 2613 |
| }, |
| { |
| "epoch": 2.871344226280379, |
| "grad_norm": 0.5312052369117737, |
| "learning_rate": 1.4163614163614162e-05, |
| "loss": 0.5204, |
| "step": 2614 |
| }, |
| { |
| "epoch": 2.872442674721955, |
| "grad_norm": 0.41921889781951904, |
| "learning_rate": 1.404151404151404e-05, |
| "loss": 0.4614, |
| "step": 2615 |
| }, |
| { |
| "epoch": 2.8735411231635313, |
| "grad_norm": 0.513203501701355, |
| "learning_rate": 1.3919413919413918e-05, |
| "loss": 0.613, |
| "step": 2616 |
| }, |
| { |
| "epoch": 2.874639571605108, |
| "grad_norm": 1.1020901203155518, |
| "learning_rate": 1.3797313797313795e-05, |
| "loss": 0.525, |
| "step": 2617 |
| }, |
| { |
| "epoch": 2.875738020046684, |
| "grad_norm": 0.39301392436027527, |
| "learning_rate": 1.3675213675213674e-05, |
| "loss": 0.5799, |
| "step": 2618 |
| }, |
| { |
| "epoch": 2.8768364684882606, |
| "grad_norm": 1.576910376548767, |
| "learning_rate": 1.3553113553113551e-05, |
| "loss": 0.6286, |
| "step": 2619 |
| }, |
| { |
| "epoch": 2.8779349169298367, |
| "grad_norm": 0.36711424589157104, |
| "learning_rate": 1.3431013431013431e-05, |
| "loss": 0.7542, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.879033365371413, |
| "grad_norm": 1.2777636051177979, |
| "learning_rate": 1.3308913308913308e-05, |
| "loss": 0.6269, |
| "step": 2621 |
| }, |
| { |
| "epoch": 2.880131813812989, |
| "grad_norm": 0.5584180355072021, |
| "learning_rate": 1.3186813186813187e-05, |
| "loss": 0.5633, |
| "step": 2622 |
| }, |
| { |
| "epoch": 2.8812302622545656, |
| "grad_norm": 1.2418673038482666, |
| "learning_rate": 1.3064713064713064e-05, |
| "loss": 0.537, |
| "step": 2623 |
| }, |
| { |
| "epoch": 2.8823287106961417, |
| "grad_norm": 0.5850531458854675, |
| "learning_rate": 1.2942612942612941e-05, |
| "loss": 0.595, |
| "step": 2624 |
| }, |
| { |
| "epoch": 2.883427159137718, |
| "grad_norm": 1.054592251777649, |
| "learning_rate": 1.282051282051282e-05, |
| "loss": 0.8308, |
| "step": 2625 |
| }, |
| { |
| "epoch": 2.8845256075792944, |
| "grad_norm": 0.3231412470340729, |
| "learning_rate": 1.2698412698412697e-05, |
| "loss": 0.4044, |
| "step": 2626 |
| }, |
| { |
| "epoch": 2.8856240560208706, |
| "grad_norm": 0.47942933440208435, |
| "learning_rate": 1.2576312576312576e-05, |
| "loss": 0.6299, |
| "step": 2627 |
| }, |
| { |
| "epoch": 2.8867225044624467, |
| "grad_norm": 0.4884187579154968, |
| "learning_rate": 1.2454212454212453e-05, |
| "loss": 0.6606, |
| "step": 2628 |
| }, |
| { |
| "epoch": 2.887820952904023, |
| "grad_norm": 0.6658734083175659, |
| "learning_rate": 1.2332112332112331e-05, |
| "loss": 0.642, |
| "step": 2629 |
| }, |
| { |
| "epoch": 2.8889194013455994, |
| "grad_norm": 0.24990247189998627, |
| "learning_rate": 1.2210012210012208e-05, |
| "loss": 0.4041, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.8900178497871756, |
| "grad_norm": 0.6446508169174194, |
| "learning_rate": 1.2087912087912087e-05, |
| "loss": 0.7126, |
| "step": 2631 |
| }, |
| { |
| "epoch": 2.891116298228752, |
| "grad_norm": 0.7800988554954529, |
| "learning_rate": 1.1965811965811964e-05, |
| "loss": 0.6733, |
| "step": 2632 |
| }, |
| { |
| "epoch": 2.8922147466703283, |
| "grad_norm": 0.5319482684135437, |
| "learning_rate": 1.1843711843711844e-05, |
| "loss": 0.6445, |
| "step": 2633 |
| }, |
| { |
| "epoch": 2.8933131951119044, |
| "grad_norm": 0.6029678583145142, |
| "learning_rate": 1.172161172161172e-05, |
| "loss": 0.7642, |
| "step": 2634 |
| }, |
| { |
| "epoch": 2.8944116435534806, |
| "grad_norm": 0.9029693007469177, |
| "learning_rate": 1.1599511599511597e-05, |
| "loss": 0.635, |
| "step": 2635 |
| }, |
| { |
| "epoch": 2.8955100919950567, |
| "grad_norm": 0.6022691130638123, |
| "learning_rate": 1.1477411477411477e-05, |
| "loss": 0.5361, |
| "step": 2636 |
| }, |
| { |
| "epoch": 2.8966085404366333, |
| "grad_norm": 0.6777801513671875, |
| "learning_rate": 1.1355311355311354e-05, |
| "loss": 0.5099, |
| "step": 2637 |
| }, |
| { |
| "epoch": 2.8977069888782094, |
| "grad_norm": 0.4157528877258301, |
| "learning_rate": 1.1233211233211233e-05, |
| "loss": 0.5038, |
| "step": 2638 |
| }, |
| { |
| "epoch": 2.898805437319786, |
| "grad_norm": 2.6101133823394775, |
| "learning_rate": 1.111111111111111e-05, |
| "loss": 0.6324, |
| "step": 2639 |
| }, |
| { |
| "epoch": 2.899903885761362, |
| "grad_norm": 0.6885612607002258, |
| "learning_rate": 1.0989010989010989e-05, |
| "loss": 0.4931, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.9010023342029383, |
| "grad_norm": 0.5510079264640808, |
| "learning_rate": 1.0866910866910866e-05, |
| "loss": 0.5088, |
| "step": 2641 |
| }, |
| { |
| "epoch": 2.9021007826445144, |
| "grad_norm": 0.6099854111671448, |
| "learning_rate": 1.0744810744810744e-05, |
| "loss": 0.4647, |
| "step": 2642 |
| }, |
| { |
| "epoch": 2.903199231086091, |
| "grad_norm": 0.4390881657600403, |
| "learning_rate": 1.0622710622710621e-05, |
| "loss": 0.6787, |
| "step": 2643 |
| }, |
| { |
| "epoch": 2.904297679527667, |
| "grad_norm": 0.46238628029823303, |
| "learning_rate": 1.05006105006105e-05, |
| "loss": 0.5655, |
| "step": 2644 |
| }, |
| { |
| "epoch": 2.9053961279692433, |
| "grad_norm": 0.479106605052948, |
| "learning_rate": 1.0378510378510377e-05, |
| "loss": 0.7833, |
| "step": 2645 |
| }, |
| { |
| "epoch": 2.90649457641082, |
| "grad_norm": 0.4643683135509491, |
| "learning_rate": 1.0256410256410256e-05, |
| "loss": 0.4563, |
| "step": 2646 |
| }, |
| { |
| "epoch": 2.907593024852396, |
| "grad_norm": 0.4173976480960846, |
| "learning_rate": 1.0134310134310133e-05, |
| "loss": 0.6614, |
| "step": 2647 |
| }, |
| { |
| "epoch": 2.908691473293972, |
| "grad_norm": 0.7158990502357483, |
| "learning_rate": 1.001221001221001e-05, |
| "loss": 0.7342, |
| "step": 2648 |
| }, |
| { |
| "epoch": 2.9097899217355483, |
| "grad_norm": 0.7276301980018616, |
| "learning_rate": 9.89010989010989e-06, |
| "loss": 0.6883, |
| "step": 2649 |
| }, |
| { |
| "epoch": 2.910888370177125, |
| "grad_norm": 0.63588947057724, |
| "learning_rate": 9.768009768009766e-06, |
| "loss": 0.7533, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.911986818618701, |
| "grad_norm": 1.8038127422332764, |
| "learning_rate": 9.645909645909646e-06, |
| "loss": 0.6238, |
| "step": 2651 |
| }, |
| { |
| "epoch": 2.9130852670602776, |
| "grad_norm": 0.7289617657661438, |
| "learning_rate": 9.523809523809523e-06, |
| "loss": 0.4767, |
| "step": 2652 |
| }, |
| { |
| "epoch": 2.9141837155018537, |
| "grad_norm": 0.3828502893447876, |
| "learning_rate": 9.401709401709402e-06, |
| "loss": 0.4812, |
| "step": 2653 |
| }, |
| { |
| "epoch": 2.91528216394343, |
| "grad_norm": 0.5157826542854309, |
| "learning_rate": 9.279609279609279e-06, |
| "loss": 0.703, |
| "step": 2654 |
| }, |
| { |
| "epoch": 2.916380612385006, |
| "grad_norm": 0.6833345890045166, |
| "learning_rate": 9.157509157509156e-06, |
| "loss": 0.7471, |
| "step": 2655 |
| }, |
| { |
| "epoch": 2.9174790608265826, |
| "grad_norm": 1.0189886093139648, |
| "learning_rate": 9.035409035409035e-06, |
| "loss": 0.6065, |
| "step": 2656 |
| }, |
| { |
| "epoch": 2.9185775092681587, |
| "grad_norm": 0.5197221040725708, |
| "learning_rate": 8.913308913308913e-06, |
| "loss": 0.5904, |
| "step": 2657 |
| }, |
| { |
| "epoch": 2.919675957709735, |
| "grad_norm": 0.6265780925750732, |
| "learning_rate": 8.79120879120879e-06, |
| "loss": 0.5622, |
| "step": 2658 |
| }, |
| { |
| "epoch": 2.9207744061513115, |
| "grad_norm": 0.5703533887863159, |
| "learning_rate": 8.669108669108669e-06, |
| "loss": 0.8005, |
| "step": 2659 |
| }, |
| { |
| "epoch": 2.9218728545928876, |
| "grad_norm": 0.8656613230705261, |
| "learning_rate": 8.547008547008546e-06, |
| "loss": 0.4942, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.9229713030344637, |
| "grad_norm": 0.6180423498153687, |
| "learning_rate": 8.424908424908425e-06, |
| "loss": 0.8163, |
| "step": 2661 |
| }, |
| { |
| "epoch": 2.92406975147604, |
| "grad_norm": 0.7308143377304077, |
| "learning_rate": 8.302808302808302e-06, |
| "loss": 0.7639, |
| "step": 2662 |
| }, |
| { |
| "epoch": 2.9251681999176165, |
| "grad_norm": 0.585617184638977, |
| "learning_rate": 8.18070818070818e-06, |
| "loss": 0.7614, |
| "step": 2663 |
| }, |
| { |
| "epoch": 2.9262666483591926, |
| "grad_norm": 0.5277345776557922, |
| "learning_rate": 8.058608058608059e-06, |
| "loss": 0.6489, |
| "step": 2664 |
| }, |
| { |
| "epoch": 2.927365096800769, |
| "grad_norm": 0.3540293574333191, |
| "learning_rate": 7.936507936507936e-06, |
| "loss": 0.4503, |
| "step": 2665 |
| }, |
| { |
| "epoch": 2.9284635452423453, |
| "grad_norm": 0.554492175579071, |
| "learning_rate": 7.814407814407813e-06, |
| "loss": 0.5785, |
| "step": 2666 |
| }, |
| { |
| "epoch": 2.9295619936839215, |
| "grad_norm": 0.5547875761985779, |
| "learning_rate": 7.692307692307692e-06, |
| "loss": 0.5763, |
| "step": 2667 |
| }, |
| { |
| "epoch": 2.9306604421254976, |
| "grad_norm": 0.745947003364563, |
| "learning_rate": 7.57020757020757e-06, |
| "loss": 0.512, |
| "step": 2668 |
| }, |
| { |
| "epoch": 2.931758890567074, |
| "grad_norm": 0.47691571712493896, |
| "learning_rate": 7.448107448107448e-06, |
| "loss": 0.7018, |
| "step": 2669 |
| }, |
| { |
| "epoch": 2.9328573390086503, |
| "grad_norm": 0.9611607789993286, |
| "learning_rate": 7.3260073260073255e-06, |
| "loss": 0.7419, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.9339557874502264, |
| "grad_norm": 0.5495268106460571, |
| "learning_rate": 7.203907203907203e-06, |
| "loss": 0.6096, |
| "step": 2671 |
| }, |
| { |
| "epoch": 2.935054235891803, |
| "grad_norm": 0.8863226771354675, |
| "learning_rate": 7.081807081807081e-06, |
| "loss": 0.7149, |
| "step": 2672 |
| }, |
| { |
| "epoch": 2.936152684333379, |
| "grad_norm": 0.4234665334224701, |
| "learning_rate": 6.959706959706959e-06, |
| "loss": 0.6913, |
| "step": 2673 |
| }, |
| { |
| "epoch": 2.9372511327749553, |
| "grad_norm": 0.9667326211929321, |
| "learning_rate": 6.837606837606837e-06, |
| "loss": 0.4181, |
| "step": 2674 |
| }, |
| { |
| "epoch": 2.9383495812165314, |
| "grad_norm": 0.543683648109436, |
| "learning_rate": 6.715506715506716e-06, |
| "loss": 0.6329, |
| "step": 2675 |
| }, |
| { |
| "epoch": 2.939448029658108, |
| "grad_norm": 0.5083779692649841, |
| "learning_rate": 6.5934065934065935e-06, |
| "loss": 0.8742, |
| "step": 2676 |
| }, |
| { |
| "epoch": 2.940546478099684, |
| "grad_norm": 0.7212001085281372, |
| "learning_rate": 6.4713064713064706e-06, |
| "loss": 0.6912, |
| "step": 2677 |
| }, |
| { |
| "epoch": 2.9416449265412603, |
| "grad_norm": 0.9474835991859436, |
| "learning_rate": 6.349206349206348e-06, |
| "loss": 0.649, |
| "step": 2678 |
| }, |
| { |
| "epoch": 2.942743374982837, |
| "grad_norm": 0.8142021298408508, |
| "learning_rate": 6.227106227106226e-06, |
| "loss": 0.6136, |
| "step": 2679 |
| }, |
| { |
| "epoch": 2.943841823424413, |
| "grad_norm": 2.9018187522888184, |
| "learning_rate": 6.105006105006104e-06, |
| "loss": 0.7157, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.944940271865989, |
| "grad_norm": 0.4023605287075043, |
| "learning_rate": 5.982905982905982e-06, |
| "loss": 0.5675, |
| "step": 2681 |
| }, |
| { |
| "epoch": 2.9460387203075653, |
| "grad_norm": 0.3693840801715851, |
| "learning_rate": 5.86080586080586e-06, |
| "loss": 0.5982, |
| "step": 2682 |
| }, |
| { |
| "epoch": 2.947137168749142, |
| "grad_norm": 0.4298234283924103, |
| "learning_rate": 5.738705738705739e-06, |
| "loss": 0.5379, |
| "step": 2683 |
| }, |
| { |
| "epoch": 2.948235617190718, |
| "grad_norm": 0.6495395302772522, |
| "learning_rate": 5.6166056166056165e-06, |
| "loss": 0.5411, |
| "step": 2684 |
| }, |
| { |
| "epoch": 2.9493340656322946, |
| "grad_norm": 0.44857510924339294, |
| "learning_rate": 5.494505494505494e-06, |
| "loss": 0.5154, |
| "step": 2685 |
| }, |
| { |
| "epoch": 2.9504325140738707, |
| "grad_norm": 0.7485830187797546, |
| "learning_rate": 5.372405372405372e-06, |
| "loss": 0.6595, |
| "step": 2686 |
| }, |
| { |
| "epoch": 2.951530962515447, |
| "grad_norm": 0.5141469836235046, |
| "learning_rate": 5.25030525030525e-06, |
| "loss": 0.6289, |
| "step": 2687 |
| }, |
| { |
| "epoch": 2.952629410957023, |
| "grad_norm": 0.8847435712814331, |
| "learning_rate": 5.128205128205128e-06, |
| "loss": 0.6734, |
| "step": 2688 |
| }, |
| { |
| "epoch": 2.9537278593985996, |
| "grad_norm": 0.570573091506958, |
| "learning_rate": 5.006105006105005e-06, |
| "loss": 0.7013, |
| "step": 2689 |
| }, |
| { |
| "epoch": 2.9548263078401757, |
| "grad_norm": 0.4376991391181946, |
| "learning_rate": 4.884004884004883e-06, |
| "loss": 0.5918, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.955924756281752, |
| "grad_norm": 0.5480318069458008, |
| "learning_rate": 4.7619047619047615e-06, |
| "loss": 0.6227, |
| "step": 2691 |
| }, |
| { |
| "epoch": 2.9570232047233285, |
| "grad_norm": 0.5831297636032104, |
| "learning_rate": 4.639804639804639e-06, |
| "loss": 0.6264, |
| "step": 2692 |
| }, |
| { |
| "epoch": 2.9581216531649046, |
| "grad_norm": 1.5778921842575073, |
| "learning_rate": 4.517704517704517e-06, |
| "loss": 0.6352, |
| "step": 2693 |
| }, |
| { |
| "epoch": 2.9592201016064807, |
| "grad_norm": 0.9567496180534363, |
| "learning_rate": 4.395604395604395e-06, |
| "loss": 0.6067, |
| "step": 2694 |
| }, |
| { |
| "epoch": 2.960318550048057, |
| "grad_norm": 0.5237869620323181, |
| "learning_rate": 4.273504273504273e-06, |
| "loss": 0.8241, |
| "step": 2695 |
| }, |
| { |
| "epoch": 2.9614169984896335, |
| "grad_norm": 0.3452164828777313, |
| "learning_rate": 4.151404151404151e-06, |
| "loss": 0.5718, |
| "step": 2696 |
| }, |
| { |
| "epoch": 2.9625154469312096, |
| "grad_norm": 0.42237767577171326, |
| "learning_rate": 4.0293040293040296e-06, |
| "loss": 0.5199, |
| "step": 2697 |
| }, |
| { |
| "epoch": 2.963613895372786, |
| "grad_norm": 0.7035055756568909, |
| "learning_rate": 3.907203907203907e-06, |
| "loss": 0.7078, |
| "step": 2698 |
| }, |
| { |
| "epoch": 2.9647123438143623, |
| "grad_norm": 0.39236482977867126, |
| "learning_rate": 3.785103785103785e-06, |
| "loss": 0.59, |
| "step": 2699 |
| }, |
| { |
| "epoch": 2.9658107922559385, |
| "grad_norm": 1.1658680438995361, |
| "learning_rate": 3.6630036630036627e-06, |
| "loss": 0.53, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.9669092406975146, |
| "grad_norm": 0.6797634363174438, |
| "learning_rate": 3.5409035409035406e-06, |
| "loss": 0.6763, |
| "step": 2701 |
| }, |
| { |
| "epoch": 2.968007689139091, |
| "grad_norm": 1.0421425104141235, |
| "learning_rate": 3.4188034188034185e-06, |
| "loss": 0.4, |
| "step": 2702 |
| }, |
| { |
| "epoch": 2.9691061375806673, |
| "grad_norm": 0.36937475204467773, |
| "learning_rate": 3.2967032967032968e-06, |
| "loss": 0.5401, |
| "step": 2703 |
| }, |
| { |
| "epoch": 2.9702045860222435, |
| "grad_norm": 0.4324638843536377, |
| "learning_rate": 3.174603174603174e-06, |
| "loss": 0.5882, |
| "step": 2704 |
| }, |
| { |
| "epoch": 2.97130303446382, |
| "grad_norm": 1.2700526714324951, |
| "learning_rate": 3.052503052503052e-06, |
| "loss": 0.613, |
| "step": 2705 |
| }, |
| { |
| "epoch": 2.972401482905396, |
| "grad_norm": 0.5261131525039673, |
| "learning_rate": 2.93040293040293e-06, |
| "loss": 0.6279, |
| "step": 2706 |
| }, |
| { |
| "epoch": 2.9734999313469723, |
| "grad_norm": 0.42924660444259644, |
| "learning_rate": 2.8083028083028082e-06, |
| "loss": 1.0058, |
| "step": 2707 |
| }, |
| { |
| "epoch": 2.9745983797885485, |
| "grad_norm": 3.100399971008301, |
| "learning_rate": 2.686202686202686e-06, |
| "loss": 0.5209, |
| "step": 2708 |
| }, |
| { |
| "epoch": 2.975696828230125, |
| "grad_norm": 0.3666403293609619, |
| "learning_rate": 2.564102564102564e-06, |
| "loss": 0.5231, |
| "step": 2709 |
| }, |
| { |
| "epoch": 2.976795276671701, |
| "grad_norm": 1.1315009593963623, |
| "learning_rate": 2.4420024420024414e-06, |
| "loss": 0.4449, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.9778937251132778, |
| "grad_norm": 0.3323412537574768, |
| "learning_rate": 2.3199023199023197e-06, |
| "loss": 0.4806, |
| "step": 2711 |
| }, |
| { |
| "epoch": 2.978992173554854, |
| "grad_norm": 0.7348967790603638, |
| "learning_rate": 2.1978021978021976e-06, |
| "loss": 0.7521, |
| "step": 2712 |
| }, |
| { |
| "epoch": 2.98009062199643, |
| "grad_norm": 1.018898606300354, |
| "learning_rate": 2.0757020757020754e-06, |
| "loss": 0.8468, |
| "step": 2713 |
| }, |
| { |
| "epoch": 2.981189070438006, |
| "grad_norm": 0.46808505058288574, |
| "learning_rate": 1.9536019536019533e-06, |
| "loss": 0.6992, |
| "step": 2714 |
| }, |
| { |
| "epoch": 2.9822875188795823, |
| "grad_norm": 0.5411276817321777, |
| "learning_rate": 1.8315018315018314e-06, |
| "loss": 0.5949, |
| "step": 2715 |
| }, |
| { |
| "epoch": 2.983385967321159, |
| "grad_norm": 0.45061302185058594, |
| "learning_rate": 1.7094017094017092e-06, |
| "loss": 0.4617, |
| "step": 2716 |
| }, |
| { |
| "epoch": 2.984484415762735, |
| "grad_norm": 0.44529294967651367, |
| "learning_rate": 1.587301587301587e-06, |
| "loss": 0.5811, |
| "step": 2717 |
| }, |
| { |
| "epoch": 2.9855828642043116, |
| "grad_norm": 1.255299687385559, |
| "learning_rate": 1.465201465201465e-06, |
| "loss": 1.1899, |
| "step": 2718 |
| }, |
| { |
| "epoch": 2.9866813126458878, |
| "grad_norm": 0.8325234651565552, |
| "learning_rate": 1.343101343101343e-06, |
| "loss": 0.6344, |
| "step": 2719 |
| }, |
| { |
| "epoch": 2.987779761087464, |
| "grad_norm": 1.0692095756530762, |
| "learning_rate": 1.2210012210012207e-06, |
| "loss": 0.5136, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.98887820952904, |
| "grad_norm": 0.4980855882167816, |
| "learning_rate": 1.0989010989010988e-06, |
| "loss": 0.6352, |
| "step": 2721 |
| }, |
| { |
| "epoch": 2.9899766579706166, |
| "grad_norm": 0.8502411246299744, |
| "learning_rate": 9.768009768009766e-07, |
| "loss": 0.599, |
| "step": 2722 |
| }, |
| { |
| "epoch": 2.9910751064121928, |
| "grad_norm": 0.4849570691585541, |
| "learning_rate": 8.547008547008546e-07, |
| "loss": 0.5862, |
| "step": 2723 |
| }, |
| { |
| "epoch": 2.992173554853769, |
| "grad_norm": 0.5491626858711243, |
| "learning_rate": 7.326007326007325e-07, |
| "loss": 0.5634, |
| "step": 2724 |
| }, |
| { |
| "epoch": 2.9932720032953455, |
| "grad_norm": 0.7289263606071472, |
| "learning_rate": 6.105006105006104e-07, |
| "loss": 0.6643, |
| "step": 2725 |
| }, |
| { |
| "epoch": 2.9943704517369216, |
| "grad_norm": 1.5343972444534302, |
| "learning_rate": 4.884004884004883e-07, |
| "loss": 0.71, |
| "step": 2726 |
| }, |
| { |
| "epoch": 2.9954689001784978, |
| "grad_norm": 0.5619814395904541, |
| "learning_rate": 3.6630036630036624e-07, |
| "loss": 0.721, |
| "step": 2727 |
| }, |
| { |
| "epoch": 2.996567348620074, |
| "grad_norm": 0.500442624092102, |
| "learning_rate": 2.4420024420024416e-07, |
| "loss": 0.6571, |
| "step": 2728 |
| }, |
| { |
| "epoch": 2.9976657970616505, |
| "grad_norm": 0.42292630672454834, |
| "learning_rate": 1.2210012210012208e-07, |
| "loss": 0.4772, |
| "step": 2729 |
| }, |
| { |
| "epoch": 2.9987642455032266, |
| "grad_norm": 0.4350331425666809, |
| "learning_rate": 0.0, |
| "loss": 0.7493, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.9987642455032266, |
| "step": 2730, |
| "total_flos": 1.0372510312766669e+18, |
| "train_loss": 0.674373844124022, |
| "train_runtime": 11584.4184, |
| "train_samples_per_second": 1.886, |
| "train_steps_per_second": 0.236 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2730, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0372510312766669e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|