| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143514451455146514751485149515051515152515351545155515651575158515951605161516251635164516551665167516851695170517151725173517451755176517751785179518051815182518351845185518651875188518951905191519251935194519551965197519851995200520152025203520452055206520752085209521052115212521352145215521652175218521952205221522252235224522552265227522852295230523152325233523452355236523752385239524052415242524352445245524652475248524952505251525252535254525552565257525852595260526152625263526452655266526752685269527052715272527352745275527652775278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766277627862796280628162826283628462856286628762886289629062916292629362946295629662976298629963006301630263036304630563066307630863096310631163126313631463156316631763186319632063216322632363246325632663276328632963306331633263336334633563366337633863396340634163426343634463456346634763486349635063516352635363546355635663576358635963606361636263636364636563666367636863696370637163726373637463756376637763786379638063816382638363846385638663876388638963906391639263936394639563966397639863996400640164026403640464056406640764086409641064116412641364146415641664176418641964206421642264236424642564266427642864296430643164326433643464356436643764386439644064416442644364446445644664476448644964506451645264536454645564566457645864596460646164626463646464656466646764686469647064716472647364746475647664776478647964806481648264836484648564866487648864896490649164926493649464956496649764986499650065016502650365046505650665076508650965106511651265136514651565166517651865196520652165226523652465256526652765286529653065316532653365346535653665376538653965406541654265436544654565466547654865496550655165526553655465556556655765586559656065616562656365646565656665676568656965706571657265736574657565766577657865796580658165826583658465856586658765886589659065916592659365946595659665976598659966006601660266036604660566066607660866096610661166126613661466156616661766186619662066216622662366246625662666276628662966306631663266336634663566366637663866396640664166426643664466456646664766486649665066516652665366546655665666576658665966606661666266636664666566666667666866696670667166726673667466756676667766786679668066816682668366846685668666876688668966906691669266936694669566966697669866996700670167026703670467056706670767086709671067116712671367146715671667176718671967206721672267236724672567266727672867296730673167326733673467356736673767386739674067416742674367446745674667476748674967506751675267536754675567566757675867596760676167626763676467656766676767686769677067716772677367746775677667776778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767777777877797780778177827783778477857786778777887789779077917792779377947795779677977798779978007801780278037804780578067807780878097810781178127813781478157816781778187819782078217822782378247825782678277828782978307831783278337834783578367837783878397840784178427843784478457846784778487849785078517852785378547855785678577858785978607861786278637864786578667867786878697870787178727873787478757876787778787879788078817882788378847885788678877888788978907891789278937894789578967897789878997900790179027903790479057906790779087909791079117912791379147915791679177918791979207921792279237924792579267927792879297930793179327933793479357936793779387939794079417942794379447945794679477948794979507951795279537954795579567957795879597960796179627963796479657966796779687969797079717972797379747975797679777978797979807981798279837984798579867987798879897990799179927993799479957996799779987999800080018002800380048005800680078008800980108011801280138014801580168017801880198020802180228023802480258026802780288029803080318032803380348035803680378038803980408041804280438044804580468047804880498050805180528053805480558056805780588059806080618062806380648065806680678068806980708071807280738074807580768077807880798080808180828083808480858086808780888089809080918092809380948095809680978098809981008101810281038104810581068107810881098110811181128113811481158116811781188119812081218122812381248125812681278128812981308131813281338134813581368137813881398140814181428143814481458146814781488149815081518152815381548155815681578158815981608161816281638164816581668167816881698170817181728173817481758176817781788179818081818182818381848185818681878188818981908191819281938194819581968197819881998200820182028203820482058206820782088209821082118212821382148215821682178218821982208221822282238224822582268227822882298230823182328233823482358236823782388239824082418242824382448245824682478248824982508251825282538254825582568257825882598260826182628263826482658266826782688269827082718272827382748275827682778278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769277927892799280928192829283928492859286928792889289929092919292929392949295929692979298929993009301930293039304930593069307930893099310931193129313931493159316931793189319932093219322932393249325932693279328932993309331933293339334933593369337933893399340934193429343934493459346934793489349935093519352935393549355935693579358935993609361936293639364936593669367936893699370937193729373937493759376937793789379938093819382938393849385938693879388938993909391939293939394939593969397939893999400940194029403940494059406940794089409941094119412941394149415941694179418941994209421942294239424942594269427942894299430943194329433943494359436943794389439944094419442944394449445944694479448944994509451945294539454945594569457945894599460946194629463946494659466946794689469947094719472947394749475947694779478947994809481948294839484948594869487948894899490949194929493949494959496949794989499950095019502950395049505950695079508950995109511951295139514951595169517951895199520952195229523952495259526952795289529953095319532953395349535953695379538953995409541954295439544954595469547954895499550955195529553955495559556955795589559956095619562956395649565956695679568956995709571957295739574957595769577957895799580958195829583958495859586958795889589959095919592959395949595959695979598959996009601960296039604960596069607960896099610961196129613961496159616961796189619962096219622962396249625962696279628962996309631963296339634963596369637963896399640964196429643964496459646964796489649965096519652965396549655965696579658965996609661966296639664966596669667966896699670967196729673967496759676967796789679968096819682968396849685968696879688968996909691969296939694969596969697969896999700970197029703970497059706970797089709971097119712971397149715971697179718971997209721972297239724972597269727972897299730973197329733973497359736973797389739974097419742974397449745974697479748974997509751975297539754975597569757975897599760976197629763976497659766976797689769977097719772977397749775977697779778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301103021030310304103051030610307103081030910310103111031210313103141031510316103171031810319103201032110322103231032410325103261032710328103291033010331103321033310334103351033610337103381033910340103411034210343103441034510346103471034810349103501035110352103531035410355103561035710358103591036010361103621036310364103651036610367103681036910370103711037210373103741037510376103771037810379103801038110382103831038410385103861038710388103891039010391103921039310394103951039610397103981039910400104011040210403104041040510406104071040810409104101041110412104131041410415104161041710418104191042010421104221042310424104251042610427104281042910430104311043210433104341043510436104371043810439104401044110442104431044410445104461044710448104491045010451104521045310454104551045610457104581045910460104611046210463104641046510466104671046810469104701047110472104731047410475104761047710478104791048010481104821048310484104851048610487104881048910490104911049210493104941049510496104971049810499105001050110502105031050410505105061050710508105091051010511105121051310514105151051610517105181051910520105211052210523105241052510526105271052810529105301053110532105331053410535105361053710538105391054010541105421054310544105451054610547105481054910550105511055210553105541055510556105571055810559105601056110562105631056410565105661056710568105691057010571105721057310574105751057610577105781057910580105811058210583105841058510586105871058810589105901059110592105931059410595105961059710598105991060010601106021060310604106051060610607106081060910610106111061210613106141061510616106171061810619106201062110622106231062410625106261062710628106291063010631106321063310634106351063610637106381063910640106411064210643106441064510646106471064810649106501065110652106531065410655106561065710658106591066010661106621066310664106651066610667106681066910670106711067210673106741067510676106771067810679106801068110682106831068410685106861068710688106891069010691106921069310694106951069610697106981069910700107011070210703107041070510706107071070810709107101071110712107131071410715107161071710718107191072010721107221072310724107251072610727107281072910730107311073210733107341073510736107371073810739107401074110742107431074410745107461074710748107491075010751107521075310754107551075610757107581075910760107611076210763107641076510766107671076810769107701077110772107731077410775107761077710778107791078010781107821078310784107851078610787107881078910790107911079210793107941079510796107971079810799108001080110802108031080410805108061080710808108091081010811108121081310814108151081610817108181081910820108211082210823108241082510826108271082810829108301083110832108331083410835108361083710838108391084010841108421084310844108451084610847108481084910850108511085210853108541085510856108571085810859108601086110862108631086410865108661086710868108691087010871108721087310874108751087610877108781087910880108811088210883108841088510886108871088810889108901089110892108931089410895108961089710898108991090010901109021090310904109051090610907109081090910910109111091210913109141091510916109171091810919109201092110922109231092410925109261092710928109291093010931109321093310934109351093610937109381093910940109411094210943109441094510946109471094810949109501095110952109531095410955109561095710958109591096010961109621096310964109651096610967109681096910970109711097210973109741097510976109771097810979109801098110982109831098410985109861098710988109891099010991109921099310994109951099610997109981099911000110011100211003110041100511006110071100811009110101101111012110131101411015110161101711018110191102011021110221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211142211423114241142511426114271142811429114301143111432114331143411435114361143711438114391144011441114421144311444114451144611447114481144911450114511145211453114541145511456114571145811459114601146111462114631146411465114661146711468114691147011471114721147311474114751147611477114781147911480114811148211483114841148511486114871148811489114901149111492114931149411495114961149711498114991150011501115021150311504115051150611507115081150911510115111151211513115141151511516115171151811519115201152111522115231152411525115261152711528115291153011531115321153311534115351153611537115381153911540115411154211543115441154511546115471154811549115501155111552115531155411555115561155711558115591156011561115621156311564115651156611567115681156911570115711157211573115741157511576115771157811579115801158111582115831158411585115861158711588115891159011591115921159311594115951159611597115981159911600116011160211603116041160511606116071160811609116101161111612116131161411615116161161711618116191162011621116221162311624116251162611627116281162911630116311163211633116341163511636116371163811639116401164111642116431164411645116461164711648116491165011651116521165311654116551165611657116581165911660116611166211663116641166511666116671166811669116701167111672116731167411675116761167711678116791168011681116821168311684116851168611687116881168911690116911169211693116941169511696116971169811699117001170111702117031170411705117061170711708117091171011711117121171311714117151171611717117181171911720117211172211723117241172511726117271172811729117301173111732117331173411735117361173711738117391174011741117421174311744117451174611747117481174911750117511175211753117541175511756117571175811759117601176111762117631176411765117661176711768117691177011771117721177311774117751177611777117781177911780117811178211783117841178511786117871178811789117901179111792117931179411795117961179711798117991180011801118021180311804118051180611807118081180911810118111181211813118141181511816118171181811819118201182111822118231182411825118261182711828118291183011831118321183311834118351183611837118381183911840118411184211843118441184511846118471184811849118501185111852118531185411855118561185711858118591186011861118621186311864118651186611867118681186911870118711187211873118741187511876118771187811879118801188111882118831188411885118861188711888118891189011891118921189311894118951189611897118981189911900119011190211903119041190511906119071190811909119101191111912119131191411915119161191711918119191192011921119221192311924119251192611927119281192911930119311193211933119341193511936119371193811939119401194111942119431194411945119461194711948119491195011951119521195311954119551195611957119581195911960119611196211963119641196511966119671196811969119701197111972119731197411975119761197711978119791198011981119821198311984119851198611987119881198911990119911199211993119941199511996119971199811999120001200112002120031200412005120061200712008120091201012011120121201312014120151201612017120181201912020120211202212023120241202512026120271202812029120301203112032120331203412035120361203712038120391204012041120421204312044120451204612047120481204912050120511205212053120541205512056120571205812059120601206112062120631206412065120661206712068120691207012071120721207312074120751207612077120781207912080120811208212083120841208512086120871208812089120901209112092120931209412095120961209712098120991210012101121021210312104121051210612107121081210912110121111211212113121141211512116121171211812119121201212112122121231212412125121261212712128121291213012131121321213312134121351213612137121381213912140121411214212143121441214512146121471214812149121501215112152121531215412155121561215712158121591216012161121621216312164121651216612167121681216912170121711217212173121741217512176121771217812179121801218112182121831218412185121861218712188121891219012191121921219312194121951219612197121981219912200122011220212203122041220512206122071220812209122101221112212122131221412215122161221712218122191222012221122221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211262212623126241262512626126271262812629126301263112632126331263412635126361263712638126391264012641126421264312644126451264612647126481264912650126511265212653126541265512656126571265812659126601266112662126631266412665126661266712668126691267012671126721267312674126751267612677126781267912680126811268212683126841268512686126871268812689126901269112692126931269412695126961269712698126991270012701127021270312704127051270612707127081270912710127111271212713127141271512716127171271812719127201272112722127231272412725127261272712728127291273012731127321273312734127351273612737127381273912740127411274212743127441274512746127471274812749127501275112752127531275412755127561275712758127591276012761127621276312764127651276612767127681276912770127711277212773127741277512776127771277812779127801278112782127831278412785127861278712788127891279012791127921279312794127951279612797127981279912800128011280212803128041280512806128071280812809128101281112812128131281412815128161281712818128191282012821128221282312824128251282612827128281282912830128311283212833128341283512836128371283812839128401284112842128431284412845128461284712848128491285012851128521285312854128551285612857128581285912860128611286212863128641286512866128671286812869128701287112872128731287412875128761287712878128791288012881128821288312884128851288612887128881288912890128911289212893128941289512896128971289812899129001290112902129031290412905129061290712908129091291012911129121291312914129151291612917129181291912920129211292212923129241292512926129271292812929129301293112932129331293412935129361293712938129391294012941129421294312944129451294612947129481294912950129511295212953129541295512956129571295812959129601296112962129631296412965129661296712968129691297012971129721297312974129751297612977129781297912980129811298212983129841298512986129871298812989129901299112992129931299412995129961299712998129991300013001130021300313004130051300613007130081300913010130111301213013130141301513016130171301813019130201302113022130231302413025130261302713028130291303013031130321303313034130351303613037130381303913040130411304213043130441304513046130471304813049130501305113052130531305413055130561305713058130591306013061130621306313064130651306613067130681306913070130711307213073130741307513076130771307813079130801308113082130831308413085130861308713088130891309013091130921309313094130951309613097130981309913100131011310213103131041310513106131071310813109131101311113112131131311413115131161311713118131191312013121131221312313124131251312613127131281312913130131311313213133131341313513136131371313813139131401314113142131431314413145131461314713148131491315013151131521315313154131551315613157131581315913160131611316213163131641316513166131671316813169131701317113172131731317413175131761317713178131791318013181131821318313184131851318613187131881318913190131911319213193131941319513196131971319813199132001320113202132031320413205132061320713208132091321013211132121321313214132151321613217132181321913220132211322213223132241322513226132271322813229132301323113232132331323413235132361323713238132391324013241132421324313244132451324613247132481324913250132511325213253132541325513256132571325813259132601326113262132631326413265132661326713268132691327013271132721327313274132751327613277132781327913280132811328213283132841328513286132871328813289132901329113292132931329413295132961329713298132991330013301133021330313304133051330613307133081330913310133111331213313133141331513316133171331813319133201332113322133231332413325133261332713328133291333013331133321333313334133351333613337133381333913340133411334213343133441334513346133471334813349133501335113352133531335413355133561335713358133591336013361133621336313364133651336613367133681336913370133711337213373133741337513376133771337813379133801338113382133831338413385133861338713388133891339013391133921339313394133951339613397133981339913400134011340213403134041340513406134071340813409134101341113412134131341413415134161341713418134191342013421134221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211382213823138241382513826138271382813829138301383113832138331383413835138361383713838138391384013841138421384313844138451384613847138481384913850138511385213853138541385513856138571385813859138601386113862138631386413865138661386713868138691387013871138721387313874138751387613877138781387913880138811388213883138841388513886138871388813889138901389113892138931389413895138961389713898138991390013901139021390313904139051390613907139081390913910139111391213913139141391513916139171391813919139201392113922139231392413925139261392713928139291393013931139321393313934139351393613937139381393913940139411394213943139441394513946139471394813949139501395113952139531395413955139561395713958139591396013961139621396313964139651396613967139681396913970139711397213973139741397513976139771397813979139801398113982139831398413985139861398713988139891399013991139921399313994139951399613997139981399914000140011400214003140041400514006140071400814009140101401114012140131401414015140161401714018140191402014021140221402314024140251402614027140281402914030140311403214033140341403514036140371403814039140401404114042140431404414045140461404714048140491405014051140521405314054140551405614057140581405914060140611406214063140641406514066140671406814069140701407114072140731407414075140761407714078140791408014081140821408314084140851408614087140881408914090140911409214093140941409514096140971409814099141001410114102141031410414105141061410714108141091411014111141121411314114141151411614117141181411914120141211412214123141241412514126141271412814129141301413114132141331413414135141361413714138141391414014141141421414314144141451414614147141481414914150141511415214153141541415514156141571415814159141601416114162141631416414165141661416714168141691417014171141721417314174141751417614177141781417914180141811418214183141841418514186141871418814189141901419114192141931419414195141961419714198141991420014201142021420314204142051420614207142081420914210142111421214213142141421514216142171421814219142201422114222142231422414225142261422714228142291423014231142321423314234142351423614237142381423914240142411424214243142441424514246142471424814249142501425114252142531425414255142561425714258142591426014261142621426314264142651426614267142681426914270142711427214273142741427514276142771427814279142801428114282142831428414285142861428714288142891429014291142921429314294142951429614297142981429914300143011430214303143041430514306143071430814309143101431114312143131431414315143161431714318143191432014321143221432314324143251432614327143281432914330143311433214333143341433514336143371433814339143401434114342143431434414345143461434714348143491435014351143521435314354143551435614357143581435914360143611436214363143641436514366143671436814369143701437114372143731437414375143761437714378143791438014381143821438314384143851438614387143881438914390143911439214393143941439514396143971439814399144001440114402144031440414405144061440714408144091441014411144121441314414144151441614417144181441914420144211442214423144241442514426144271442814429144301443114432144331443414435144361443714438144391444014441144421444314444144451444614447144481444914450144511445214453144541445514456144571445814459144601446114462144631446414465144661446714468144691447014471144721447314474144751447614477144781447914480144811448214483144841448514486144871448814489144901449114492144931449414495144961449714498144991450014501145021450314504145051450614507145081450914510145111451214513145141451514516145171451814519145201452114522145231452414525145261452714528145291453014531145321453314534145351453614537145381453914540145411454214543145441454514546145471454814549145501455114552145531455414555145561455714558145591456014561145621456314564145651456614567145681456914570145711457214573145741457514576145771457814579145801458114582145831458414585145861458714588145891459014591145921459314594145951459614597145981459914600146011460214603146041460514606146071460814609146101461114612146131461414615146161461714618146191462014621146221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211502215023150241502515026150271502815029150301503115032150331503415035150361503715038150391504015041150421504315044150451504615047150481504915050150511505215053150541505515056150571505815059150601506115062150631506415065150661506715068150691507015071150721507315074150751507615077150781507915080150811508215083150841508515086150871508815089150901509115092150931509415095150961509715098150991510015101151021510315104151051510615107151081510915110151111511215113151141511515116151171511815119151201512115122151231512415125151261512715128151291513015131151321513315134151351513615137151381513915140151411514215143151441514515146151471514815149151501515115152151531515415155151561515715158151591516015161151621516315164151651516615167151681516915170151711517215173151741517515176151771517815179151801518115182151831518415185151861518715188151891519015191151921519315194151951519615197151981519915200152011520215203152041520515206152071520815209152101521115212152131521415215152161521715218152191522015221152221522315224152251522615227152281522915230152311523215233152341523515236152371523815239152401524115242152431524415245152461524715248152491525015251152521525315254152551525615257152581525915260152611526215263152641526515266152671526815269152701527115272152731527415275152761527715278152791528015281152821528315284152851528615287152881528915290152911529215293152941529515296152971529815299153001530115302153031530415305153061530715308153091531015311153121531315314153151531615317153181531915320153211532215323153241532515326153271532815329153301533115332153331533415335153361533715338153391534015341153421534315344153451534615347153481534915350153511535215353153541535515356153571535815359153601536115362153631536415365153661536715368153691537015371153721537315374153751537615377153781537915380153811538215383153841538515386153871538815389153901539115392153931539415395153961539715398153991540015401154021540315404154051540615407154081540915410154111541215413154141541515416154171541815419154201542115422154231542415425154261542715428154291543015431154321543315434154351543615437154381543915440154411544215443154441544515446154471544815449154501545115452154531545415455154561545715458154591546015461154621546315464154651546615467154681546915470154711547215473154741547515476154771547815479154801548115482154831548415485154861548715488154891549015491154921549315494154951549615497154981549915500155011550215503155041550515506155071550815509155101551115512155131551415515155161551715518155191552015521155221552315524155251552615527155281552915530155311553215533155341553515536155371553815539155401554115542155431554415545155461554715548155491555015551155521555315554155551555615557155581555915560155611556215563155641556515566155671556815569155701557115572155731557415575155761557715578155791558015581155821558315584155851558615587155881558915590155911559215593155941559515596155971559815599156001560115602156031560415605156061560715608156091561015611156121561315614156151561615617156181561915620156211562215623156241562515626156271562815629156301563115632156331563415635156361563715638156391564015641156421564315644156451564615647156481564915650156511565215653156541565515656156571565815659156601566115662156631566415665156661566715668156691567015671156721567315674156751567615677156781567915680156811568215683156841568515686156871568815689156901569115692156931569415695156961569715698156991570015701157021570315704157051570615707157081570915710157111571215713157141571515716157171571815719157201572115722157231572415725157261572715728157291573015731157321573315734157351573615737157381573915740157411574215743157441574515746157471574815749157501575115752157531575415755157561575715758157591576015761157621576315764157651576615767157681576915770157711577215773157741577515776157771577815779157801578115782157831578415785157861578715788157891579015791157921579315794157951579615797157981579915800158011580215803158041580515806158071580815809158101581115812158131581415815158161581715818158191582015821158221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211622216223162241622516226162271622816229162301623116232162331623416235162361623716238162391624016241162421624316244162451624616247162481624916250162511625216253162541625516256162571625816259162601626116262162631626416265162661626716268162691627016271162721627316274162751627616277162781627916280162811628216283162841628516286162871628816289162901629116292162931629416295162961629716298162991630016301163021630316304163051630616307163081630916310163111631216313163141631516316163171631816319163201632116322163231632416325163261632716328163291633016331163321633316334163351633616337163381633916340163411634216343163441634516346163471634816349163501635116352163531635416355163561635716358163591636016361163621636316364163651636616367163681636916370163711637216373163741637516376163771637816379163801638116382163831638416385163861638716388163891639016391163921639316394163951639616397163981639916400164011640216403164041640516406164071640816409164101641116412164131641416415164161641716418164191642016421164221642316424164251642616427164281642916430164311643216433164341643516436164371643816439164401644116442164431644416445164461644716448164491645016451164521645316454164551645616457164581645916460164611646216463164641646516466164671646816469164701647116472164731647416475164761647716478164791648016481164821648316484164851648616487164881648916490164911649216493164941649516496164971649816499165001650116502165031650416505165061650716508165091651016511165121651316514165151651616517165181651916520165211652216523165241652516526165271652816529165301653116532165331653416535165361653716538165391654016541165421654316544165451654616547165481654916550165511655216553165541655516556165571655816559165601656116562165631656416565165661656716568165691657016571165721657316574165751657616577165781657916580165811658216583165841658516586165871658816589165901659116592165931659416595165961659716598165991660016601166021660316604166051660616607166081660916610166111661216613166141661516616166171661816619166201662116622166231662416625166261662716628166291663016631166321663316634166351663616637166381663916640166411664216643166441664516646166471664816649166501665116652166531665416655166561665716658166591666016661166621666316664166651666616667166681666916670166711667216673166741667516676166771667816679166801668116682166831668416685166861668716688166891669016691166921669316694166951669616697166981669916700167011670216703167041670516706167071670816709167101671116712167131671416715167161671716718167191672016721167221672316724167251672616727167281672916730167311673216733167341673516736167371673816739167401674116742167431674416745167461674716748167491675016751167521675316754167551675616757167581675916760167611676216763167641676516766167671676816769167701677116772167731677416775167761677716778167791678016781167821678316784167851678616787167881678916790167911679216793167941679516796167971679816799168001680116802168031680416805168061680716808168091681016811168121681316814168151681616817168181681916820168211682216823168241682516826168271682816829168301683116832168331683416835168361683716838168391684016841168421684316844168451684616847168481684916850168511685216853168541685516856168571685816859168601686116862168631686416865168661686716868168691687016871168721687316874168751687616877168781687916880168811688216883168841688516886168871688816889168901689116892168931689416895168961689716898168991690016901169021690316904169051690616907169081690916910169111691216913169141691516916169171691816919169201692116922169231692416925169261692716928169291693016931169321693316934169351693616937169381693916940169411694216943169441694516946169471694816949169501695116952169531695416955169561695716958169591696016961169621696316964169651696616967169681696916970169711697216973169741697516976169771697816979169801698116982169831698416985169861698716988169891699016991169921699316994169951699616997169981699917000170011700217003170041700517006170071700817009170101701117012170131701417015170161701717018170191702017021170221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211742217423174241742517426174271742817429174301743117432174331743417435174361743717438174391744017441174421744317444174451744617447174481744917450174511745217453174541745517456174571745817459174601746117462174631746417465174661746717468174691747017471174721747317474174751747617477174781747917480174811748217483174841748517486174871748817489174901749117492174931749417495174961749717498174991750017501175021750317504175051750617507175081750917510175111751217513175141751517516175171751817519175201752117522175231752417525175261752717528175291753017531175321753317534175351753617537175381753917540175411754217543175441754517546175471754817549175501755117552175531755417555175561755717558175591756017561175621756317564175651756617567175681756917570175711757217573175741757517576175771757817579175801758117582175831758417585175861758717588175891759017591175921759317594175951759617597175981759917600176011760217603176041760517606176071760817609176101761117612176131761417615176161761717618176191762017621176221762317624176251762617627176281762917630176311763217633176341763517636176371763817639176401764117642176431764417645176461764717648176491765017651176521765317654176551765617657176581765917660176611766217663176641766517666176671766817669176701767117672176731767417675176761767717678176791768017681176821768317684176851768617687176881768917690176911769217693176941769517696176971769817699177001770117702177031770417705177061770717708177091771017711177121771317714177151771617717177181771917720177211772217723177241772517726177271772817729177301773117732177331773417735177361773717738177391774017741177421774317744177451774617747177481774917750177511775217753177541775517756177571775817759177601776117762177631776417765177661776717768177691777017771177721777317774177751777617777177781777917780177811778217783177841778517786177871778817789177901779117792177931779417795177961779717798177991780017801178021780317804178051780617807178081780917810178111781217813178141781517816178171781817819178201782117822178231782417825178261782717828178291783017831178321783317834178351783617837178381783917840178411784217843178441784517846178471784817849178501785117852178531785417855178561785717858178591786017861178621786317864178651786617867178681786917870178711787217873178741787517876178771787817879178801788117882178831788417885178861788717888178891789017891178921789317894178951789617897178981789917900179011790217903179041790517906179071790817909179101791117912179131791417915179161791717918179191792017921179221792317924179251792617927179281792917930179311793217933179341793517936179371793817939179401794117942179431794417945179461794717948179491795017951179521795317954179551795617957179581795917960179611796217963179641796517966179671796817969179701797117972179731797417975179761797717978179791798017981179821798317984179851798617987179881798917990179911799217993179941799517996179971799817999180001800118002180031800418005180061800718008180091801018011180121801318014180151801618017180181801918020180211802218023180241802518026180271802818029180301803118032180331803418035180361803718038180391804018041180421804318044180451804618047180481804918050180511805218053180541805518056180571805818059180601806118062180631806418065180661806718068180691807018071180721807318074180751807618077180781807918080180811808218083180841808518086180871808818089180901809118092180931809418095180961809718098180991810018101181021810318104181051810618107181081810918110181111811218113181141811518116181171811818119181201812118122181231812418125181261812718128181291813018131181321813318134181351813618137181381813918140181411814218143181441814518146181471814818149181501815118152181531815418155181561815718158181591816018161181621816318164181651816618167181681816918170181711817218173181741817518176181771817818179181801818118182181831818418185181861818718188181891819018191181921819318194181951819618197181981819918200182011820218203182041820518206182071820818209182101821118212182131821418215182161821718218182191822018221182221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211862218623186241862518626186271862818629186301863118632186331863418635186361863718638186391864018641186421864318644186451864618647186481864918650186511865218653186541865518656186571865818659186601866118662186631866418665186661866718668186691867018671186721867318674186751867618677186781867918680186811868218683186841868518686186871868818689186901869118692186931869418695186961869718698186991870018701187021870318704187051870618707187081870918710187111871218713187141871518716187171871818719187201872118722187231872418725187261872718728187291873018731187321873318734187351873618737187381873918740187411874218743187441874518746187471874818749187501875118752187531875418755187561875718758187591876018761187621876318764187651876618767187681876918770187711877218773187741877518776187771877818779187801878118782187831878418785187861878718788187891879018791187921879318794187951879618797187981879918800188011880218803188041880518806188071880818809188101881118812188131881418815188161881718818188191882018821188221882318824188251882618827188281882918830188311883218833188341883518836188371883818839188401884118842188431884418845188461884718848188491885018851188521885318854188551885618857188581885918860188611886218863188641886518866188671886818869188701887118872188731887418875188761887718878188791888018881188821888318884188851888618887188881888918890188911889218893188941889518896188971889818899189001890118902189031890418905189061890718908189091891018911189121891318914189151891618917189181891918920189211892218923189241892518926189271892818929189301893118932189331893418935189361893718938189391894018941189421894318944189451894618947189481894918950189511895218953189541895518956189571895818959189601896118962189631896418965189661896718968189691897018971189721897318974189751897618977189781897918980189811898218983189841898518986189871898818989189901899118992189931899418995189961899718998189991900019001190021900319004190051900619007190081900919010190111901219013190141901519016190171901819019190201902119022190231902419025190261902719028190291903019031190321903319034190351903619037190381903919040190411904219043190441904519046190471904819049190501905119052190531905419055190561905719058190591906019061190621906319064190651906619067190681906919070190711907219073190741907519076190771907819079190801908119082190831908419085190861908719088190891909019091190921909319094190951909619097190981909919100191011910219103191041910519106191071910819109191101911119112191131911419115191161911719118191191912019121191221912319124191251912619127191281912919130191311913219133191341913519136191371913819139191401914119142191431914419145191461914719148191491915019151191521915319154191551915619157191581915919160191611916219163191641916519166191671916819169191701917119172191731917419175191761917719178191791918019181191821918319184191851918619187191881918919190191911919219193191941919519196191971919819199192001920119202192031920419205192061920719208192091921019211192121921319214192151921619217192181921919220192211922219223192241922519226192271922819229192301923119232192331923419235192361923719238192391924019241192421924319244192451924619247192481924919250192511925219253192541925519256192571925819259192601926119262192631926419265192661926719268192691927019271192721927319274192751927619277192781927919280192811928219283192841928519286192871928819289192901929119292192931929419295192961929719298192991930019301193021930319304193051930619307193081930919310193111931219313193141931519316193171931819319193201932119322193231932419325193261932719328193291933019331193321933319334193351933619337193381933919340193411934219343193441934519346193471934819349193501935119352193531935419355193561935719358193591936019361193621936319364193651936619367193681936919370193711937219373193741937519376193771937819379193801938119382193831938419385193861938719388193891939019391193921939319394193951939619397193981939919400194011940219403194041940519406194071940819409194101941119412194131941419415194161941719418194191942019421194221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211982219823198241982519826198271982819829198301983119832198331983419835198361983719838198391984019841198421984319844198451984619847198481984919850198511985219853198541985519856198571985819859198601986119862198631986419865198661986719868198691987019871198721987319874198751987619877198781987919880198811988219883198841988519886198871988819889198901989119892198931989419895198961989719898198991990019901199021990319904199051990619907199081990919910199111991219913199141991519916199171991819919199201992119922199231992419925199261992719928199291993019931199321993319934199351993619937199381993919940199411994219943199441994519946199471994819949199501995119952199531995419955199561995719958199591996019961199621996319964199651996619967199681996919970199711997219973199741997519976199771997819979199801998119982199831998419985199861998719988199891999019991199921999319994199951999619997199981999920000200012000220003200042000520006200072000820009200102001120012200132001420015200162001720018200192002020021200222002320024200252002620027200282002920030200312003220033200342003520036200372003820039200402004120042200432004420045200462004720048200492005020051200522005320054200552005620057200582005920060200612006220063200642006520066200672006820069200702007120072200732007420075200762007720078200792008020081200822008320084200852008620087200882008920090200912009220093200942009520096200972009820099201002010120102201032010420105201062010720108201092011020111201122011320114201152011620117201182011920120201212012220123201242012520126201272012820129201302013120132201332013420135201362013720138201392014020141201422014320144201452014620147201482014920150201512015220153201542015520156201572015820159201602016120162201632016420165201662016720168201692017020171201722017320174201752017620177201782017920180201812018220183201842018520186201872018820189201902019120192201932019420195201962019720198201992020020201202022020320204202052020620207202082020920210202112021220213202142021520216202172021820219202202022120222202232022420225202262022720228202292023020231202322023320234202352023620237202382023920240202412024220243202442024520246202472024820249202502025120252202532025420255202562025720258202592026020261202622026320264202652026620267202682026920270202712027220273202742027520276202772027820279202802028120282202832028420285202862028720288202892029020291202922029320294202952029620297202982029920300203012030220303203042030520306203072030820309203102031120312203132031420315203162031720318203192032020321203222032320324203252032620327203282032920330203312033220333203342033520336203372033820339203402034120342203432034420345203462034720348203492035020351203522035320354203552035620357203582035920360203612036220363203642036520366203672036820369203702037120372203732037420375203762037720378203792038020381203822038320384203852038620387203882038920390203912039220393203942039520396203972039820399204002040120402204032040420405204062040720408204092041020411204122041320414204152041620417204182041920420204212042220423204242042520426204272042820429204302043120432204332043420435204362043720438204392044020441204422044320444204452044620447204482044920450204512045220453204542045520456204572045820459204602046120462204632046420465204662046720468204692047020471204722047320474204752047620477204782047920480204812048220483204842048520486204872048820489204902049120492204932049420495204962049720498204992050020501205022050320504205052050620507205082050920510205112051220513205142051520516205172051820519205202052120522205232052420525205262052720528205292053020531205322053320534205352053620537205382053920540205412054220543205442054520546205472054820549205502055120552205532055420555205562055720558205592056020561205622056320564205652056620567205682056920570205712057220573205742057520576205772057820579205802058120582205832058420585205862058720588205892059020591205922059320594205952059620597205982059920600206012060220603206042060520606206072060820609206102061120612206132061420615206162061720618206192062020621206222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212102221023210242102521026210272102821029210302103121032210332103421035210362103721038210392104021041210422104321044210452104621047210482104921050210512105221053210542105521056210572105821059210602106121062210632106421065210662106721068210692107021071210722107321074210752107621077210782107921080210812108221083210842108521086210872108821089210902109121092210932109421095210962109721098210992110021101211022110321104211052110621107211082110921110211112111221113211142111521116211172111821119211202112121122211232112421125211262112721128211292113021131211322113321134211352113621137211382113921140211412114221143211442114521146211472114821149211502115121152211532115421155211562115721158211592116021161211622116321164211652116621167211682116921170211712117221173211742117521176211772117821179211802118121182211832118421185211862118721188211892119021191211922119321194211952119621197211982119921200212012120221203212042120521206212072120821209212102121121212212132121421215212162121721218212192122021221212222122321224212252122621227212282122921230212312123221233212342123521236212372123821239212402124121242212432124421245212462124721248212492125021251212522125321254212552125621257212582125921260212612126221263212642126521266212672126821269212702127121272212732127421275212762127721278212792128021281212822128321284212852128621287212882128921290212912129221293212942129521296212972129821299213002130121302213032130421305213062130721308213092131021311213122131321314213152131621317213182131921320213212132221323213242132521326213272132821329213302133121332213332133421335213362133721338213392134021341213422134321344213452134621347213482134921350213512135221353213542135521356213572135821359213602136121362213632136421365213662136721368213692137021371213722137321374213752137621377213782137921380213812138221383213842138521386213872138821389213902139121392213932139421395213962139721398213992140021401214022140321404214052140621407214082140921410214112141221413214142141521416214172141821419214202142121422214232142421425214262142721428214292143021431214322143321434214352143621437214382143921440214412144221443214442144521446214472144821449214502145121452214532145421455214562145721458214592146021461214622146321464214652146621467214682146921470214712147221473214742147521476214772147821479214802148121482214832148421485214862148721488214892149021491214922149321494214952149621497214982149921500215012150221503215042150521506215072150821509215102151121512215132151421515215162151721518215192152021521215222152321524215252152621527215282152921530215312153221533215342153521536215372153821539215402154121542215432154421545215462154721548215492155021551215522155321554215552155621557215582155921560215612156221563215642156521566215672156821569215702157121572215732157421575215762157721578215792158021581215822158321584215852158621587215882158921590215912159221593215942159521596215972159821599216002160121602216032160421605216062160721608216092161021611216122161321614216152161621617216182161921620216212162221623216242162521626216272162821629216302163121632216332163421635216362163721638216392164021641216422164321644216452164621647216482164921650216512165221653216542165521656216572165821659216602166121662216632166421665216662166721668216692167021671216722167321674216752167621677216782167921680216812168221683216842168521686216872168821689216902169121692216932169421695216962169721698216992170021701217022170321704217052170621707217082170921710217112171221713217142171521716217172171821719217202172121722217232172421725217262172721728217292173021731217322173321734217352173621737217382173921740217412174221743217442174521746217472174821749217502175121752217532175421755217562175721758217592176021761217622176321764217652176621767217682176921770217712177221773217742177521776217772177821779217802178121782217832178421785217862178721788217892179021791217922179321794217952179621797217982179921800218012180221803218042180521806218072180821809218102181121812218132181421815218162181721818218192182021821218222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212222222223222242222522226222272222822229222302223122232222332223422235222362223722238222392224022241222422224322244222452224622247222482224922250222512225222253222542225522256222572225822259222602226122262222632226422265222662226722268222692227022271222722227322274222752227622277222782227922280222812228222283222842228522286222872228822289222902229122292222932229422295222962229722298222992230022301223022230322304223052230622307223082230922310223112231222313223142231522316223172231822319223202232122322223232232422325223262232722328223292233022331223322233322334223352233622337223382233922340223412234222343223442234522346223472234822349223502235122352223532235422355223562235722358223592236022361223622236322364223652236622367223682236922370223712237222373223742237522376223772237822379223802238122382223832238422385223862238722388223892239022391223922239322394223952239622397223982239922400224012240222403224042240522406224072240822409224102241122412224132241422415224162241722418224192242022421224222242322424224252242622427224282242922430224312243222433224342243522436224372243822439224402244122442224432244422445224462244722448224492245022451224522245322454224552245622457224582245922460224612246222463224642246522466224672246822469224702247122472224732247422475224762247722478224792248022481224822248322484224852248622487224882248922490224912249222493224942249522496224972249822499225002250122502225032250422505225062250722508225092251022511225122251322514225152251622517225182251922520225212252222523225242252522526225272252822529225302253122532225332253422535225362253722538225392254022541225422254322544225452254622547225482254922550225512255222553225542255522556225572255822559225602256122562225632256422565225662256722568225692257022571225722257322574225752257622577225782257922580225812258222583225842258522586225872258822589225902259122592225932259422595225962259722598225992260022601226022260322604226052260622607226082260922610226112261222613226142261522616226172261822619226202262122622226232262422625226262262722628226292263022631226322263322634226352263622637226382263922640226412264222643226442264522646226472264822649226502265122652226532265422655226562265722658226592266022661226622266322664226652266622667226682266922670226712267222673226742267522676226772267822679226802268122682226832268422685226862268722688226892269022691226922269322694226952269622697226982269922700227012270222703227042270522706227072270822709227102271122712227132271422715227162271722718227192272022721227222272322724227252272622727227282272922730227312273222733227342273522736227372273822739227402274122742227432274422745227462274722748227492275022751227522275322754227552275622757227582275922760227612276222763227642276522766227672276822769227702277122772227732277422775227762277722778227792278022781227822278322784227852278622787227882278922790227912279222793227942279522796227972279822799228002280122802228032280422805228062280722808228092281022811228122281322814228152281622817228182281922820228212282222823228242282522826228272282822829228302283122832228332283422835228362283722838228392284022841228422284322844228452284622847228482284922850228512285222853228542285522856228572285822859228602286122862228632286422865228662286722868228692287022871228722287322874228752287622877228782287922880228812288222883228842288522886228872288822889228902289122892228932289422895228962289722898228992290022901229022290322904229052290622907229082290922910229112291222913229142291522916229172291822919229202292122922229232292422925229262292722928229292293022931229322293322934229352293622937229382293922940229412294222943229442294522946229472294822949229502295122952229532295422955229562295722958229592296022961229622296322964229652296622967229682296922970229712297222973229742297522976229772297822979229802298122982229832298422985229862298722988229892299022991229922299322994229952299622997229982299923000230012300223003230042300523006230072300823009230102301123012230132301423015230162301723018230192302023021230222302323024230252302623027230282302923030230312303223033230342303523036230372303823039230402304123042230432304423045230462304723048230492305023051230522305323054230552305623057230582305923060230612306223063230642306523066230672306823069230702307123072230732307423075230762307723078230792308023081230822308323084230852308623087230882308923090230912309223093230942309523096230972309823099231002310123102231032310423105231062310723108231092311023111231122311323114231152311623117231182311923120231212312223123231242312523126231272312823129231302313123132231332313423135231362313723138231392314023141231422314323144231452314623147231482314923150231512315223153231542315523156231572315823159231602316123162231632316423165231662316723168231692317023171231722317323174231752317623177231782317923180231812318223183231842318523186231872318823189231902319123192231932319423195231962319723198231992320023201232022320323204232052320623207232082320923210232112321223213232142321523216232172321823219232202322123222232232322423225232262322723228232292323023231232322323323234232352323623237232382323923240232412324223243232442324523246232472324823249232502325123252232532325423255232562325723258232592326023261232622326323264232652326623267232682326923270232712327223273232742327523276232772327823279232802328123282232832328423285232862328723288232892329023291232922329323294232952329623297232982329923300233012330223303233042330523306233072330823309233102331123312233132331423315233162331723318233192332023321233222332323324233252332623327233282332923330233312333223333233342333523336233372333823339233402334123342233432334423345233462334723348233492335023351233522335323354233552335623357233582335923360233612336223363233642336523366233672336823369233702337123372233732337423375233762337723378233792338023381233822338323384233852338623387233882338923390233912339223393233942339523396233972339823399234002340123402234032340423405234062340723408234092341023411234122341323414234152341623417234182341923420234212342223423234242342523426234272342823429234302343123432234332343423435234362343723438234392344023441234422344323444234452344623447234482344923450234512345223453234542345523456234572345823459234602346123462234632346423465234662346723468234692347023471234722347323474234752347623477234782347923480234812348223483234842348523486234872348823489234902349123492234932349423495234962349723498234992350023501235022350323504235052350623507235082350923510235112351223513235142351523516235172351823519235202352123522235232352423525235262352723528235292353023531235322353323534235352353623537235382353923540235412354223543235442354523546235472354823549235502355123552235532355423555235562355723558235592356023561235622356323564235652356623567235682356923570235712357223573235742357523576235772357823579235802358123582235832358423585235862358723588235892359023591235922359323594235952359623597235982359923600236012360223603236042360523606236072360823609236102361123612236132361423615236162361723618236192362023621236222362323624236252362623627236282362923630236312363223633236342363523636236372363823639236402364123642236432364423645236462364723648236492365023651236522365323654236552365623657236582365923660236612366223663236642366523666236672366823669236702367123672236732367423675236762367723678236792368023681236822368323684236852368623687236882368923690236912369223693236942369523696236972369823699237002370123702237032370423705237062370723708237092371023711237122371323714237152371623717237182371923720237212372223723237242372523726237272372823729237302373123732237332373423735237362373723738237392374023741237422374323744237452374623747237482374923750237512375223753237542375523756237572375823759237602376123762237632376423765237662376723768237692377023771237722377323774237752377623777237782377923780237812378223783237842378523786237872378823789237902379123792237932379423795237962379723798237992380023801238022380323804238052380623807238082380923810238112381223813238142381523816238172381823819238202382123822238232382423825238262382723828238292383023831238322383323834238352383623837238382383923840238412384223843238442384523846238472384823849238502385123852238532385423855238562385723858238592386023861238622386323864238652386623867238682386923870238712387223873238742387523876238772387823879238802388123882238832388423885238862388723888238892389023891238922389323894238952389623897238982389923900239012390223903239042390523906239072390823909239102391123912239132391423915239162391723918239192392023921239222392323924239252392623927239282392923930239312393223933239342393523936239372393823939239402394123942239432394423945239462394723948239492395023951239522395323954239552395623957239582395923960239612396223963239642396523966239672396823969239702397123972239732397423975239762397723978239792398023981239822398323984239852398623987239882398923990239912399223993239942399523996239972399823999240002400124002240032400424005240062400724008240092401024011240122401324014240152401624017240182401924020240212402224023240242402524026240272402824029240302403124032240332403424035240362403724038240392404024041240422404324044240452404624047240482404924050240512405224053240542405524056240572405824059240602406124062240632406424065240662406724068240692407024071240722407324074240752407624077240782407924080240812408224083240842408524086240872408824089240902409124092240932409424095240962409724098240992410024101241022410324104241052410624107241082410924110241112411224113241142411524116241172411824119241202412124122241232412424125241262412724128241292413024131241322413324134241352413624137241382413924140241412414224143241442414524146241472414824149241502415124152241532415424155241562415724158241592416024161241622416324164241652416624167241682416924170241712417224173241742417524176241772417824179241802418124182241832418424185241862418724188241892419024191241922419324194241952419624197241982419924200242012420224203242042420524206242072420824209242102421124212242132421424215242162421724218242192422024221242222422324224242252422624227242282422924230242312423224233242342423524236242372423824239242402424124242242432424424245242462424724248242492425024251242522425324254242552425624257242582425924260242612426224263242642426524266242672426824269242702427124272242732427424275242762427724278242792428024281242822428324284242852428624287242882428924290242912429224293242942429524296242972429824299243002430124302243032430424305243062430724308243092431024311243122431324314243152431624317243182431924320243212432224323243242432524326243272432824329243302433124332243332433424335243362433724338243392434024341243422434324344243452434624347243482434924350243512435224353243542435524356243572435824359243602436124362243632436424365243662436724368243692437024371243722437324374243752437624377243782437924380243812438224383243842438524386243872438824389243902439124392243932439424395243962439724398243992440024401244022440324404244052440624407244082440924410244112441224413244142441524416244172441824419244202442124422244232442424425244262442724428244292443024431244322443324434244352443624437244382443924440244412444224443244442444524446244472444824449244502445124452244532445424455244562445724458244592446024461244622446324464244652446624467244682446924470244712447224473244742447524476244772447824479244802448124482244832448424485244862448724488244892449024491244922449324494244952449624497244982449924500245012450224503245042450524506245072450824509245102451124512245132451424515245162451724518245192452024521245222452324524245252452624527245282452924530245312453224533245342453524536245372453824539245402454124542245432454424545245462454724548245492455024551245522455324554245552455624557245582455924560245612456224563245642456524566245672456824569245702457124572245732457424575245762457724578245792458024581245822458324584245852458624587245882458924590245912459224593245942459524596245972459824599246002460124602246032460424605246062460724608246092461024611246122461324614246152461624617246182461924620246212462224623246242462524626246272462824629246302463124632246332463424635246362463724638246392464024641246422464324644246452464624647246482464924650246512465224653246542465524656246572465824659246602466124662246632466424665246662466724668246692467024671246722467324674246752467624677246782467924680246812468224683246842468524686246872468824689246902469124692246932469424695246962469724698246992470024701247022470324704247052470624707247082470924710247112471224713247142471524716247172471824719247202472124722247232472424725247262472724728247292473024731247322473324734247352473624737247382473924740247412474224743247442474524746247472474824749247502475124752247532475424755247562475724758247592476024761247622476324764247652476624767247682476924770247712477224773247742477524776247772477824779247802478124782247832478424785247862478724788247892479024791247922479324794247952479624797247982479924800248012480224803248042480524806248072480824809248102481124812248132481424815248162481724818248192482024821248222482324824248252482624827248282482924830248312483224833248342483524836248372483824839248402484124842248432484424845248462484724848248492485024851248522485324854248552485624857248582485924860248612486224863248642486524866248672486824869248702487124872248732487424875248762487724878248792488024881248822488324884248852488624887248882488924890248912489224893248942489524896248972489824899249002490124902249032490424905249062490724908249092491024911249122491324914249152491624917249182491924920249212492224923249242492524926249272492824929249302493124932249332493424935249362493724938249392494024941249422494324944249452494624947249482494924950249512495224953249542495524956249572495824959249602496124962249632496424965249662496724968249692497024971249722497324974249752497624977249782497924980249812498224983249842498524986249872498824989249902499124992249932499424995249962499724998249992500025001250022500325004250052500625007250082500925010250112501225013250142501525016250172501825019250202502125022250232502425025250262502725028250292503025031250322503325034250352503625037250382503925040250412504225043250442504525046250472504825049250502505125052250532505425055250562505725058250592506025061250622506325064250652506625067250682506925070250712507225073250742507525076250772507825079250802508125082250832508425085250862508725088250892509025091250922509325094250952509625097250982509925100251012510225103251042510525106251072510825109251102511125112251132511425115251162511725118251192512025121251222512325124251252512625127251282512925130251312513225133251342513525136251372513825139251402514125142251432514425145251462514725148251492515025151251522515325154251552515625157251582515925160251612516225163251642516525166251672516825169251702517125172251732517425175251762517725178251792518025181251822518325184251852518625187251882518925190251912519225193251942519525196251972519825199252002520125202252032520425205252062520725208252092521025211252122521325214252152521625217252182521925220252212522225223252242522525226252272522825229252302523125232252332523425235252362523725238252392524025241252422524325244252452524625247252482524925250252512525225253252542525525256252572525825259252602526125262252632526425265252662526725268252692527025271252722527325274252752527625277252782527925280252812528225283252842528525286252872528825289252902529125292252932529425295252962529725298252992530025301253022530325304253052530625307253082530925310253112531225313253142531525316253172531825319253202532125322253232532425325253262532725328253292533025331253322533325334253352533625337253382533925340253412534225343253442534525346253472534825349253502535125352253532535425355253562535725358253592536025361253622536325364253652536625367253682536925370253712537225373253742537525376253772537825379253802538125382253832538425385253862538725388253892539025391253922539325394253952539625397253982539925400254012540225403254042540525406254072540825409254102541125412254132541425415254162541725418254192542025421254222542325424254252542625427254282542925430254312543225433254342543525436254372543825439254402544125442254432544425445254462544725448254492545025451254522545325454254552545625457254582545925460254612546225463254642546525466254672546825469254702547125472254732547425475254762547725478254792548025481254822548325484254852548625487254882548925490254912549225493254942549525496254972549825499255002550125502255032550425505255062550725508255092551025511255122551325514255152551625517255182551925520255212552225523255242552525526255272552825529255302553125532255332553425535255362553725538255392554025541255422554325544255452554625547255482554925550255512555225553255542555525556255572555825559255602556125562255632556425565255662556725568255692557025571255722557325574255752557625577255782557925580255812558225583255842558525586255872558825589255902559125592255932559425595255962559725598255992560025601256022560325604256052560625607256082560925610256112561225613256142561525616256172561825619256202562125622256232562425625256262562725628256292563025631256322563325634256352563625637256382563925640256412564225643256442564525646256472564825649256502565125652256532565425655256562565725658256592566025661256622566325664256652566625667256682566925670256712567225673256742567525676256772567825679256802568125682256832568425685256862568725688256892569025691256922569325694256952569625697256982569925700257012570225703257042570525706257072570825709257102571125712257132571425715257162571725718257192572025721257222572325724257252572625727257282572925730257312573225733257342573525736257372573825739257402574125742257432574425745257462574725748257492575025751257522575325754257552575625757257582575925760257612576225763257642576525766257672576825769257702577125772257732577425775257762577725778257792578025781257822578325784257852578625787257882578925790257912579225793257942579525796257972579825799258002580125802258032580425805258062580725808258092581025811258122581325814258152581625817258182581925820258212582225823258242582525826258272582825829258302583125832258332583425835258362583725838258392584025841258422584325844258452584625847258482584925850258512585225853258542585525856258572585825859258602586125862258632586425865258662586725868258692587025871258722587325874258752587625877258782587925880258812588225883258842588525886258872588825889258902589125892258932589425895258962589725898258992590025901259022590325904259052590625907259082590925910259112591225913259142591525916259172591825919259202592125922259232592425925259262592725928259292593025931259322593325934259352593625937259382593925940259412594225943259442594525946259472594825949259502595125952259532595425955259562595725958259592596025961259622596325964259652596625967259682596925970259712597225973259742597525976259772597825979259802598125982259832598425985259862598725988259892599025991259922599325994259952599625997259982599926000260012600226003260042600526006260072600826009260102601126012260132601426015260162601726018260192602026021260222602326024260252602626027260282602926030260312603226033260342603526036260372603826039260402604126042260432604426045260462604726048260492605026051260522605326054260552605626057260582605926060260612606226063260642606526066260672606826069260702607126072260732607426075260762607726078260792608026081260822608326084260852608626087260882608926090260912609226093260942609526096260972609826099261002610126102261032610426105261062610726108261092611026111261122611326114261152611626117261182611926120261212612226123261242612526126261272612826129261302613126132261332613426135261362613726138261392614026141261422614326144261452614626147261482614926150261512615226153261542615526156261572615826159261602616126162261632616426165261662616726168261692617026171261722617326174261752617626177261782617926180261812618226183261842618526186261872618826189261902619126192261932619426195261962619726198261992620026201262022620326204262052620626207262082620926210262112621226213262142621526216262172621826219262202622126222262232622426225262262622726228262292623026231262322623326234262352623626237262382623926240262412624226243262442624526246262472624826249262502625126252262532625426255262562625726258262592626026261262622626326264262652626626267262682626926270262712627226273262742627526276262772627826279262802628126282262832628426285262862628726288262892629026291262922629326294262952629626297262982629926300263012630226303263042630526306263072630826309263102631126312263132631426315263162631726318263192632026321263222632326324263252632626327263282632926330263312633226333263342633526336263372633826339263402634126342263432634426345263462634726348263492635026351263522635326354263552635626357263582635926360263612636226363263642636526366263672636826369263702637126372263732637426375263762637726378263792638026381263822638326384263852638626387263882638926390263912639226393263942639526396263972639826399264002640126402264032640426405264062640726408264092641026411264122641326414264152641626417264182641926420264212642226423264242642526426264272642826429264302643126432264332643426435264362643726438264392644026441264422644326444264452644626447264482644926450264512645226453264542645526456264572645826459264602646126462264632646426465264662646726468264692647026471264722647326474264752647626477264782647926480264812648226483264842648526486264872648826489264902649126492264932649426495264962649726498264992650026501265022650326504265052650626507265082650926510265112651226513265142651526516265172651826519265202652126522265232652426525265262652726528265292653026531265322653326534265352653626537265382653926540265412654226543265442654526546265472654826549265502655126552265532655426555265562655726558265592656026561265622656326564265652656626567265682656926570265712657226573265742657526576265772657826579265802658126582265832658426585265862658726588265892659026591265922659326594265952659626597265982659926600266012660226603266042660526606266072660826609266102661126612266132661426615266162661726618266192662026621266222662326624266252662626627266282662926630266312663226633266342663526636266372663826639266402664126642266432664426645266462664726648266492665026651266522665326654266552665626657266582665926660266612666226663266642666526666266672666826669266702667126672266732667426675266762667726678266792668026681266822668326684266852668626687266882668926690266912669226693266942669526696266972669826699267002670126702267032670426705267062670726708267092671026711267122671326714267152671626717267182671926720267212672226723267242672526726267272672826729267302673126732267332673426735267362673726738267392674026741267422674326744267452674626747267482674926750267512675226753267542675526756267572675826759267602676126762267632676426765267662676726768267692677026771267722677326774267752677626777267782677926780267812678226783267842678526786267872678826789267902679126792267932679426795267962679726798267992680026801268022680326804268052680626807268082680926810268112681226813268142681526816268172681826819268202682126822268232682426825268262682726828268292683026831268322683326834268352683626837268382683926840268412684226843268442684526846268472684826849268502685126852268532685426855268562685726858268592686026861268622686326864268652686626867268682686926870268712687226873268742687526876268772687826879268802688126882268832688426885268862688726888268892689026891268922689326894268952689626897268982689926900269012690226903269042690526906269072690826909269102691126912269132691426915269162691726918269192692026921269222692326924269252692626927269282692926930269312693226933269342693526936269372693826939269402694126942269432694426945269462694726948269492695026951269522695326954269552695626957269582695926960269612696226963269642696526966269672696826969269702697126972269732697426975269762697726978269792698026981269822698326984269852698626987269882698926990269912699226993269942699526996269972699826999270002700127002270032700427005270062700727008270092701027011270122701327014270152701627017270182701927020270212702227023270242702527026270272702827029270302703127032270332703427035270362703727038270392704027041270422704327044270452704627047270482704927050270512705227053270542705527056270572705827059270602706127062270632706427065270662706727068270692707027071270722707327074270752707627077270782707927080270812708227083270842708527086270872708827089270902709127092270932709427095270962709727098270992710027101271022710327104271052710627107271082710927110271112711227113271142711527116271172711827119271202712127122271232712427125271262712727128271292713027131271322713327134271352713627137271382713927140271412714227143271442714527146271472714827149271502715127152271532715427155271562715727158271592716027161271622716327164271652716627167271682716927170271712717227173271742717527176271772717827179271802718127182271832718427185271862718727188271892719027191271922719327194271952719627197271982719927200272012720227203272042720527206272072720827209272102721127212272132721427215272162721727218272192722027221272222722327224272252722627227272282722927230272312723227233272342723527236272372723827239272402724127242272432724427245272462724727248272492725027251272522725327254272552725627257272582725927260272612726227263272642726527266272672726827269272702727127272272732727427275272762727727278272792728027281272822728327284272852728627287272882728927290272912729227293272942729527296272972729827299273002730127302273032730427305273062730727308273092731027311273122731327314273152731627317273182731927320273212732227323273242732527326273272732827329273302733127332273332733427335273362733727338273392734027341273422734327344273452734627347273482734927350273512735227353273542735527356273572735827359273602736127362273632736427365273662736727368273692737027371273722737327374273752737627377273782737927380273812738227383273842738527386273872738827389273902739127392273932739427395273962739727398273992740027401274022740327404274052740627407274082740927410274112741227413274142741527416274172741827419274202742127422274232742427425274262742727428274292743027431274322743327434274352743627437274382743927440274412744227443274442744527446274472744827449274502745127452274532745427455274562745727458274592746027461274622746327464274652746627467274682746927470274712747227473274742747527476274772747827479274802748127482274832748427485274862748727488274892749027491274922749327494274952749627497274982749927500275012750227503275042750527506275072750827509275102751127512275132751427515275162751727518275192752027521275222752327524275252752627527275282752927530275312753227533275342753527536275372753827539275402754127542275432754427545275462754727548275492755027551275522755327554275552755627557275582755927560275612756227563275642756527566275672756827569275702757127572275732757427575275762757727578275792758027581275822758327584275852758627587275882758927590275912759227593275942759527596275972759827599276002760127602276032760427605276062760727608276092761027611276122761327614276152761627617276182761927620276212762227623276242762527626276272762827629276302763127632276332763427635276362763727638276392764027641276422764327644276452764627647276482764927650276512765227653276542765527656276572765827659276602766127662276632766427665276662766727668276692767027671276722767327674276752767627677276782767927680276812768227683276842768527686276872768827689276902769127692276932769427695276962769727698276992770027701277022770327704277052770627707277082770927710277112771227713277142771527716277172771827719277202772127722277232772427725277262772727728277292773027731277322773327734277352773627737277382773927740277412774227743277442774527746277472774827749277502775127752277532775427755277562775727758277592776027761277622776327764277652776627767277682776927770277712777227773277742777527776277772777827779277802778127782277832778427785277862778727788277892779027791277922779327794277952779627797277982779927800278012780227803278042780527806278072780827809278102781127812278132781427815278162781727818278192782027821278222782327824278252782627827278282782927830278312783227833278342783527836278372783827839278402784127842278432784427845278462784727848278492785027851278522785327854278552785627857278582785927860278612786227863278642786527866278672786827869278702787127872278732787427875278762787727878278792788027881278822788327884278852788627887278882788927890278912789227893278942789527896278972789827899279002790127902279032790427905279062790727908279092791027911279122791327914279152791627917279182791927920279212792227923279242792527926279272792827929279302793127932279332793427935279362793727938279392794027941279422794327944279452794627947279482794927950279512795227953279542795527956279572795827959279602796127962279632796427965279662796727968279692797027971279722797327974279752797627977279782797927980279812798227983279842798527986279872798827989279902799127992279932799427995279962799727998279992800028001280022800328004280052800628007280082800928010280112801228013280142801528016280172801828019280202802128022280232802428025280262802728028280292803028031280322803328034280352803628037280382803928040280412804228043280442804528046280472804828049280502805128052280532805428055280562805728058280592806028061280622806328064280652806628067280682806928070280712807228073280742807528076280772807828079280802808128082280832808428085280862808728088280892809028091280922809328094280952809628097280982809928100281012810228103281042810528106281072810828109281102811128112281132811428115281162811728118281192812028121281222812328124281252812628127281282812928130281312813228133 |
- 2025-05-16 01:40:55,678 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-16 01:40:55,679 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-16 01:40:55,679 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-16 01:40:55,683 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-16 02:00:47,526 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-16 02:00:47,526 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-16 02:00:47,526 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-16 02:00:47,529 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-16 02:00:47,720 - __main__ - INFO - Starting pipeline with PID 347737
- 2025-05-16 02:00:47,720 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-16 02:03:44,171 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-16 02:03:44,171 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-16 02:03:44,171 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-16 02:03:44,175 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-16 02:03:44,416 - __main__ - INFO - Starting pipeline with PID 347855
- 2025-05-16 02:03:44,416 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-16 02:06:11,039 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-16 02:06:11,039 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-16 02:06:11,039 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-16 02:06:11,043 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-16 02:06:11,311 - __main__ - INFO - Starting pipeline with PID 347960
- 2025-05-16 02:06:11,311 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 01:34:19,419 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 01:34:19,419 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 01:34:19,420 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 01:34:19,424 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 01:34:19,659 - __main__ - INFO - Starting pipeline with PID 370510
- 2025-05-17 01:34:19,659 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 01:42:18,000 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 01:42:18,000 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 01:42:18,000 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 01:42:18,004 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 01:42:18,204 - __main__ - INFO - Starting pipeline with PID 370697
- 2025-05-17 01:42:18,204 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 01:46:11,794 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 01:46:12,829 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 01:46:13,879 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 01:46:14,944 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 01:46:16,011 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 01:46:17,040 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 01:46:17,815 - sglang - INFO - [2025-05-17 01:46:17] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=47741023, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 01:46:17,815 - __main__ - INFO - [2025-05-17 01:46:17] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=47741023, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 01:46:18,110 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 01:46:19,171 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 01:46:20,235 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 01:46:21,302 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 01:46:22,439 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 01:46:23,255 - sglang - INFO - [2025-05-17 01:46:23] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 01:46:23,255 - __main__ - INFO - [2025-05-17 01:46:23] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 01:46:23,515 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 01:46:24,582 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 01:46:25,649 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 01:46:26,716 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 01:46:27,783 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 01:46:28,847 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 01:46:29,906 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 01:46:30,974 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 01:46:32,048 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 01:46:32,899 - sglang - INFO - [2025-05-17 01:46:32 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 01:46:32,900 - __main__ - INFO - [2025-05-17 01:46:32 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 01:46:33,125 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 01:46:33,730 - sglang - INFO - [2025-05-17 01:46:33 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 01:46:33,730 - __main__ - INFO - [2025-05-17 01:46:33 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 01:46:33,730 - sglang - INFO - [2025-05-17 01:46:33 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 01:46:33,730 - __main__ - INFO - [2025-05-17 01:46:33 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 01:46:33,731 - sglang - INFO - [2025-05-17 01:46:33 TP0] Init torch distributed begin.
- 2025-05-17 01:46:33,731 - __main__ - INFO - [2025-05-17 01:46:33 TP0] Init torch distributed begin.
- 2025-05-17 01:46:34,202 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 01:46:35,274 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 01:46:36,341 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 01:46:37,409 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 01:46:38,477 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 01:46:39,122 - sglang - INFO - [2025-05-17 01:46:39 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 01:46:39,123 - __main__ - INFO - [2025-05-17 01:46:39 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 01:46:39,555 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 01:46:40,623 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 01:46:40,752 - sglang - INFO - [2025-05-17 01:46:40 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 01:46:40,752 - __main__ - INFO - [2025-05-17 01:46:40 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 01:46:41,534 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 01:46:41,534 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 01:46:41,700 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 01:46:41,980 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.25it/s]
- 2025-05-17 01:46:41,980 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.25it/s]
- 2025-05-17 01:46:42,778 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 01:46:43,261 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.07it/s]
- 2025-05-17 01:46:43,261 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.07it/s]
- 2025-05-17 01:46:43,855 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 01:46:44,579 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.11s/it]
- 2025-05-17 01:46:44,579 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.11s/it]
- 2025-05-17 01:46:44,932 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 01:46:45,749 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.13s/it]
- 2025-05-17 01:46:45,749 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.13s/it]
- 2025-05-17 01:46:45,750 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.05s/it]
- 2025-05-17 01:46:45,750 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.05s/it]
- 2025-05-17 01:46:45,750 - sglang - INFO -
- 2025-05-17 01:46:45,750 - __main__ - INFO -
- 2025-05-17 01:46:45,997 - sglang - INFO - [2025-05-17 01:46:45 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 01:46:45,997 - __main__ - INFO - [2025-05-17 01:46:45 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 01:46:45,997 - sglang - INFO - [2025-05-17 01:46:45 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 01:46:45,997 - __main__ - INFO - [2025-05-17 01:46:45 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 01:46:45,997 - sglang - INFO - [2025-05-17 01:46:45 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 01:46:45,998 - __main__ - INFO - [2025-05-17 01:46:45 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 01:46:45,999 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 01:46:46,102 - sglang - INFO - [2025-05-17 01:46:46 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 01:46:46,103 - __main__ - INFO - [2025-05-17 01:46:46 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 01:46:47,075 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 01:46:48,152 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 01:46:48,276 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.45s/it]
50%|█████ | 2/4 [00:01<00:01, 1.35it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.96it/s]
100%|██████████| 4/4 [00:02<00:00, 2.48it/s]
100%|██████████| 4/4 [00:02<00:00, 1.84it/s]
- 2025-05-17 01:46:48,276 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.45s/it]
50%|█████ | 2/4 [00:01<00:01, 1.35it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.96it/s]
100%|██████████| 4/4 [00:02<00:00, 2.48it/s]
100%|██████████| 4/4 [00:02<00:00, 1.84it/s]
- 2025-05-17 01:46:48,276 - sglang - INFO - [2025-05-17 01:46:48 TP0] Capture cuda graph end. Time elapsed: 2.17 s
- 2025-05-17 01:46:48,277 - __main__ - INFO - [2025-05-17 01:46:48 TP0] Capture cuda graph end. Time elapsed: 2.17 s
- 2025-05-17 01:46:49,229 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-17 01:46:50,297 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-05-17 01:46:51,365 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-05-17 01:46:51,507 - sglang - INFO - [2025-05-17 01:46:51 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 01:46:51,507 - __main__ - INFO - [2025-05-17 01:46:51 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 01:46:52,460 - __main__ - INFO - sglang server is ready.
- 2025-05-17 01:46:52,461 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 01:46:52,461 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 01:46:52,461 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 01:46:52,462 - __main__ - INFO - Worker 0 processing work item 91107f3e53da42365e4111879440c8b71d98ac54
- 2025-05-17 01:46:52,462 - __main__ - INFO - Created all tasks for 91107f3e53da42365e4111879440c8b71d98ac54
- 2025-05-17 01:46:52,467 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/horribleocr.pdf in worker 0
- 2025-05-17 01:46:52,581 - sglang - INFO - [2025-05-17 01:46:52 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 01:46:52,581 - __main__ - INFO - [2025-05-17 01:46:52 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 01:46:52,581 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 01:46:54,142 - sglang - INFO - [2025-05-17 01:46:54] The server is fired up and ready to roll!
- 2025-05-17 01:46:54,142 - __main__ - INFO - [2025-05-17 01:46:54] The server is fired up and ready to roll!
- 2025-05-17 01:46:58,483 - __main__ - INFO - Built page query for tests/gnarly_pdfs/horribleocr.pdf-1
- 2025-05-17 01:47:02,463 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 01:47:02,463 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 01:47:02,464 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 01:47:04,736 - sglang - INFO - [2025-05-17 01:47:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 01:47:04,736 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 01:47:09,780 - sglang - INFO - [2025-05-17 01:47:09 TP0] Decode batch. #running-req: 1, #token: 1842, token usage: 0.05, gen throughput (token/s): 2.19, #queue-req: 0
- 2025-05-17 01:47:09,780 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:12,465 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 01:47:12,465 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 01:47:12,465 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 01:47:13,779 - sglang - INFO - [2025-05-17 01:47:13 TP0] Decode batch. #running-req: 1, #token: 1882, token usage: 0.05, gen throughput (token/s): 10.00, #queue-req: 0
- 2025-05-17 01:47:13,779 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:17,579 - sglang - INFO - [2025-05-17 01:47:17 TP0] Decode batch. #running-req: 1, #token: 1922, token usage: 0.05, gen throughput (token/s): 10.53, #queue-req: 0
- 2025-05-17 01:47:17,580 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:19,600 - sglang - INFO - [2025-05-17 01:47:19 TP0] Decode batch. #running-req: 1, #token: 1962, token usage: 0.05, gen throughput (token/s): 19.79, #queue-req: 0
- 2025-05-17 01:47:19,600 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:20,631 - sglang - INFO - [2025-05-17 01:47:20 TP0] Decode batch. #running-req: 1, #token: 2002, token usage: 0.05, gen throughput (token/s): 38.78, #queue-req: 0
- 2025-05-17 01:47:20,632 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:21,454 - sglang - INFO - [2025-05-17 01:47:21 TP0] Decode batch. #running-req: 1, #token: 2042, token usage: 0.05, gen throughput (token/s): 48.64, #queue-req: 0
- 2025-05-17 01:47:21,454 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:22,274 - sglang - INFO - [2025-05-17 01:47:22 TP0] Decode batch. #running-req: 1, #token: 2082, token usage: 0.05, gen throughput (token/s): 48.75, #queue-req: 0
- 2025-05-17 01:47:22,275 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:22,466 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 01:47:22,466 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 01:47:22,467 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 01:47:23,095 - sglang - INFO - [2025-05-17 01:47:23 TP0] Decode batch. #running-req: 1, #token: 2122, token usage: 0.06, gen throughput (token/s): 48.75, #queue-req: 0
- 2025-05-17 01:47:23,095 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:23,917 - sglang - INFO - [2025-05-17 01:47:23 TP0] Decode batch. #running-req: 1, #token: 2162, token usage: 0.06, gen throughput (token/s): 48.68, #queue-req: 0
- 2025-05-17 01:47:23,917 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:24,738 - sglang - INFO - [2025-05-17 01:47:24 TP0] Decode batch. #running-req: 1, #token: 2202, token usage: 0.06, gen throughput (token/s): 48.72, #queue-req: 0
- 2025-05-17 01:47:24,738 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:24,995 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 01:47:24,997 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 01:47:25,559 - sglang - INFO - [2025-05-17 01:47:25 TP0] Decode batch. #running-req: 1, #token: 2242, token usage: 0.06, gen throughput (token/s): 48.67, #queue-req: 0
- 2025-05-17 01:47:25,560 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 01:47:25,730 - __main__ - INFO - Finished TaskGroup for worker on 91107f3e53da42365e4111879440c8b71d98ac54
- 2025-05-17 01:47:25,730 - __main__ - INFO - Got 1 docs for 91107f3e53da42365e4111879440c8b71d98ac54
- 2025-05-17 01:47:25,731 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 01:47:25,732 - __main__ - INFO - Work done
- 2025-05-17 01:47:25,733 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 01:47:46,579 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 01:47:46,580 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 01:47:46,580 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 01:47:46,583 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 01:47:46,815 - __main__ - INFO - Starting pipeline with PID 371834
- 2025-05-17 01:47:46,815 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 01:47:52,318 - __main__ - INFO - No work to do, exiting
- 2025-05-17 02:06:25,410 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 02:06:25,410 - __main__ - INFO - Loading file at olmocr_workspace/job_1747418779/input.pdf as PDF document
- 2025-05-17 02:06:25,410 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 02:06:25,413 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 02:06:25,669 - __main__ - INFO - Starting pipeline with PID 372551
- 2025-05-17 02:06:25,669 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 02:06:26,283 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 02:06:27,320 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 02:06:28,373 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 02:06:29,440 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 02:06:30,508 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 02:06:31,575 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 02:06:32,643 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 02:06:32,693 - sglang - INFO - [2025-05-17 02:06:32] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=68477412, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:06:32,694 - __main__ - INFO - [2025-05-17 02:06:32] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=68477412, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:06:33,721 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 02:06:34,772 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 02:06:35,835 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 02:06:36,142 - sglang - INFO - [2025-05-17 02:06:36] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:06:36,142 - __main__ - INFO - [2025-05-17 02:06:36] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:06:36,911 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 02:06:37,978 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 02:06:39,046 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 02:06:40,113 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 02:06:41,182 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 02:06:42,250 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 02:06:42,356 - sglang - INFO - [2025-05-17 02:06:42 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:06:42,356 - __main__ - INFO - [2025-05-17 02:06:42 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:06:43,326 - sglang - INFO - [2025-05-17 02:06:43 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:06:43,326 - __main__ - INFO - [2025-05-17 02:06:43 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:06:43,326 - sglang - INFO - [2025-05-17 02:06:43 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:06:43,326 - __main__ - INFO - [2025-05-17 02:06:43 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:06:43,326 - sglang - INFO - [2025-05-17 02:06:43 TP0] Init torch distributed begin.
- 2025-05-17 02:06:43,326 - __main__ - INFO - [2025-05-17 02:06:43 TP0] Init torch distributed begin.
- 2025-05-17 02:06:43,328 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 02:06:44,391 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 02:06:45,463 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 02:06:46,529 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 02:06:47,595 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 02:06:48,662 - sglang - INFO - [2025-05-17 02:06:48 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:06:48,662 - __main__ - INFO - [2025-05-17 02:06:48 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:06:48,664 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 02:06:49,732 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 02:06:49,806 - sglang - INFO - [2025-05-17 02:06:49 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:06:49,806 - __main__ - INFO - [2025-05-17 02:06:49 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:06:50,809 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 02:06:50,829 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:06:50,829 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:06:51,188 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.79it/s]
- 2025-05-17 02:06:51,188 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.79it/s]
- 2025-05-17 02:06:51,887 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 02:06:52,325 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.22it/s]
- 2025-05-17 02:06:52,326 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.22it/s]
- 2025-05-17 02:06:52,964 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 02:06:53,620 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:01, 1.03s/it]
- 2025-05-17 02:06:53,620 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:01, 1.03s/it]
- 2025-05-17 02:06:54,040 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 02:06:54,849 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.11s/it]
- 2025-05-17 02:06:54,849 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.11s/it]
- 2025-05-17 02:06:54,849 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
- 2025-05-17 02:06:54,849 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
- 2025-05-17 02:06:54,849 - sglang - INFO -
- 2025-05-17 02:06:54,849 - __main__ - INFO -
- 2025-05-17 02:06:55,115 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 02:06:55,131 - sglang - INFO - [2025-05-17 02:06:55 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:06:55,132 - __main__ - INFO - [2025-05-17 02:06:55 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:06:55,137 - sglang - INFO - [2025-05-17 02:06:55 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:06:55,137 - __main__ - INFO - [2025-05-17 02:06:55 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:06:55,138 - sglang - INFO - [2025-05-17 02:06:55 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:06:55,138 - __main__ - INFO - [2025-05-17 02:06:55 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:06:55,283 - sglang - INFO - [2025-05-17 02:06:55 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:06:55,283 - __main__ - INFO - [2025-05-17 02:06:55 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:06:56,192 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 02:06:56,913 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.11it/s]
50%|█████ | 2/4 [00:01<00:01, 1.95it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.57it/s]
100%|██████████| 4/4 [00:01<00:00, 3.02it/s]
100%|██████████| 4/4 [00:01<00:00, 2.46it/s]
- 2025-05-17 02:06:56,913 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.11it/s]
50%|█████ | 2/4 [00:01<00:01, 1.95it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.57it/s]
100%|██████████| 4/4 [00:01<00:00, 3.02it/s]
100%|██████████| 4/4 [00:01<00:00, 2.46it/s]
- 2025-05-17 02:06:56,913 - sglang - INFO - [2025-05-17 02:06:56 TP0] Capture cuda graph end. Time elapsed: 1.63 s
- 2025-05-17 02:06:56,913 - __main__ - INFO - [2025-05-17 02:06:56 TP0] Capture cuda graph end. Time elapsed: 1.63 s
- 2025-05-17 02:06:57,268 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 02:06:58,336 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 02:06:59,395 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 02:06:59,613 - sglang - INFO - [2025-05-17 02:06:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:06:59,613 - __main__ - INFO - [2025-05-17 02:06:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:07:00,478 - __main__ - INFO - sglang server is ready.
- 2025-05-17 02:07:00,478 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 02:07:00,478 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:07:00,478 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 02:07:00,478 - __main__ - INFO - Worker 0 processing work item 1985df71617509ec45bc3584a8b12ba3e920e0d5
- 2025-05-17 02:07:00,479 - __main__ - INFO - Created all tasks for 1985df71617509ec45bc3584a8b12ba3e920e0d5
- 2025-05-17 02:07:00,481 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747418779/input.pdf in worker 0
- 2025-05-17 02:07:00,688 - sglang - INFO - [2025-05-17 02:07:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:07:00,688 - __main__ - INFO - [2025-05-17 02:07:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:07:00,688 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:07:01,251 - sglang - INFO - [2025-05-17 02:07:01] The server is fired up and ready to roll!
- 2025-05-17 02:07:01,251 - __main__ - INFO - [2025-05-17 02:07:01] The server is fired up and ready to roll!
- 2025-05-17 02:07:06,759 - __main__ - INFO - Built page query for olmocr_workspace/job_1747418779/input.pdf-1
- 2025-05-17 02:07:10,480 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:07:10,480 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:07:10,480 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 02:07:13,323 - sglang - INFO - [2025-05-17 02:07:13 TP0] Prefill batch. #new-seq: 1, #new-token: 1840, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:07:13,323 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:07:17,479 - sglang - INFO - [2025-05-17 02:07:17 TP0] Decode batch. #running-req: 1, #token: 1873, token usage: 0.05, gen throughput (token/s): 2.24, #queue-req: 0
- 2025-05-17 02:07:17,479 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:07:20,482 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:07:20,482 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:07:20,482 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 02:07:21,301 - sglang - INFO - [2025-05-17 02:07:21 TP0] Decode batch. #running-req: 1, #token: 1913, token usage: 0.05, gen throughput (token/s): 10.47, #queue-req: 0
- 2025-05-17 02:07:21,301 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:07:23,379 - sglang - INFO - [2025-05-17 02:07:23 TP0] Decode batch. #running-req: 1, #token: 1953, token usage: 0.05, gen throughput (token/s): 19.25, #queue-req: 0
- 2025-05-17 02:07:23,379 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:07:24,962 - sglang - INFO - [2025-05-17 02:07:24 TP0] Decode batch. #running-req: 1, #token: 1993, token usage: 0.05, gen throughput (token/s): 25.27, #queue-req: 0
- 2025-05-17 02:07:24,962 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:07:25,796 - sglang - INFO - [2025-05-17 02:07:25 TP0] Decode batch. #running-req: 1, #token: 2033, token usage: 0.05, gen throughput (token/s): 47.97, #queue-req: 0
- 2025-05-17 02:07:25,796 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:07:26,615 - sglang - INFO - [2025-05-17 02:07:26 TP0] Decode batch. #running-req: 1, #token: 2073, token usage: 0.05, gen throughput (token/s): 48.80, #queue-req: 0
- 2025-05-17 02:07:26,616 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:07:27,436 - sglang - INFO - [2025-05-17 02:07:27 TP0] Decode batch. #running-req: 1, #token: 2113, token usage: 0.06, gen throughput (token/s): 48.73, #queue-req: 0
- 2025-05-17 02:07:27,436 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:07:28,257 - sglang - INFO - [2025-05-17 02:07:28 TP0] Decode batch. #running-req: 1, #token: 2153, token usage: 0.06, gen throughput (token/s): 48.74, #queue-req: 0
- 2025-05-17 02:07:28,257 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:07:28,981 - __main__ - INFO - Finished TaskGroup for worker on 1985df71617509ec45bc3584a8b12ba3e920e0d5
- 2025-05-17 02:07:28,981 - __main__ - INFO - Got 1 docs for 1985df71617509ec45bc3584a8b12ba3e920e0d5
- 2025-05-17 02:07:28,982 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 02:07:28,984 - __main__ - INFO - Work done
- 2025-05-17 02:07:28,984 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 02:09:17,270 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 02:09:17,270 - __main__ - INFO - Loading file at olmocr_workspace/job_1747418950/input.pdf as PDF document
- 2025-05-17 02:09:17,270 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 02:09:17,273 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 02:09:17,507 - __main__ - INFO - Starting pipeline with PID 373591
- 2025-05-17 02:09:17,507 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 02:09:23,117 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 02:09:24,158 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 02:09:25,214 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 02:09:26,275 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 02:09:27,338 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 02:09:28,405 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 02:09:29,149 - sglang - INFO - [2025-05-17 02:09:29] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=493503861, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:09:29,149 - __main__ - INFO - [2025-05-17 02:09:29] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=493503861, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:09:29,480 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 02:09:30,548 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 02:09:31,615 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 02:09:32,682 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 02:09:33,750 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 02:09:34,820 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 02:09:35,888 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 02:09:36,955 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 02:09:37,795 - sglang - INFO - [2025-05-17 02:09:37] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:09:37,795 - __main__ - INFO - [2025-05-17 02:09:37] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:09:38,032 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 02:09:39,099 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 02:09:40,166 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 02:09:41,231 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 02:09:42,287 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 02:09:43,351 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 02:09:43,499 - sglang - INFO - [2025-05-17 02:09:43 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:09:43,499 - __main__ - INFO - [2025-05-17 02:09:43 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:09:44,048 - sglang - INFO - [2025-05-17 02:09:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:09:44,048 - __main__ - INFO - [2025-05-17 02:09:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:09:44,048 - sglang - INFO - [2025-05-17 02:09:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:09:44,048 - __main__ - INFO - [2025-05-17 02:09:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:09:44,048 - sglang - INFO - [2025-05-17 02:09:44 TP0] Init torch distributed begin.
- 2025-05-17 02:09:44,048 - __main__ - INFO - [2025-05-17 02:09:44 TP0] Init torch distributed begin.
- 2025-05-17 02:09:44,428 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 02:09:45,495 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 02:09:46,562 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 02:09:47,629 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 02:09:48,697 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 02:09:49,376 - sglang - INFO - [2025-05-17 02:09:49 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:09:49,376 - __main__ - INFO - [2025-05-17 02:09:49 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:09:49,775 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 02:09:50,468 - sglang - INFO - [2025-05-17 02:09:50 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:09:50,468 - __main__ - INFO - [2025-05-17 02:09:50 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:09:50,852 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 02:09:50,957 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:09:50,957 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:09:51,279 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.11it/s]
- 2025-05-17 02:09:51,279 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.11it/s]
- 2025-05-17 02:09:51,930 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 02:09:52,270 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.40it/s]
- 2025-05-17 02:09:52,270 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.40it/s]
- 2025-05-17 02:09:53,007 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 02:09:53,282 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.18it/s]
- 2025-05-17 02:09:53,282 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.18it/s]
- 2025-05-17 02:09:54,083 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 02:09:54,292 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.09it/s]
- 2025-05-17 02:09:54,292 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.09it/s]
- 2025-05-17 02:09:54,292 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 02:09:54,292 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 02:09:54,292 - sglang - INFO -
- 2025-05-17 02:09:54,292 - __main__ - INFO -
- 2025-05-17 02:09:54,469 - sglang - INFO - [2025-05-17 02:09:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:09:54,469 - __main__ - INFO - [2025-05-17 02:09:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:09:54,476 - sglang - INFO - [2025-05-17 02:09:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:09:54,476 - __main__ - INFO - [2025-05-17 02:09:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:09:54,477 - sglang - INFO - [2025-05-17 02:09:54 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:09:54,477 - __main__ - INFO - [2025-05-17 02:09:54 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:09:54,691 - sglang - INFO - [2025-05-17 02:09:54 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:09:54,691 - __main__ - INFO - [2025-05-17 02:09:54 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:09:55,161 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 02:09:56,237 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 02:09:56,344 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.09it/s]
50%|█████ | 2/4 [00:01<00:01, 1.92it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.54it/s]
100%|██████████| 4/4 [00:01<00:00, 2.98it/s]
100%|██████████| 4/4 [00:01<00:00, 2.42it/s]
- 2025-05-17 02:09:56,344 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.09it/s]
50%|█████ | 2/4 [00:01<00:01, 1.92it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.54it/s]
100%|██████████| 4/4 [00:01<00:00, 2.98it/s]
100%|██████████| 4/4 [00:01<00:00, 2.42it/s]
- 2025-05-17 02:09:56,345 - sglang - INFO - [2025-05-17 02:09:56 TP0] Capture cuda graph end. Time elapsed: 1.65 s
- 2025-05-17 02:09:56,345 - __main__ - INFO - [2025-05-17 02:09:56 TP0] Capture cuda graph end. Time elapsed: 1.65 s
- 2025-05-17 02:09:57,310 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 02:09:58,365 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 02:09:59,278 - sglang - INFO - [2025-05-17 02:09:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:09:59,279 - __main__ - INFO - [2025-05-17 02:09:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:09:59,453 - __main__ - INFO - sglang server is ready.
- 2025-05-17 02:09:59,453 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 02:09:59,453 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:09:59,453 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 02:09:59,454 - __main__ - INFO - Worker 0 processing work item 0da57e3be5fb46a909ca98a2aee35e16856bab58
- 2025-05-17 02:09:59,454 - __main__ - INFO - Created all tasks for 0da57e3be5fb46a909ca98a2aee35e16856bab58
- 2025-05-17 02:09:59,456 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747418950/input.pdf in worker 0
- 2025-05-17 02:10:00,385 - sglang - INFO - [2025-05-17 02:10:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:10:00,385 - __main__ - INFO - [2025-05-17 02:10:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:10:00,386 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:10:00,947 - sglang - INFO - [2025-05-17 02:10:00] The server is fired up and ready to roll!
- 2025-05-17 02:10:00,948 - __main__ - INFO - [2025-05-17 02:10:00] The server is fired up and ready to roll!
- 2025-05-17 02:10:05,726 - __main__ - INFO - Built page query for olmocr_workspace/job_1747418950/input.pdf-1
- 2025-05-17 02:10:09,479 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:10:09,479 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:10:09,480 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 02:10:19,482 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:10:19,482 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:10:19,483 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 02:10:27,304 - sglang - INFO - [2025-05-17 02:10:27 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:10:27,304 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:10:28,725 - sglang - INFO - [2025-05-17 02:10:28 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.36, #queue-req: 0
- 2025-05-17 02:10:28,725 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:10:29,484 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:10:29,484 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:10:29,484 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 02:10:29,545 - sglang - INFO - [2025-05-17 02:10:29 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.76, #queue-req: 0
- 2025-05-17 02:10:29,545 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:10:30,363 - sglang - INFO - [2025-05-17 02:10:30 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.89, #queue-req: 0
- 2025-05-17 02:10:30,363 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:10:31,182 - sglang - INFO - [2025-05-17 02:10:31 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.85, #queue-req: 0
- 2025-05-17 02:10:31,182 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:10:31,590 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 02:10:31,590 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 02:10:31,590 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 02:10:31,590 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 02:10:31,590 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 02:10:31,591 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 02:10:31,591 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 02:10:31,591 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 02:10:32,000 - sglang - INFO - [2025-05-17 02:10:32 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.87, #queue-req: 0
- 2025-05-17 02:10:32,001 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:10:32,819 - sglang - INFO - [2025-05-17 02:10:32 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.84, #queue-req: 0
- 2025-05-17 02:10:32,819 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:10:33,446 - __main__ - INFO - Finished TaskGroup for worker on 0da57e3be5fb46a909ca98a2aee35e16856bab58
- 2025-05-17 02:10:33,446 - __main__ - INFO - Got 1 docs for 0da57e3be5fb46a909ca98a2aee35e16856bab58
- 2025-05-17 02:10:33,448 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 02:10:33,448 - __main__ - INFO - Work done
- 2025-05-17 02:10:33,448 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 02:12:55,176 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 02:12:55,176 - __main__ - INFO - Loading file at olmocr_workspace/job_1747419168/input.pdf as PDF document
- 2025-05-17 02:12:55,176 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 02:12:55,183 - __main__ - INFO - Calculated items_per_group: 62 based on average pages per PDF: 8.00
- 2025-05-17 02:12:55,421 - __main__ - INFO - Starting pipeline with PID 374647
- 2025-05-17 02:12:55,421 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 02:13:01,024 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 02:13:02,064 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 02:13:03,120 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 02:13:04,188 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 02:13:05,256 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 02:13:06,244 - sglang - INFO - [2025-05-17 02:13:06] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=872630305, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:13:06,244 - __main__ - INFO - [2025-05-17 02:13:06] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=872630305, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:13:06,386 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 02:13:07,437 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 02:13:08,501 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 02:13:09,570 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 02:13:10,640 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 02:13:11,707 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 02:13:12,783 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 02:13:13,857 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 02:13:14,765 - sglang - INFO - [2025-05-17 02:13:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:13:14,765 - __main__ - INFO - [2025-05-17 02:13:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:13:14,934 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 02:13:16,008 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 02:13:17,076 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 02:13:18,136 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 02:13:19,206 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 02:13:20,279 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 02:13:20,559 - sglang - INFO - [2025-05-17 02:13:20 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:13:20,559 - __main__ - INFO - [2025-05-17 02:13:20 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:13:21,049 - sglang - INFO - [2025-05-17 02:13:21 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:13:21,049 - __main__ - INFO - [2025-05-17 02:13:21 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:13:21,049 - sglang - INFO - [2025-05-17 02:13:21 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:13:21,049 - __main__ - INFO - [2025-05-17 02:13:21 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:13:21,050 - sglang - INFO - [2025-05-17 02:13:21 TP0] Init torch distributed begin.
- 2025-05-17 02:13:21,050 - __main__ - INFO - [2025-05-17 02:13:21 TP0] Init torch distributed begin.
- 2025-05-17 02:13:21,357 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 02:13:22,424 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 02:13:23,487 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 02:13:24,554 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 02:13:25,624 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 02:13:26,409 - sglang - INFO - [2025-05-17 02:13:26 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:13:26,409 - __main__ - INFO - [2025-05-17 02:13:26 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:13:26,710 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 02:13:27,540 - sglang - INFO - [2025-05-17 02:13:27 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:13:27,540 - __main__ - INFO - [2025-05-17 02:13:27 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:13:27,778 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 02:13:28,055 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:13:28,055 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:13:28,577 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 1.92it/s]
- 2025-05-17 02:13:28,577 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 1.92it/s]
- 2025-05-17 02:13:28,818 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 02:13:29,879 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 02:13:29,976 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:02, 1.04s/it]
- 2025-05-17 02:13:29,976 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:02, 1.04s/it]
- 2025-05-17 02:13:30,958 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 02:13:31,289 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.16s/it]
- 2025-05-17 02:13:31,289 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.16s/it]
- 2025-05-17 02:13:32,036 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 02:13:32,471 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.17s/it]
- 2025-05-17 02:13:32,471 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.17s/it]
- 2025-05-17 02:13:32,471 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.10s/it]
- 2025-05-17 02:13:32,471 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.10s/it]
- 2025-05-17 02:13:32,471 - sglang - INFO -
- 2025-05-17 02:13:32,471 - __main__ - INFO -
- 2025-05-17 02:13:32,656 - sglang - INFO - [2025-05-17 02:13:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:13:32,656 - __main__ - INFO - [2025-05-17 02:13:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:13:32,662 - sglang - INFO - [2025-05-17 02:13:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:13:32,662 - __main__ - INFO - [2025-05-17 02:13:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:13:32,662 - sglang - INFO - [2025-05-17 02:13:32 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:13:32,662 - __main__ - INFO - [2025-05-17 02:13:32 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:13:32,819 - sglang - INFO - [2025-05-17 02:13:32 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:13:32,819 - __main__ - INFO - [2025-05-17 02:13:32 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:13:33,114 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 02:13:34,191 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 02:13:34,510 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.04it/s]
50%|█████ | 2/4 [00:01<00:01, 1.86it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s]
100%|██████████| 4/4 [00:01<00:00, 2.94it/s]
100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
- 2025-05-17 02:13:34,510 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.04it/s]
50%|█████ | 2/4 [00:01<00:01, 1.86it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s]
100%|██████████| 4/4 [00:01<00:00, 2.94it/s]
100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
- 2025-05-17 02:13:34,511 - sglang - INFO - [2025-05-17 02:13:34 TP0] Capture cuda graph end. Time elapsed: 1.69 s
- 2025-05-17 02:13:34,511 - __main__ - INFO - [2025-05-17 02:13:34 TP0] Capture cuda graph end. Time elapsed: 1.69 s
- 2025-05-17 02:13:35,269 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 02:13:36,337 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 02:13:36,877 - sglang - INFO - [2025-05-17 02:13:36 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:13:36,877 - __main__ - INFO - [2025-05-17 02:13:36 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:13:37,430 - __main__ - INFO - sglang server is ready.
- 2025-05-17 02:13:37,431 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 02:13:37,431 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:13:37,431 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 02:13:37,431 - __main__ - INFO - Worker 0 processing work item 68de5843976d18df5ea068383feb21dd169b186d
- 2025-05-17 02:13:37,431 - __main__ - INFO - Created all tasks for 68de5843976d18df5ea068383feb21dd169b186d
- 2025-05-17 02:13:37,439 - __main__ - INFO - Got 8 pages to do for olmocr_workspace/job_1747419168/input.pdf in worker 0
- 2025-05-17 02:13:37,952 - sglang - INFO - [2025-05-17 02:13:37 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:13:37,952 - __main__ - INFO - [2025-05-17 02:13:37 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:13:37,952 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:13:38,982 - sglang - INFO - [2025-05-17 02:13:38] The server is fired up and ready to roll!
- 2025-05-17 02:13:38,982 - __main__ - INFO - [2025-05-17 02:13:38] The server is fired up and ready to roll!
- 2025-05-17 02:13:44,098 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-1
- 2025-05-17 02:13:44,105 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-2
- 2025-05-17 02:13:44,149 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-3
- 2025-05-17 02:13:44,185 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-6
- 2025-05-17 02:13:44,206 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-4
- 2025-05-17 02:13:44,208 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-5
- 2025-05-17 02:13:44,212 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-8
- 2025-05-17 02:13:44,212 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-7
- 2025-05-17 02:13:47,479 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:13:47,479 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:13:47,480 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 8
- 2025-05-17 02:13:57,481 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:13:57,481 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:13:57,481 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 8
- 2025-05-17 02:14:05,287 - sglang - INFO - [2025-05-17 02:14:05 TP0] Prefill batch. #new-seq: 1, #new-token: 1171, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:14:05,287 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:14:06,033 - sglang - INFO - [2025-05-17 02:14:06 TP0] Prefill batch. #new-seq: 6, #new-token: 12991, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.03, #running-req: 1, #queue-req: 1
- 2025-05-17 02:14:06,033 - __main__ - INFO - sglang running req: 1 queue req: 1
- 2025-05-17 02:14:07,482 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:14:07,482 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:14:07,483 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 8
- 2025-05-17 02:14:11,389 - sglang - INFO - [2025-05-17 02:14:11 TP0] Decode batch. #running-req: 7, #token: 13189, token usage: 0.35, gen throughput (token/s): 6.90, #queue-req: 1
- 2025-05-17 02:14:11,389 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-05-17 02:14:11,411 - sglang - INFO - [2025-05-17 02:14:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1764, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.35, #running-req: 6, #queue-req: 0
- 2025-05-17 02:14:11,411 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 02:14:12,294 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 02:14:12,857 - sglang - INFO - [2025-05-17 02:14:12 TP0] Decode batch. #running-req: 6, #token: 14047, token usage: 0.37, gen throughput (token/s): 175.03, #queue-req: 0
- 2025-05-17 02:14:12,857 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 02:14:13,726 - sglang - INFO - [2025-05-17 02:14:13 TP0] Decode batch. #running-req: 6, #token: 14287, token usage: 0.38, gen throughput (token/s): 276.13, #queue-req: 0
- 2025-05-17 02:14:13,727 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 02:14:14,595 - sglang - INFO - [2025-05-17 02:14:14 TP0] Decode batch. #running-req: 6, #token: 14527, token usage: 0.38, gen throughput (token/s): 276.46, #queue-req: 0
- 2025-05-17 02:14:14,595 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 02:14:15,463 - sglang - INFO - [2025-05-17 02:14:15 TP0] Decode batch. #running-req: 6, #token: 14767, token usage: 0.39, gen throughput (token/s): 276.44, #queue-req: 0
- 2025-05-17 02:14:15,463 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 02:14:16,331 - sglang - INFO - [2025-05-17 02:14:16 TP0] Decode batch. #running-req: 6, #token: 15007, token usage: 0.40, gen throughput (token/s): 276.44, #queue-req: 0
- 2025-05-17 02:14:16,331 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 02:14:17,200 - sglang - INFO - [2025-05-17 02:14:17 TP0] Decode batch. #running-req: 6, #token: 15247, token usage: 0.40, gen throughput (token/s): 276.27, #queue-req: 0
- 2025-05-17 02:14:17,200 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 02:14:17,484 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:14:17,484 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 27.73 27.73
- sglang_output_tokens 1.04 1.04
- 2025-05-17 02:14:17,484 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 8
- 2025-05-17 02:14:18,070 - sglang - INFO - [2025-05-17 02:14:18 TP0] Decode batch. #running-req: 6, #token: 15487, token usage: 0.41, gen throughput (token/s): 275.60, #queue-req: 0
- 2025-05-17 02:14:18,071 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 02:14:18,942 - sglang - INFO - [2025-05-17 02:14:18 TP0] Decode batch. #running-req: 6, #token: 15727, token usage: 0.41, gen throughput (token/s): 275.41, #queue-req: 0
- 2025-05-17 02:14:18,942 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 02:14:19,813 - sglang - INFO - [2025-05-17 02:14:19 TP0] Decode batch. #running-req: 5, #token: 13844, token usage: 0.36, gen throughput (token/s): 270.98, #queue-req: 0
- 2025-05-17 02:14:19,813 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:14:20,680 - sglang - INFO - [2025-05-17 02:14:20 TP0] Decode batch. #running-req: 5, #token: 14044, token usage: 0.37, gen throughput (token/s): 230.75, #queue-req: 0
- 2025-05-17 02:14:20,680 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:14:21,538 - sglang - INFO - [2025-05-17 02:14:21 TP0] Decode batch. #running-req: 3, #token: 8910, token usage: 0.23, gen throughput (token/s): 192.30, #queue-req: 0
- 2025-05-17 02:14:21,538 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 02:14:22,384 - sglang - INFO - [2025-05-17 02:14:22 TP0] Decode batch. #running-req: 3, #token: 9030, token usage: 0.24, gen throughput (token/s): 141.83, #queue-req: 0
- 2025-05-17 02:14:22,384 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 02:14:23,231 - sglang - INFO - [2025-05-17 02:14:23 TP0] Decode batch. #running-req: 3, #token: 9150, token usage: 0.24, gen throughput (token/s): 141.64, #queue-req: 0
- 2025-05-17 02:14:23,231 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 02:14:24,072 - sglang - INFO - [2025-05-17 02:14:24 TP0] Decode batch. #running-req: 2, #token: 6371, token usage: 0.17, gen throughput (token/s): 116.49, #queue-req: 0
- 2025-05-17 02:14:24,072 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-05-17 02:14:24,906 - sglang - INFO - [2025-05-17 02:14:24 TP0] Decode batch. #running-req: 2, #token: 6451, token usage: 0.17, gen throughput (token/s): 95.96, #queue-req: 0
- 2025-05-17 02:14:24,906 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-05-17 02:14:25,734 - sglang - INFO - [2025-05-17 02:14:25 TP0] Decode batch. #running-req: 1, #token: 3584, token usage: 0.09, gen throughput (token/s): 54.32, #queue-req: 0
- 2025-05-17 02:14:25,734 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:14:26,562 - sglang - INFO - [2025-05-17 02:14:26 TP0] Decode batch. #running-req: 1, #token: 3624, token usage: 0.10, gen throughput (token/s): 48.34, #queue-req: 0
- 2025-05-17 02:14:26,562 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:14:27,389 - sglang - INFO - [2025-05-17 02:14:27 TP0] Decode batch. #running-req: 1, #token: 3664, token usage: 0.10, gen throughput (token/s): 48.32, #queue-req: 0
- 2025-05-17 02:14:27,390 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:14:27,485 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:14:27,485 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 140.98 140.98
- sglang_output_tokens 27.79 27.79
- 2025-05-17 02:14:27,485 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 7 | 8
- 2025-05-17 02:14:28,218 - sglang - INFO - [2025-05-17 02:14:28 TP0] Decode batch. #running-req: 1, #token: 3704, token usage: 0.10, gen throughput (token/s): 48.29, #queue-req: 0
- 2025-05-17 02:14:28,218 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:14:29,045 - sglang - INFO - [2025-05-17 02:14:29 TP0] Decode batch. #running-req: 1, #token: 3744, token usage: 0.10, gen throughput (token/s): 48.32, #queue-req: 0
- 2025-05-17 02:14:29,046 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:14:29,873 - sglang - INFO - [2025-05-17 02:14:29 TP0] Decode batch. #running-req: 1, #token: 3784, token usage: 0.10, gen throughput (token/s): 48.34, #queue-req: 0
- 2025-05-17 02:14:29,873 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:14:30,148 - __main__ - INFO - Finished TaskGroup for worker on 68de5843976d18df5ea068383feb21dd169b186d
- 2025-05-17 02:14:30,149 - __main__ - INFO - Got 1 docs for 68de5843976d18df5ea068383feb21dd169b186d
- 2025-05-17 02:14:30,150 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 02:14:30,150 - __main__ - INFO - Work done
- 2025-05-17 02:14:30,151 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 02:22:17,807 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 02:22:17,807 - __main__ - INFO - Loading file at olmocr_workspace/job_1747419731/input.pdf as PDF document
- 2025-05-17 02:22:17,807 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 02:22:17,832 - __main__ - INFO - Calculated items_per_group: 7 based on average pages per PDF: 67.00
- 2025-05-17 02:22:18,076 - __main__ - INFO - Starting pipeline with PID 376290
- 2025-05-17 02:22:18,076 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 02:22:18,847 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 02:22:19,878 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 02:22:20,913 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 02:22:21,966 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 02:22:23,029 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 02:22:24,140 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 02:22:24,764 - sglang - INFO - [2025-05-17 02:22:24] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=562155338, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:22:24,764 - __main__ - INFO - [2025-05-17 02:22:24] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=562155338, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:22:25,216 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 02:22:26,281 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 02:22:27,348 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 02:22:28,033 - sglang - INFO - [2025-05-17 02:22:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:22:28,033 - __main__ - INFO - [2025-05-17 02:22:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:22:28,425 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 02:22:29,492 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 02:22:30,559 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 02:22:31,626 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 02:22:32,695 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 02:22:33,231 - sglang - INFO - [2025-05-17 02:22:33 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:22:33,231 - __main__ - INFO - [2025-05-17 02:22:33 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:22:33,770 - sglang - INFO - [2025-05-17 02:22:33 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:22:33,770 - __main__ - INFO - [2025-05-17 02:22:33 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:22:33,770 - sglang - INFO - [2025-05-17 02:22:33 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:22:33,770 - __main__ - INFO - [2025-05-17 02:22:33 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:22:33,770 - sglang - INFO - [2025-05-17 02:22:33 TP0] Init torch distributed begin.
- 2025-05-17 02:22:33,770 - __main__ - INFO - [2025-05-17 02:22:33 TP0] Init torch distributed begin.
- 2025-05-17 02:22:33,771 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 02:22:34,844 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 02:22:35,908 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 02:22:36,964 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 02:22:38,026 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 02:22:39,092 - sglang - INFO - [2025-05-17 02:22:39 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:22:39,092 - __main__ - INFO - [2025-05-17 02:22:39 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:22:39,093 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 02:22:40,160 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 02:22:40,194 - sglang - INFO - [2025-05-17 02:22:40 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:22:40,194 - __main__ - INFO - [2025-05-17 02:22:40 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:22:40,679 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:22:40,679 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:22:41,004 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.08it/s]
- 2025-05-17 02:22:41,004 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.08it/s]
- 2025-05-17 02:22:41,238 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 02:22:42,072 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.31it/s]
- 2025-05-17 02:22:42,073 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.31it/s]
- 2025-05-17 02:22:42,317 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 02:22:43,146 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.11it/s]
- 2025-05-17 02:22:43,146 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.11it/s]
- 2025-05-17 02:22:43,395 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 02:22:44,205 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.04it/s]
- 2025-05-17 02:22:44,205 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.04it/s]
- 2025-05-17 02:22:44,205 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.13it/s]
- 2025-05-17 02:22:44,205 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.13it/s]
- 2025-05-17 02:22:44,205 - sglang - INFO -
- 2025-05-17 02:22:44,205 - __main__ - INFO -
- 2025-05-17 02:22:44,359 - sglang - INFO - [2025-05-17 02:22:44 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:22:44,359 - __main__ - INFO - [2025-05-17 02:22:44 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:22:44,366 - sglang - INFO - [2025-05-17 02:22:44 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:22:44,366 - __main__ - INFO - [2025-05-17 02:22:44 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:22:44,366 - sglang - INFO - [2025-05-17 02:22:44 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:22:44,366 - __main__ - INFO - [2025-05-17 02:22:44 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:22:44,472 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 02:22:44,560 - sglang - INFO - [2025-05-17 02:22:44 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:22:44,560 - __main__ - INFO - [2025-05-17 02:22:44 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:22:45,549 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 02:22:46,181 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.12it/s]
50%|█████ | 2/4 [00:01<00:01, 1.96it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.58it/s]
100%|██████████| 4/4 [00:01<00:00, 3.03it/s]
100%|██████████| 4/4 [00:01<00:00, 2.47it/s]
- 2025-05-17 02:22:46,181 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.12it/s]
50%|█████ | 2/4 [00:01<00:01, 1.96it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.58it/s]
100%|██████████| 4/4 [00:01<00:00, 3.03it/s]
100%|██████████| 4/4 [00:01<00:00, 2.47it/s]
- 2025-05-17 02:22:46,181 - sglang - INFO - [2025-05-17 02:22:46 TP0] Capture cuda graph end. Time elapsed: 1.62 s
- 2025-05-17 02:22:46,181 - __main__ - INFO - [2025-05-17 02:22:46 TP0] Capture cuda graph end. Time elapsed: 1.62 s
- 2025-05-17 02:22:46,626 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 02:22:47,695 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 02:22:48,763 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 02:22:48,820 - sglang - INFO - [2025-05-17 02:22:48 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:22:48,821 - __main__ - INFO - [2025-05-17 02:22:48 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:22:49,856 - __main__ - INFO - sglang server is ready.
- 2025-05-17 02:22:49,857 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 02:22:49,857 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:22:49,857 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 02:22:49,857 - __main__ - INFO - Worker 0 processing work item a47ce4ecdd6200876f5b8de00bb3ccbac96ba956
- 2025-05-17 02:22:49,857 - __main__ - INFO - Created all tasks for a47ce4ecdd6200876f5b8de00bb3ccbac96ba956
- 2025-05-17 02:22:49,891 - __main__ - INFO - Got 67 pages to do for olmocr_workspace/job_1747419731/input.pdf in worker 0
- 2025-05-17 02:22:50,022 - sglang - INFO - [2025-05-17 02:22:49 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:22:50,023 - __main__ - INFO - [2025-05-17 02:22:49 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:22:50,023 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:22:51,583 - sglang - INFO - [2025-05-17 02:22:51] The server is fired up and ready to roll!
- 2025-05-17 02:22:51,584 - __main__ - INFO - [2025-05-17 02:22:51] The server is fired up and ready to roll!
- 2025-05-17 02:22:59,858 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:22:59,859 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:22:59,859 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 67
- 2025-05-17 02:23:02,345 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-1
- 2025-05-17 02:23:02,372 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-2
- 2025-05-17 02:23:02,428 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-3
- 2025-05-17 02:23:02,432 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-4
- 2025-05-17 02:23:02,482 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-6
- 2025-05-17 02:23:02,490 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-5
- 2025-05-17 02:23:02,540 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-7
- 2025-05-17 02:23:02,553 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-9
- 2025-05-17 02:23:02,557 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-8
- 2025-05-17 02:23:02,610 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-13
- 2025-05-17 02:23:02,616 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-11
- 2025-05-17 02:23:02,629 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-14
- 2025-05-17 02:23:02,636 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-15
- 2025-05-17 02:23:02,645 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-16
- 2025-05-17 02:23:02,671 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-10
- 2025-05-17 02:23:02,672 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-18
- 2025-05-17 02:23:02,675 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-17
- 2025-05-17 02:23:02,678 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-19
- 2025-05-17 02:23:02,686 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-20
- 2025-05-17 02:23:02,700 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-22
- 2025-05-17 02:23:02,701 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-21
- 2025-05-17 02:23:02,707 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-12
- 2025-05-17 02:23:02,720 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-23
- 2025-05-17 02:23:02,733 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-24
- 2025-05-17 02:23:02,735 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-26
- 2025-05-17 02:23:02,744 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-27
- 2025-05-17 02:23:02,747 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-28
- 2025-05-17 02:23:02,761 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-29
- 2025-05-17 02:23:02,765 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-31
- 2025-05-17 02:23:02,767 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-30
- 2025-05-17 02:23:02,769 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-32
- 2025-05-17 02:23:02,796 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-33
- 2025-05-17 02:23:02,800 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-34
- 2025-05-17 02:23:02,817 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-41
- 2025-05-17 02:23:02,824 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-39
- 2025-05-17 02:23:02,825 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-25
- 2025-05-17 02:23:02,829 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-37
- 2025-05-17 02:23:02,855 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-42
- 2025-05-17 02:23:02,866 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-44
- 2025-05-17 02:23:02,867 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-43
- 2025-05-17 02:23:02,882 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-46
- 2025-05-17 02:23:02,886 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-52
- 2025-05-17 02:23:02,890 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-47
- 2025-05-17 02:23:02,891 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-48
- 2025-05-17 02:23:02,893 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-51
- 2025-05-17 02:23:02,897 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-50
- 2025-05-17 02:23:02,903 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-36
- 2025-05-17 02:23:02,913 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-35
- 2025-05-17 02:23:02,921 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-38
- 2025-05-17 02:23:02,923 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-53
- 2025-05-17 02:23:02,934 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-55
- 2025-05-17 02:23:02,937 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-58
- 2025-05-17 02:23:02,938 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-59
- 2025-05-17 02:23:02,938 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-66
- 2025-05-17 02:23:02,939 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-54
- 2025-05-17 02:23:02,939 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-40
- 2025-05-17 02:23:02,943 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-56
- 2025-05-17 02:23:02,944 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-67
- 2025-05-17 02:23:02,954 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-61
- 2025-05-17 02:23:02,981 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-57
- 2025-05-17 02:23:02,983 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-62
- 2025-05-17 02:23:02,984 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-60
- 2025-05-17 02:23:02,984 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-45
- 2025-05-17 02:23:02,985 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-65
- 2025-05-17 02:23:02,988 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-64
- 2025-05-17 02:23:02,989 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-63
- 2025-05-17 02:23:02,992 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-49
- 2025-05-17 02:23:09,861 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:23:09,861 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:23:09,861 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 67
- 2025-05-17 02:23:14,583 - sglang - INFO - [2025-05-17 02:23:14 TP0] Prefill batch. #new-seq: 1, #new-token: 1171, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:23:14,584 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:23:18,202 - sglang - INFO - [2025-05-17 02:23:18 TP0] Prefill batch. #new-seq: 7, #new-token: 12863, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.03, #running-req: 1, #queue-req: 59
- 2025-05-17 02:23:18,203 - __main__ - INFO - sglang running req: 1 queue req: 59
- 2025-05-17 02:23:19,863 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:23:19,863 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:23:19,863 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 67
- 2025-05-17 02:23:24,325 - sglang - INFO - [2025-05-17 02:23:24 TP0] Decode batch. #running-req: 8, #token: 13094, token usage: 0.34, gen throughput (token/s): 7.63, #queue-req: 59
- 2025-05-17 02:23:24,325 - __main__ - INFO - sglang running req: 8 queue req: 59
- 2025-05-17 02:23:24,347 - sglang - INFO - [2025-05-17 02:23:24 TP0] Prefill batch. #new-seq: 2, #new-token: 3810, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.34, #running-req: 7, #queue-req: 57
- 2025-05-17 02:23:24,347 - __main__ - INFO - sglang running req: 7 queue req: 57
- 2025-05-17 02:23:26,480 - sglang - INFO - [2025-05-17 02:23:26 TP0] Decode batch. #running-req: 9, #token: 17262, token usage: 0.45, gen throughput (token/s): 166.10, #queue-req: 57
- 2025-05-17 02:23:26,481 - __main__ - INFO - sglang running req: 9 queue req: 57
- 2025-05-17 02:23:26,828 - sglang - INFO - [2025-05-17 02:23:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2193, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.42, #running-req: 8, #queue-req: 56
- 2025-05-17 02:23:26,828 - __main__ - INFO - sglang running req: 8 queue req: 56
- 2025-05-17 02:23:28,125 - sglang - INFO - [2025-05-17 02:23:28 TP0] Decode batch. #running-req: 9, #token: 18453, token usage: 0.49, gen throughput (token/s): 218.32, #queue-req: 56
- 2025-05-17 02:23:28,125 - __main__ - INFO - sglang running req: 9 queue req: 56
- 2025-05-17 02:23:29,055 - sglang - INFO - [2025-05-17 02:23:29 TP0] Decode batch. #running-req: 9, #token: 18813, token usage: 0.50, gen throughput (token/s): 386.98, #queue-req: 56
- 2025-05-17 02:23:29,055 - __main__ - INFO - sglang running req: 9 queue req: 56
- 2025-05-17 02:23:29,865 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:23:29,865 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 33.93 33.93
- sglang_output_tokens 1.69 1.69
- 2025-05-17 02:23:29,865 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 67
- 2025-05-17 02:23:29,985 - sglang - INFO - [2025-05-17 02:23:29 TP0] Decode batch. #running-req: 9, #token: 19173, token usage: 0.50, gen throughput (token/s): 387.01, #queue-req: 56
- 2025-05-17 02:23:29,985 - __main__ - INFO - sglang running req: 9 queue req: 56
- 2025-05-17 02:23:30,892 - sglang - INFO - [2025-05-17 02:23:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2008, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.47, #running-req: 8, #queue-req: 55
- 2025-05-17 02:23:30,892 - __main__ - INFO - sglang running req: 8 queue req: 55
- 2025-05-17 02:23:31,566 - sglang - INFO - [2025-05-17 02:23:31 TP0] Decode batch. #running-req: 9, #token: 19707, token usage: 0.52, gen throughput (token/s): 227.05, #queue-req: 55
- 2025-05-17 02:23:31,566 - __main__ - INFO - sglang running req: 9 queue req: 55
- 2025-05-17 02:23:32,380 - sglang - INFO - [2025-05-17 02:23:32 TP0] Prefill batch. #new-seq: 2, #new-token: 4339, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.48, #running-req: 8, #queue-req: 53
- 2025-05-17 02:23:32,380 - __main__ - INFO - sglang running req: 8 queue req: 53
- 2025-05-17 02:23:33,878 - sglang - INFO - [2025-05-17 02:23:33 TP0] Decode batch. #running-req: 10, #token: 22444, token usage: 0.59, gen throughput (token/s): 157.44, #queue-req: 53
- 2025-05-17 02:23:33,879 - __main__ - INFO - sglang running req: 10 queue req: 53
- 2025-05-17 02:23:34,823 - sglang - INFO - [2025-05-17 02:23:34 TP0] Decode batch. #running-req: 10, #token: 22844, token usage: 0.60, gen throughput (token/s): 423.37, #queue-req: 53
- 2025-05-17 02:23:34,823 - __main__ - INFO - sglang running req: 10 queue req: 53
- 2025-05-17 02:23:35,201 - sglang - INFO - [2025-05-17 02:23:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2028, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 9, #queue-req: 52
- 2025-05-17 02:23:35,201 - __main__ - INFO - sglang running req: 9 queue req: 52
- 2025-05-17 02:23:36,266 - sglang - INFO - [2025-05-17 02:23:36 TP0] Prefill batch. #new-seq: 1, #new-token: 1857, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 9, #queue-req: 51
- 2025-05-17 02:23:36,266 - __main__ - INFO - sglang running req: 9 queue req: 51
- 2025-05-17 02:23:37,087 - sglang - INFO - [2025-05-17 02:23:37 TP0] Decode batch. #running-req: 10, #token: 22318, token usage: 0.59, gen throughput (token/s): 175.82, #queue-req: 51
- 2025-05-17 02:23:37,087 - __main__ - INFO - sglang running req: 10 queue req: 51
- 2025-05-17 02:23:37,181 - sglang - INFO - [2025-05-17 02:23:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1843, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.53, #running-req: 9, #queue-req: 50
- 2025-05-17 02:23:37,181 - __main__ - INFO - sglang running req: 9 queue req: 50
- 2025-05-17 02:23:37,889 - sglang - INFO - [2025-05-17 02:23:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1909, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.52, #running-req: 9, #queue-req: 49
- 2025-05-17 02:23:37,889 - __main__ - INFO - sglang running req: 9 queue req: 49
- 2025-05-17 02:23:39,356 - sglang - INFO - [2025-05-17 02:23:39 TP0] Decode batch. #running-req: 10, #token: 22012, token usage: 0.58, gen throughput (token/s): 175.38, #queue-req: 49
- 2025-05-17 02:23:39,356 - __main__ - INFO - sglang running req: 10 queue req: 49
- 2025-05-17 02:23:39,866 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:23:39,866 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 166.83 166.83
- sglang_output_tokens 23.70 23.70
- 2025-05-17 02:23:39,867 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 67
- 2025-05-17 02:23:40,300 - sglang - INFO - [2025-05-17 02:23:40 TP0] Decode batch. #running-req: 10, #token: 22412, token usage: 0.59, gen throughput (token/s): 423.76, #queue-req: 49
- 2025-05-17 02:23:40,300 - __main__ - INFO - sglang running req: 10 queue req: 49
- 2025-05-17 02:23:41,245 - sglang - INFO - [2025-05-17 02:23:41 TP0] Decode batch. #running-req: 10, #token: 22812, token usage: 0.60, gen throughput (token/s): 423.19, #queue-req: 49
- 2025-05-17 02:23:41,245 - __main__ - INFO - sglang running req: 10 queue req: 49
- 2025-05-17 02:23:41,388 - sglang - INFO - [2025-05-17 02:23:41 TP0] Prefill batch. #new-seq: 2, #new-token: 4101, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 9, #queue-req: 47
- 2025-05-17 02:23:41,388 - __main__ - INFO - sglang running req: 9 queue req: 47
- 2025-05-17 02:23:43,418 - sglang - INFO - [2025-05-17 02:23:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2197, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.59, #running-req: 10, #queue-req: 46
- 2025-05-17 02:23:43,419 - __main__ - INFO - sglang running req: 10 queue req: 46
- 2025-05-17 02:23:44,339 - sglang - INFO - [2025-05-17 02:23:44 TP0] Decode batch. #running-req: 11, #token: 24808, token usage: 0.65, gen throughput (token/s): 139.63, #queue-req: 46
- 2025-05-17 02:23:44,339 - __main__ - INFO - sglang running req: 11 queue req: 46
- 2025-05-17 02:23:45,222 - sglang - INFO - [2025-05-17 02:23:45 TP0] Prefill batch. #new-seq: 2, #new-token: 3966, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.60, #running-req: 10, #queue-req: 44
- 2025-05-17 02:23:45,222 - __main__ - INFO - sglang running req: 10 queue req: 44
- 2025-05-17 02:23:46,590 - sglang - INFO - [2025-05-17 02:23:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2057, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 11, #queue-req: 43
- 2025-05-17 02:23:46,590 - __main__ - INFO - sglang running req: 11 queue req: 43
- 2025-05-17 02:23:47,330 - sglang - INFO - [2025-05-17 02:23:47 TP0] Decode batch. #running-req: 12, #token: 26764, token usage: 0.70, gen throughput (token/s): 147.43, #queue-req: 43
- 2025-05-17 02:23:47,331 - __main__ - INFO - sglang running req: 12 queue req: 43
- 2025-05-17 02:23:47,669 - sglang - INFO - [2025-05-17 02:23:47 TP0] Prefill batch. #new-seq: 1, #new-token: 1996, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 11, #queue-req: 42
- 2025-05-17 02:23:47,669 - __main__ - INFO - sglang running req: 11 queue req: 42
- 2025-05-17 02:23:48,979 - sglang - INFO - [2025-05-17 02:23:48 TP0] Decode batch. #running-req: 12, #token: 26496, token usage: 0.70, gen throughput (token/s): 290.61, #queue-req: 42
- 2025-05-17 02:23:48,979 - __main__ - INFO - sglang running req: 12 queue req: 42
- 2025-05-17 02:23:49,868 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:23:49,869 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 259.43 259.43
- sglang_output_tokens 40.62 40.62
- 2025-05-17 02:23:49,869 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 67
- 2025-05-17 02:23:49,942 - sglang - INFO - [2025-05-17 02:23:49 TP0] Decode batch. #running-req: 12, #token: 26976, token usage: 0.71, gen throughput (token/s): 498.13, #queue-req: 42
- 2025-05-17 02:23:49,942 - __main__ - INFO - sglang running req: 12 queue req: 42
- 2025-05-17 02:23:50,812 - sglang - INFO - [2025-05-17 02:23:50 TP0] Prefill batch. #new-seq: 2, #new-token: 4147, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 11, #queue-req: 40
- 2025-05-17 02:23:50,812 - __main__ - INFO - sglang running req: 11 queue req: 40
- 2025-05-17 02:23:52,309 - sglang - INFO - [2025-05-17 02:23:52 TP0] Decode batch. #running-req: 13, #token: 28833, token usage: 0.76, gen throughput (token/s): 204.08, #queue-req: 40
- 2025-05-17 02:23:52,309 - __main__ - INFO - sglang running req: 13 queue req: 40
- 2025-05-17 02:23:52,382 - sglang - INFO - [2025-05-17 02:23:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2124, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 39
- 2025-05-17 02:23:52,382 - __main__ - INFO - sglang running req: 12 queue req: 39
- 2025-05-17 02:23:53,998 - sglang - INFO - [2025-05-17 02:23:53 TP0] Decode batch. #running-req: 13, #token: 29315, token usage: 0.77, gen throughput (token/s): 307.21, #queue-req: 39
- 2025-05-17 02:23:53,999 - __main__ - INFO - sglang running req: 13 queue req: 39
- 2025-05-17 02:23:54,072 - sglang - INFO - [2025-05-17 02:23:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2102, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 38
- 2025-05-17 02:23:54,072 - __main__ - INFO - sglang running req: 12 queue req: 38
- 2025-05-17 02:23:55,704 - sglang - INFO - [2025-05-17 02:23:55 TP0] Decode batch. #running-req: 13, #token: 29710, token usage: 0.78, gen throughput (token/s): 304.35, #queue-req: 38
- 2025-05-17 02:23:55,704 - __main__ - INFO - sglang running req: 13 queue req: 38
- 2025-05-17 02:23:55,777 - sglang - INFO - [2025-05-17 02:23:55 TP0] Prefill batch. #new-seq: 1, #new-token: 1910, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 37
- 2025-05-17 02:23:55,777 - __main__ - INFO - sglang running req: 12 queue req: 37
- 2025-05-17 02:23:57,368 - sglang - INFO - [2025-05-17 02:23:57 TP0] Decode batch. #running-req: 13, #token: 29685, token usage: 0.78, gen throughput (token/s): 311.73, #queue-req: 37
- 2025-05-17 02:23:57,369 - __main__ - INFO - sglang running req: 13 queue req: 37
- 2025-05-17 02:23:57,540 - sglang - INFO - [2025-05-17 02:23:57 TP0] Prefill batch. #new-seq: 1, #new-token: 1906, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 36
- 2025-05-17 02:23:57,540 - __main__ - INFO - sglang running req: 12 queue req: 36
- 2025-05-17 02:23:58,652 - sglang - INFO - [2025-05-17 02:23:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2143, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 35
- 2025-05-17 02:23:58,653 - __main__ - INFO - sglang running req: 12 queue req: 35
- 2025-05-17 02:23:59,441 - sglang - INFO - [2025-05-17 02:23:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2210, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 34
- 2025-05-17 02:23:59,442 - __main__ - INFO - sglang running req: 12 queue req: 34
- 2025-05-17 02:23:59,871 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:23:59,871 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 371.41 371.41
- sglang_output_tokens 62.10 62.10
- 2025-05-17 02:23:59,871 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 20 | 67
- 2025-05-17 02:24:00,539 - sglang - INFO - [2025-05-17 02:24:00 TP0] Decode batch. #running-req: 13, #token: 29458, token usage: 0.78, gen throughput (token/s): 163.08, #queue-req: 34
- 2025-05-17 02:24:00,539 - __main__ - INFO - sglang running req: 13 queue req: 34
- 2025-05-17 02:24:00,906 - sglang - INFO - [2025-05-17 02:24:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2010, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 33
- 2025-05-17 02:24:00,907 - __main__ - INFO - sglang running req: 12 queue req: 33
- 2025-05-17 02:24:02,225 - sglang - INFO - [2025-05-17 02:24:02 TP0] Decode batch. #running-req: 13, #token: 29711, token usage: 0.78, gen throughput (token/s): 307.83, #queue-req: 33
- 2025-05-17 02:24:02,225 - __main__ - INFO - sglang running req: 13 queue req: 33
- 2025-05-17 02:24:02,667 - sglang - INFO - [2025-05-17 02:24:02 TP0] Prefill batch. #new-seq: 1, #new-token: 1968, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 32
- 2025-05-17 02:24:02,667 - __main__ - INFO - sglang running req: 12 queue req: 32
- 2025-05-17 02:24:03,915 - sglang - INFO - [2025-05-17 02:24:03 TP0] Decode batch. #running-req: 13, #token: 27024, token usage: 0.71, gen throughput (token/s): 307.12, #queue-req: 32
- 2025-05-17 02:24:03,915 - __main__ - INFO - sglang running req: 13 queue req: 32
- 2025-05-17 02:24:03,939 - sglang - INFO - [2025-05-17 02:24:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1958, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 31
- 2025-05-17 02:24:03,939 - __main__ - INFO - sglang running req: 12 queue req: 31
- 2025-05-17 02:24:04,935 - sglang - INFO - [2025-05-17 02:24:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1902, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 30
- 2025-05-17 02:24:04,936 - __main__ - INFO - sglang running req: 12 queue req: 30
- 2025-05-17 02:24:06,303 - sglang - INFO - [2025-05-17 02:24:06 TP0] Decode batch. #running-req: 13, #token: 28972, token usage: 0.76, gen throughput (token/s): 216.92, #queue-req: 30
- 2025-05-17 02:24:06,303 - __main__ - INFO - sglang running req: 13 queue req: 30
- 2025-05-17 02:24:07,088 - sglang - INFO - [2025-05-17 02:24:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2210, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 29
- 2025-05-17 02:24:07,088 - __main__ - INFO - sglang running req: 12 queue req: 29
- 2025-05-17 02:24:08,064 - sglang - INFO - [2025-05-17 02:24:08 TP0] Decode batch. #running-req: 13, #token: 29012, token usage: 0.76, gen throughput (token/s): 294.63, #queue-req: 29
- 2025-05-17 02:24:08,064 - __main__ - INFO - sglang running req: 13 queue req: 29
- 2025-05-17 02:24:09,054 - sglang - INFO - [2025-05-17 02:24:09 TP0] Decode batch. #running-req: 13, #token: 29532, token usage: 0.78, gen throughput (token/s): 525.44, #queue-req: 29
- 2025-05-17 02:24:09,054 - __main__ - INFO - sglang running req: 13 queue req: 29
- 2025-05-17 02:24:09,872 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:24:09,873 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 431.29 431.29
- sglang_output_tokens 75.72 75.72
- 2025-05-17 02:24:09,873 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 25 | 67
- 2025-05-17 02:24:10,037 - sglang - INFO - [2025-05-17 02:24:10 TP0] Decode batch. #running-req: 13, #token: 30052, token usage: 0.79, gen throughput (token/s): 528.96, #queue-req: 29
- 2025-05-17 02:24:10,037 - __main__ - INFO - sglang running req: 13 queue req: 29
- 2025-05-17 02:24:10,531 - sglang - INFO - [2025-05-17 02:24:10 TP0] Prefill batch. #new-seq: 1, #new-token: 1977, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 28
- 2025-05-17 02:24:10,532 - __main__ - INFO - sglang running req: 12 queue req: 28
- 2025-05-17 02:24:11,732 - sglang - INFO - [2025-05-17 02:24:11 TP0] Decode batch. #running-req: 13, #token: 30070, token usage: 0.79, gen throughput (token/s): 306.20, #queue-req: 28
- 2025-05-17 02:24:11,732 - __main__ - INFO - sglang running req: 13 queue req: 28
- 2025-05-17 02:24:11,831 - sglang - INFO - [2025-05-17 02:24:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1920, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 27
- 2025-05-17 02:24:11,831 - __main__ - INFO - sglang running req: 12 queue req: 27
- 2025-05-17 02:24:12,613 - sglang - INFO - [2025-05-17 02:24:12 TP0] Prefill batch. #new-seq: 1, #new-token: 1953, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 26
- 2025-05-17 02:24:12,613 - __main__ - INFO - sglang running req: 12 queue req: 26
- 2025-05-17 02:24:13,528 - sglang - INFO - [2025-05-17 02:24:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2056, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 25
- 2025-05-17 02:24:13,528 - __main__ - INFO - sglang running req: 12 queue req: 25
- 2025-05-17 02:24:14,834 - sglang - INFO - [2025-05-17 02:24:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2109, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 24
- 2025-05-17 02:24:14,834 - __main__ - INFO - sglang running req: 12 queue req: 24
- 2025-05-17 02:24:15,617 - sglang - INFO - [2025-05-17 02:24:15 TP0] Decode batch. #running-req: 13, #token: 29043, token usage: 0.76, gen throughput (token/s): 132.83, #queue-req: 24
- 2025-05-17 02:24:15,617 - __main__ - INFO - sglang running req: 13 queue req: 24
- 2025-05-17 02:24:15,666 - sglang - INFO - [2025-05-17 02:24:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1907, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 23
- 2025-05-17 02:24:15,667 - __main__ - INFO - sglang running req: 12 queue req: 23
- 2025-05-17 02:24:16,815 - sglang - INFO - [2025-05-17 02:24:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2241, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 22
- 2025-05-17 02:24:16,815 - __main__ - INFO - sglang running req: 12 queue req: 22
- 2025-05-17 02:24:18,108 - sglang - INFO - [2025-05-17 02:24:18 TP0] Decode batch. #running-req: 13, #token: 28773, token usage: 0.76, gen throughput (token/s): 207.93, #queue-req: 22
- 2025-05-17 02:24:18,108 - __main__ - INFO - sglang running req: 13 queue req: 22
- 2025-05-17 02:24:18,404 - sglang - INFO - [2025-05-17 02:24:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2175, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 21
- 2025-05-17 02:24:18,405 - __main__ - INFO - sglang running req: 12 queue req: 21
- 2025-05-17 02:24:19,827 - sglang - INFO - [2025-05-17 02:24:19 TP0] Decode batch. #running-req: 13, #token: 29186, token usage: 0.77, gen throughput (token/s): 301.88, #queue-req: 21
- 2025-05-17 02:24:19,827 - __main__ - INFO - sglang running req: 13 queue req: 21
- 2025-05-17 02:24:19,874 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:24:19,874 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 528.39 528.39
- sglang_output_tokens 95.05 95.05
- 2025-05-17 02:24:19,874 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 33 | 67
- 2025-05-17 02:24:20,470 - sglang - INFO - [2025-05-17 02:24:20 TP0] Prefill batch. #new-seq: 1, #new-token: 1944, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 20
- 2025-05-17 02:24:20,471 - __main__ - INFO - sglang running req: 12 queue req: 20
- 2025-05-17 02:24:21,530 - sglang - INFO - [2025-05-17 02:24:21 TP0] Decode batch. #running-req: 13, #token: 29065, token usage: 0.77, gen throughput (token/s): 304.70, #queue-req: 20
- 2025-05-17 02:24:21,531 - __main__ - INFO - sglang running req: 13 queue req: 20
- 2025-05-17 02:24:22,002 - sglang - INFO - [2025-05-17 02:24:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2014, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 19
- 2025-05-17 02:24:22,002 - __main__ - INFO - sglang running req: 12 queue req: 19
- 2025-05-17 02:24:23,065 - sglang - INFO - [2025-05-17 02:24:23 TP0] Prefill batch. #new-seq: 1, #new-token: 1983, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 18
- 2025-05-17 02:24:23,066 - __main__ - INFO - sglang running req: 12 queue req: 18
- 2025-05-17 02:24:23,954 - sglang - INFO - [2025-05-17 02:24:23 TP0] Decode batch. #running-req: 13, #token: 28964, token usage: 0.76, gen throughput (token/s): 213.75, #queue-req: 18
- 2025-05-17 02:24:23,954 - __main__ - INFO - sglang running req: 13 queue req: 18
- 2025-05-17 02:24:24,946 - sglang - INFO - [2025-05-17 02:24:24 TP0] Decode batch. #running-req: 13, #token: 29484, token usage: 0.78, gen throughput (token/s): 524.27, #queue-req: 18
- 2025-05-17 02:24:24,946 - __main__ - INFO - sglang running req: 13 queue req: 18
- 2025-05-17 02:24:25,933 - sglang - INFO - [2025-05-17 02:24:25 TP0] Decode batch. #running-req: 13, #token: 30004, token usage: 0.79, gen throughput (token/s): 526.64, #queue-req: 18
- 2025-05-17 02:24:25,933 - __main__ - INFO - sglang running req: 13 queue req: 18
- 2025-05-17 02:24:26,680 - sglang - INFO - [2025-05-17 02:24:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2098, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 17
- 2025-05-17 02:24:26,681 - __main__ - INFO - sglang running req: 12 queue req: 17
- 2025-05-17 02:24:27,679 - sglang - INFO - [2025-05-17 02:24:27 TP0] Decode batch. #running-req: 13, #token: 29809, token usage: 0.78, gen throughput (token/s): 297.21, #queue-req: 17
- 2025-05-17 02:24:27,680 - __main__ - INFO - sglang running req: 13 queue req: 17
- 2025-05-17 02:24:27,803 - sglang - INFO - [2025-05-17 02:24:27 TP0] Prefill batch. #new-seq: 1, #new-token: 1136, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 16
- 2025-05-17 02:24:27,804 - __main__ - INFO - sglang running req: 12 queue req: 16
- 2025-05-17 02:24:28,573 - sglang - INFO - [2025-05-17 02:24:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2213, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 15
- 2025-05-17 02:24:28,574 - __main__ - INFO - sglang running req: 12 queue req: 15
- 2025-05-17 02:24:29,472 - sglang - INFO - [2025-05-17 02:24:29 TP0] Prefill batch. #new-seq: 1, #new-token: 1893, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 14
- 2025-05-17 02:24:29,473 - __main__ - INFO - sglang running req: 12 queue req: 14
- 2025-05-17 02:24:29,875 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:24:29,876 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 595.63 595.63
- sglang_output_tokens 109.08 109.08
- 2025-05-17 02:24:29,876 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 40 | 67
- 2025-05-17 02:24:30,432 - sglang - INFO - [2025-05-17 02:24:30 TP0] Prefill batch. #new-seq: 1, #new-token: 1929, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 13
- 2025-05-17 02:24:30,432 - __main__ - INFO - sglang running req: 12 queue req: 13
- 2025-05-17 02:24:31,420 - sglang - INFO - [2025-05-17 02:24:31 TP0] Decode batch. #running-req: 12, #token: 27081, token usage: 0.71, gen throughput (token/s): 137.67, #queue-req: 13
- 2025-05-17 02:24:31,420 - __main__ - INFO - sglang running req: 12 queue req: 13
- 2025-05-17 02:24:31,421 - sglang - INFO - [2025-05-17 02:24:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1893, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 12
- 2025-05-17 02:24:31,421 - __main__ - INFO - sglang running req: 12 queue req: 12
- 2025-05-17 02:24:33,054 - sglang - INFO - [2025-05-17 02:24:33 TP0] Prefill batch. #new-seq: 1, #new-token: 1136, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 11
- 2025-05-17 02:24:33,054 - __main__ - INFO - sglang running req: 12 queue req: 11
- 2025-05-17 02:24:33,658 - sglang - INFO - [2025-05-17 02:24:33 TP0] Decode batch. #running-req: 13, #token: 28368, token usage: 0.75, gen throughput (token/s): 231.94, #queue-req: 11
- 2025-05-17 02:24:33,658 - __main__ - INFO - sglang running req: 13 queue req: 11
- 2025-05-17 02:24:33,979 - sglang - INFO - [2025-05-17 02:24:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2086, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 10
- 2025-05-17 02:24:33,979 - __main__ - INFO - sglang running req: 12 queue req: 10
- 2025-05-17 02:24:35,208 - sglang - INFO - [2025-05-17 02:24:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2010, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 9
- 2025-05-17 02:24:35,208 - __main__ - INFO - sglang running req: 12 queue req: 9
- 2025-05-17 02:24:36,127 - sglang - INFO - [2025-05-17 02:24:36 TP0] Decode batch. #running-req: 13, #token: 29602, token usage: 0.78, gen throughput (token/s): 209.83, #queue-req: 9
- 2025-05-17 02:24:36,127 - __main__ - INFO - sglang running req: 13 queue req: 9
- 2025-05-17 02:24:36,499 - sglang - INFO - [2025-05-17 02:24:36 TP0] Prefill batch. #new-seq: 1, #new-token: 1885, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 8
- 2025-05-17 02:24:36,499 - __main__ - INFO - sglang running req: 12 queue req: 8
- 2025-05-17 02:24:37,835 - sglang - INFO - [2025-05-17 02:24:37 TP0] Decode batch. #running-req: 12, #token: 26663, token usage: 0.70, gen throughput (token/s): 303.22, #queue-req: 8
- 2025-05-17 02:24:37,835 - __main__ - INFO - sglang running req: 12 queue req: 8
- 2025-05-17 02:24:37,835 - sglang - INFO - [2025-05-17 02:24:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1908, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 7
- 2025-05-17 02:24:37,835 - __main__ - INFO - sglang running req: 12 queue req: 7
- 2025-05-17 02:24:39,541 - sglang - INFO - [2025-05-17 02:24:39 TP0] Decode batch. #running-req: 13, #token: 29091, token usage: 0.77, gen throughput (token/s): 304.79, #queue-req: 7
- 2025-05-17 02:24:39,541 - __main__ - INFO - sglang running req: 13 queue req: 7
- 2025-05-17 02:24:39,640 - sglang - INFO - [2025-05-17 02:24:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2123, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 6
- 2025-05-17 02:24:39,641 - __main__ - INFO - sglang running req: 12 queue req: 6
- 2025-05-17 02:24:39,878 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:24:39,879 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 654.89 654.89
- sglang_output_tokens 118.22 118.22
- 2025-05-17 02:24:39,879 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 48 | 67
- 2025-05-17 02:24:41,274 - sglang - INFO - [2025-05-17 02:24:41 TP0] Decode batch. #running-req: 13, #token: 29420, token usage: 0.77, gen throughput (token/s): 299.44, #queue-req: 6
- 2025-05-17 02:24:41,274 - __main__ - INFO - sglang running req: 13 queue req: 6
- 2025-05-17 02:24:42,268 - sglang - INFO - [2025-05-17 02:24:42 TP0] Decode batch. #running-req: 13, #token: 29940, token usage: 0.79, gen throughput (token/s): 523.23, #queue-req: 6
- 2025-05-17 02:24:42,268 - __main__ - INFO - sglang running req: 13 queue req: 6
- 2025-05-17 02:24:43,264 - sglang - INFO - [2025-05-17 02:24:43 TP0] Decode batch. #running-req: 13, #token: 30460, token usage: 0.80, gen throughput (token/s): 522.07, #queue-req: 6
- 2025-05-17 02:24:43,264 - __main__ - INFO - sglang running req: 13 queue req: 6
- 2025-05-17 02:24:43,464 - sglang - INFO - [2025-05-17 02:24:43 TP0] Prefill batch. #new-seq: 1, #new-token: 1915, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 5
- 2025-05-17 02:24:43,464 - __main__ - INFO - sglang running req: 12 queue req: 5
- 2025-05-17 02:24:44,500 - sglang - INFO - [2025-05-17 02:24:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1902, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 4
- 2025-05-17 02:24:44,501 - __main__ - INFO - sglang running req: 12 queue req: 4
- 2025-05-17 02:24:45,398 - sglang - INFO - [2025-05-17 02:24:45 TP0] Prefill batch. #new-seq: 1, #new-token: 1938, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 3
- 2025-05-17 02:24:45,398 - __main__ - INFO - sglang running req: 12 queue req: 3
- 2025-05-17 02:24:46,270 - sglang - INFO - [2025-05-17 02:24:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2166, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 2
- 2025-05-17 02:24:46,270 - __main__ - INFO - sglang running req: 12 queue req: 2
- 2025-05-17 02:24:47,163 - sglang - INFO - [2025-05-17 02:24:47 TP0] Decode batch. #running-req: 13, #token: 28597, token usage: 0.75, gen throughput (token/s): 132.35, #queue-req: 2
- 2025-05-17 02:24:47,163 - __main__ - INFO - sglang running req: 13 queue req: 2
- 2025-05-17 02:24:47,262 - sglang - INFO - [2025-05-17 02:24:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2186, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 1
- 2025-05-17 02:24:47,263 - __main__ - INFO - sglang running req: 12 queue req: 1
- 2025-05-17 02:24:48,964 - sglang - INFO - [2025-05-17 02:24:48 TP0] Decode batch. #running-req: 13, #token: 29041, token usage: 0.76, gen throughput (token/s): 288.14, #queue-req: 1
- 2025-05-17 02:24:48,964 - __main__ - INFO - sglang running req: 13 queue req: 1
- 2025-05-17 02:24:49,014 - sglang - INFO - [2025-05-17 02:24:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2101, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 0
- 2025-05-17 02:24:49,014 - __main__ - INFO - sglang running req: 12 queue req: 0
- 2025-05-17 02:24:49,301 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 02:24:49,301 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 02:24:49,301 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 02:24:49,301 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 02:24:49,301 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 02:24:49,302 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 02:24:49,302 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 02:24:49,302 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 02:24:49,880 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:24:49,880 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 692.31 692.31
- sglang_output_tokens 126.72 126.72
- 2025-05-17 02:24:49,880 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 54 | 67
- 2025-05-17 02:24:50,720 - sglang - INFO - [2025-05-17 02:24:50 TP0] Decode batch. #running-req: 13, #token: 29512, token usage: 0.78, gen throughput (token/s): 295.57, #queue-req: 0
- 2025-05-17 02:24:50,720 - __main__ - INFO - sglang running req: 13 queue req: 0
- 2025-05-17 02:24:51,690 - sglang - INFO - [2025-05-17 02:24:51 TP0] Decode batch. #running-req: 9, #token: 20501, token usage: 0.54, gen throughput (token/s): 410.23, #queue-req: 0
- 2025-05-17 02:24:51,690 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-05-17 02:24:52,652 - sglang - INFO - [2025-05-17 02:24:52 TP0] Decode batch. #running-req: 9, #token: 20861, token usage: 0.55, gen throughput (token/s): 374.15, #queue-req: 0
- 2025-05-17 02:24:52,653 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-05-17 02:24:53,615 - sglang - INFO - [2025-05-17 02:24:53 TP0] Decode batch. #running-req: 9, #token: 21221, token usage: 0.56, gen throughput (token/s): 373.93, #queue-req: 0
- 2025-05-17 02:24:53,615 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-05-17 02:24:54,519 - sglang - INFO - [2025-05-17 02:24:54 TP0] Decode batch. #running-req: 5, #token: 11588, token usage: 0.31, gen throughput (token/s): 295.32, #queue-req: 0
- 2025-05-17 02:24:54,519 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:24:55,410 - sglang - INFO - [2025-05-17 02:24:55 TP0] Decode batch. #running-req: 5, #token: 11788, token usage: 0.31, gen throughput (token/s): 224.53, #queue-req: 0
- 2025-05-17 02:24:55,410 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:24:56,299 - sglang - INFO - [2025-05-17 02:24:56 TP0] Decode batch. #running-req: 5, #token: 11988, token usage: 0.32, gen throughput (token/s): 225.02, #queue-req: 0
- 2025-05-17 02:24:56,299 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:24:57,181 - sglang - INFO - [2025-05-17 02:24:57 TP0] Decode batch. #running-req: 4, #token: 9907, token usage: 0.26, gen throughput (token/s): 197.18, #queue-req: 0
- 2025-05-17 02:24:57,181 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 02:24:58,058 - sglang - INFO - [2025-05-17 02:24:58 TP0] Decode batch. #running-req: 4, #token: 10067, token usage: 0.27, gen throughput (token/s): 182.43, #queue-req: 0
- 2025-05-17 02:24:58,058 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 02:24:58,933 - sglang - INFO - [2025-05-17 02:24:58 TP0] Decode batch. #running-req: 1, #token: 2719, token usage: 0.07, gen throughput (token/s): 162.32, #queue-req: 0
- 2025-05-17 02:24:58,933 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:24:58,961 - __main__ - INFO - Finished TaskGroup for worker on a47ce4ecdd6200876f5b8de00bb3ccbac96ba956
- 2025-05-17 02:24:58,961 - __main__ - INFO - Got 1 docs for a47ce4ecdd6200876f5b8de00bb3ccbac96ba956
- 2025-05-17 02:24:58,963 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 02:24:58,964 - __main__ - INFO - Work done
- 2025-05-17 02:24:58,964 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 02:38:13,235 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 02:38:13,235 - __main__ - INFO - Loading file at olmocr_workspace/job_1747420686/input.pdf as PDF document
- 2025-05-17 02:38:13,235 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 02:38:13,241 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 02:38:13,477 - __main__ - INFO - Starting pipeline with PID 379816
- 2025-05-17 02:38:13,477 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 02:38:19,275 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 02:38:20,316 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 02:38:21,360 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 02:38:22,423 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 02:38:23,492 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 02:38:24,570 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 02:38:25,634 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 02:38:25,675 - sglang - INFO - [2025-05-17 02:38:25] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1020889166, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:38:25,675 - __main__ - INFO - [2025-05-17 02:38:25] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1020889166, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 02:38:26,714 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 02:38:27,757 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 02:38:28,801 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 02:38:29,844 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 02:38:30,891 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 02:38:31,931 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 02:38:32,958 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 02:38:34,016 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 02:38:34,755 - sglang - INFO - [2025-05-17 02:38:34] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:38:34,755 - __main__ - INFO - [2025-05-17 02:38:34] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 02:38:35,089 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 02:38:36,156 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 02:38:37,224 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 02:38:38,293 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 02:38:39,354 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 02:38:40,405 - sglang - INFO - [2025-05-17 02:38:40 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:38:40,405 - __main__ - INFO - [2025-05-17 02:38:40 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 02:38:40,407 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 02:38:40,933 - sglang - INFO - [2025-05-17 02:38:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:38:40,933 - __main__ - INFO - [2025-05-17 02:38:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 02:38:40,933 - sglang - INFO - [2025-05-17 02:38:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:38:40,933 - __main__ - INFO - [2025-05-17 02:38:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 02:38:40,933 - sglang - INFO - [2025-05-17 02:38:40 TP0] Init torch distributed begin.
- 2025-05-17 02:38:40,933 - __main__ - INFO - [2025-05-17 02:38:40 TP0] Init torch distributed begin.
- 2025-05-17 02:38:41,480 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 02:38:42,548 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 02:38:43,616 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 02:38:44,683 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 02:38:45,756 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 02:38:46,297 - sglang - INFO - [2025-05-17 02:38:46 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:38:46,298 - __main__ - INFO - [2025-05-17 02:38:46 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 02:38:46,813 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 02:38:47,441 - sglang - INFO - [2025-05-17 02:38:47 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:38:47,441 - __main__ - INFO - [2025-05-17 02:38:47 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 02:38:47,868 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 02:38:47,932 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:38:47,933 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 02:38:48,258 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.08it/s]
- 2025-05-17 02:38:48,258 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.08it/s]
- 2025-05-17 02:38:48,920 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 02:38:49,257 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.39it/s]
- 2025-05-17 02:38:49,257 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.39it/s]
- 2025-05-17 02:38:49,974 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 02:38:50,250 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.18it/s]
- 2025-05-17 02:38:50,250 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.18it/s]
- 2025-05-17 02:38:51,028 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 02:38:51,216 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
- 2025-05-17 02:38:51,217 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
- 2025-05-17 02:38:51,217 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.22it/s]
- 2025-05-17 02:38:51,217 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.22it/s]
- 2025-05-17 02:38:51,217 - sglang - INFO -
- 2025-05-17 02:38:51,217 - __main__ - INFO -
- 2025-05-17 02:38:51,374 - sglang - INFO - [2025-05-17 02:38:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:38:51,374 - __main__ - INFO - [2025-05-17 02:38:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 02:38:51,381 - sglang - INFO - [2025-05-17 02:38:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:38:51,381 - __main__ - INFO - [2025-05-17 02:38:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 02:38:51,381 - sglang - INFO - [2025-05-17 02:38:51 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:38:51,381 - __main__ - INFO - [2025-05-17 02:38:51 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 02:38:51,569 - sglang - INFO - [2025-05-17 02:38:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:38:51,569 - __main__ - INFO - [2025-05-17 02:38:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 02:38:52,074 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 02:38:53,118 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 02:38:53,375 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.05s/it]
50%|█████ | 2/4 [00:01<00:01, 1.73it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.33it/s]
100%|██████████| 4/4 [00:01<00:00, 2.79it/s]
100%|██████████| 4/4 [00:01<00:00, 2.22it/s]
- 2025-05-17 02:38:53,375 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.05s/it]
50%|█████ | 2/4 [00:01<00:01, 1.73it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.33it/s]
100%|██████████| 4/4 [00:01<00:00, 2.79it/s]
100%|██████████| 4/4 [00:01<00:00, 2.22it/s]
- 2025-05-17 02:38:53,375 - sglang - INFO - [2025-05-17 02:38:53 TP0] Capture cuda graph end. Time elapsed: 1.81 s
- 2025-05-17 02:38:53,375 - __main__ - INFO - [2025-05-17 02:38:53 TP0] Capture cuda graph end. Time elapsed: 1.81 s
- 2025-05-17 02:38:54,182 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 02:38:55,246 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 02:38:55,839 - sglang - INFO - [2025-05-17 02:38:55 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:38:55,839 - __main__ - INFO - [2025-05-17 02:38:55 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 02:38:56,302 - __main__ - INFO - sglang server is ready.
- 2025-05-17 02:38:56,302 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 02:38:56,302 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:38:56,303 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 02:38:56,303 - __main__ - INFO - Worker 0 processing work item 5cb3134f25c471b5a78a0f6d882d84ad299e2a6f
- 2025-05-17 02:38:56,303 - __main__ - INFO - Created all tasks for 5cb3134f25c471b5a78a0f6d882d84ad299e2a6f
- 2025-05-17 02:38:56,308 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747420686/input.pdf in worker 0
- 2025-05-17 02:38:56,913 - sglang - INFO - [2025-05-17 02:38:56 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:38:56,913 - __main__ - INFO - [2025-05-17 02:38:56 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:38:56,913 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:38:57,760 - sglang - INFO - [2025-05-17 02:38:57] The server is fired up and ready to roll!
- 2025-05-17 02:38:57,760 - __main__ - INFO - [2025-05-17 02:38:57] The server is fired up and ready to roll!
- 2025-05-17 02:39:02,529 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-1
- 2025-05-17 02:39:02,566 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-2
- 2025-05-17 02:39:02,578 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-3
- 2025-05-17 02:39:02,594 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-4
- 2025-05-17 02:39:02,612 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-5
- 2025-05-17 02:39:06,379 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:39:06,379 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:39:06,379 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 02:39:16,381 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:39:16,381 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:39:16,381 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 02:39:24,017 - sglang - INFO - [2025-05-17 02:39:24 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 02:39:24,017 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 02:39:24,809 - sglang - INFO - [2025-05-17 02:39:24 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 02:39:24,809 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:39:26,382 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:39:26,383 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 02:39:26,383 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 02:39:28,234 - sglang - INFO - [2025-05-17 02:39:28 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.31, #queue-req: 0
- 2025-05-17 02:39:28,234 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:39:28,499 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 02:39:28,500 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 02:39:28,500 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 02:39:29,089 - sglang - INFO - [2025-05-17 02:39:29 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.75, #queue-req: 0
- 2025-05-17 02:39:29,090 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:39:29,945 - sglang - INFO - [2025-05-17 02:39:29 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.90, #queue-req: 0
- 2025-05-17 02:39:29,945 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:39:30,800 - sglang - INFO - [2025-05-17 02:39:30 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.78, #queue-req: 0
- 2025-05-17 02:39:30,800 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:39:31,656 - sglang - INFO - [2025-05-17 02:39:31 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.67, #queue-req: 0
- 2025-05-17 02:39:31,656 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:39:32,513 - sglang - INFO - [2025-05-17 02:39:32 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 233.32, #queue-req: 0
- 2025-05-17 02:39:32,513 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:39:33,374 - sglang - INFO - [2025-05-17 02:39:33 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.41, #queue-req: 0
- 2025-05-17 02:39:33,374 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:39:34,235 - sglang - INFO - [2025-05-17 02:39:34 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.14, #queue-req: 0
- 2025-05-17 02:39:34,236 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 02:39:35,094 - sglang - INFO - [2025-05-17 02:39:35 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 218.84, #queue-req: 0
- 2025-05-17 02:39:35,094 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 02:39:35,936 - sglang - INFO - [2025-05-17 02:39:35 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.67, #queue-req: 0
- 2025-05-17 02:39:35,936 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 02:39:36,384 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 02:39:36,385 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 93.47 93.47
- sglang_output_tokens 18.16 18.16
- 2025-05-17 02:39:36,385 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 5
- 2025-05-17 02:39:36,767 - sglang - INFO - [2025-05-17 02:39:36 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 79.35, #queue-req: 0
- 2025-05-17 02:39:36,768 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:39:37,592 - sglang - INFO - [2025-05-17 02:39:37 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.48, #queue-req: 0
- 2025-05-17 02:39:37,593 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 02:39:38,257 - __main__ - INFO - Finished TaskGroup for worker on 5cb3134f25c471b5a78a0f6d882d84ad299e2a6f
- 2025-05-17 02:39:38,258 - __main__ - INFO - Got 1 docs for 5cb3134f25c471b5a78a0f6d882d84ad299e2a6f
- 2025-05-17 02:39:38,259 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 02:39:38,259 - __main__ - INFO - Work done
- 2025-05-17 02:39:38,259 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 22:05:39,818 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:05:39,818 - __main__ - INFO - Loading file at olmocr_workspace/job_1747490733/input.pdf as PDF document
- 2025-05-17 22:05:39,818 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:05:39,822 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 22:05:40,045 - __main__ - INFO - Starting pipeline with PID 399029
- 2025-05-17 22:05:40,045 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:07:49,638 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 22:07:50,677 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 22:07:51,731 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 22:07:52,785 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 22:07:53,842 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 22:07:54,878 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 22:07:55,819 - sglang - INFO - [2025-05-17 22:07:55] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1026987283, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:07:55,819 - __main__ - INFO - [2025-05-17 22:07:55] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1026987283, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:07:55,995 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 22:07:57,029 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 22:07:58,107 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 22:07:59,160 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 22:08:00,227 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 22:08:01,291 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 22:08:02,364 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 22:08:02,647 - sglang - INFO - [2025-05-17 22:08:02] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:08:02,647 - __main__ - INFO - [2025-05-17 22:08:02] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:08:03,441 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 22:08:04,528 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 22:08:05,585 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 22:08:06,669 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 22:08:07,710 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 22:08:08,777 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 22:08:09,197 - sglang - INFO - [2025-05-17 22:08:09 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:08:09,198 - __main__ - INFO - [2025-05-17 22:08:09 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:08:09,380 - sglang - INFO - [2025-05-17 22:08:09 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:08:09,380 - __main__ - INFO - [2025-05-17 22:08:09 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:08:09,380 - sglang - INFO - [2025-05-17 22:08:09 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:08:09,380 - __main__ - INFO - [2025-05-17 22:08:09 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:08:09,380 - sglang - INFO - [2025-05-17 22:08:09 TP0] Init torch distributed begin.
- 2025-05-17 22:08:09,380 - __main__ - INFO - [2025-05-17 22:08:09 TP0] Init torch distributed begin.
- 2025-05-17 22:08:09,854 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 22:08:10,935 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 22:08:11,995 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 22:08:13,066 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 22:08:14,131 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 22:08:14,699 - sglang - INFO - [2025-05-17 22:08:14 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:08:14,699 - __main__ - INFO - [2025-05-17 22:08:14 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:08:15,208 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 22:08:15,384 - sglang - INFO - [2025-05-17 22:08:15 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-05-17 22:08:15,384 - __main__ - INFO - [2025-05-17 22:08:15 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-05-17 22:08:15,384 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-05-17 22:08:15,384 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-05-17 22:08:15,384 - sglang - INFO - sock = connection.create_connection(
- 2025-05-17 22:08:15,384 - __main__ - INFO - sock = connection.create_connection(
- 2025-05-17 22:08:15,384 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,384 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-05-17 22:08:15,385 - sglang - INFO - raise err
- 2025-05-17 22:08:15,385 - __main__ - INFO - raise err
- 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-05-17 22:08:15,385 - sglang - INFO - sock.connect(sa)
- 2025-05-17 22:08:15,385 - __main__ - INFO - sock.connect(sa)
- 2025-05-17 22:08:15,385 - sglang - INFO - OSError: [Errno 101] Network is unreachable
- 2025-05-17 22:08:15,385 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
- 2025-05-17 22:08:15,385 - sglang - INFO -
- 2025-05-17 22:08:15,385 - __main__ - INFO -
- 2025-05-17 22:08:15,385 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:08:15,385 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:08:15,385 - sglang - INFO -
- 2025-05-17 22:08:15,385 - __main__ - INFO -
- 2025-05-17 22:08:15,385 - sglang - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:15,385 - __main__ - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-05-17 22:08:15,385 - sglang - INFO - response = self._make_request(
- 2025-05-17 22:08:15,385 - __main__ - INFO - response = self._make_request(
- 2025-05-17 22:08:15,385 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,385 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-05-17 22:08:15,385 - sglang - INFO - raise new_e
- 2025-05-17 22:08:15,385 - __main__ - INFO - raise new_e
- 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-05-17 22:08:15,386 - sglang - INFO - self._validate_conn(conn)
- 2025-05-17 22:08:15,386 - __main__ - INFO - self._validate_conn(conn)
- 2025-05-17 22:08:15,386 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-05-17 22:08:15,386 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-05-17 22:08:15,386 - sglang - INFO - conn.connect()
- 2025-05-17 22:08:15,386 - __main__ - INFO - conn.connect()
- 2025-05-17 22:08:15,386 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-05-17 22:08:15,386 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-05-17 22:08:15,386 - sglang - INFO - self.sock = sock = self._new_conn()
- 2025-05-17 22:08:15,386 - __main__ - INFO - self.sock = sock = self._new_conn()
- 2025-05-17 22:08:15,386 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,386 - __main__ - INFO - ^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,386 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-05-17 22:08:15,386 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-05-17 22:08:15,386 - sglang - INFO - raise NewConnectionError(
- 2025-05-17 22:08:15,386 - __main__ - INFO - raise NewConnectionError(
- 2025-05-17 22:08:15,386 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-05-17 22:08:15,386 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-05-17 22:08:15,386 - sglang - INFO -
- 2025-05-17 22:08:15,386 - __main__ - INFO -
- 2025-05-17 22:08:15,386 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:08:15,386 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:08:15,386 - sglang - INFO -
- 2025-05-17 22:08:15,386 - __main__ - INFO -
- 2025-05-17 22:08:15,386 - sglang - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:15,386 - __main__ - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:15,386 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-05-17 22:08:15,386 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-05-17 22:08:15,386 - sglang - INFO - resp = conn.urlopen(
- 2025-05-17 22:08:15,386 - __main__ - INFO - resp = conn.urlopen(
- 2025-05-17 22:08:15,386 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-05-17 22:08:15,386 - __main__ - INFO - ^^^^^^^^^^^^^
- 2025-05-17 22:08:15,387 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-05-17 22:08:15,387 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-05-17 22:08:15,387 - sglang - INFO - retries = retries.increment(
- 2025-05-17 22:08:15,387 - __main__ - INFO - retries = retries.increment(
- 2025-05-17 22:08:15,387 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,387 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,387 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-05-17 22:08:15,387 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-05-17 22:08:15,387 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-05-17 22:08:15,387 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-05-17 22:08:15,387 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,387 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,387 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-05-17 22:08:15,387 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-05-17 22:08:15,387 - sglang - INFO -
- 2025-05-17 22:08:15,387 - __main__ - INFO -
- 2025-05-17 22:08:15,387 - sglang - INFO - During handling of the above exception, another exception occurred:
- 2025-05-17 22:08:15,387 - __main__ - INFO - During handling of the above exception, another exception occurred:
- 2025-05-17 22:08:15,387 - sglang - INFO -
- 2025-05-17 22:08:15,387 - __main__ - INFO -
- 2025-05-17 22:08:15,387 - sglang - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:15,387 - __main__ - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:15,387 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-05-17 22:08:15,387 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-05-17 22:08:15,387 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-05-17 22:08:15,387 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-05-17 22:08:15,387 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,387 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,387 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-05-17 22:08:15,387 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-05-17 22:08:15,387 - sglang - INFO - self.tp_worker = TpWorkerClass(
- 2025-05-17 22:08:15,387 - __main__ - INFO - self.tp_worker = TpWorkerClass(
- 2025-05-17 22:08:15,388 - sglang - INFO - ^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,388 - __main__ - INFO - ^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-05-17 22:08:15,388 - sglang - INFO - self.model_runner = ModelRunner(
- 2025-05-17 22:08:15,388 - __main__ - INFO - self.model_runner = ModelRunner(
- 2025-05-17 22:08:15,388 - sglang - INFO - ^^^^^^^^^^^^
- 2025-05-17 22:08:15,388 - __main__ - INFO - ^^^^^^^^^^^^
- 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-05-17 22:08:15,388 - sglang - INFO - self.load_model()
- 2025-05-17 22:08:15,388 - __main__ - INFO - self.load_model()
- 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-05-17 22:08:15,388 - sglang - INFO - self.model = get_model(
- 2025-05-17 22:08:15,388 - __main__ - INFO - self.model = get_model(
- 2025-05-17 22:08:15,388 - sglang - INFO - ^^^^^^^^^^
- 2025-05-17 22:08:15,388 - __main__ - INFO - ^^^^^^^^^^
- 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-05-17 22:08:15,388 - sglang - INFO - return loader.load_model(
- 2025-05-17 22:08:15,388 - __main__ - INFO - return loader.load_model(
- 2025-05-17 22:08:15,388 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,388 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-05-17 22:08:15,388 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-05-17 22:08:15,388 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-05-17 22:08:15,388 - sglang - INFO - for name, loaded_weight in weights:
- 2025-05-17 22:08:15,388 - __main__ - INFO - for name, loaded_weight in weights:
- 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-05-17 22:08:15,389 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-05-17 22:08:15,389 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-05-17 22:08:15,389 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-05-17 22:08:15,389 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-05-17 22:08:15,389 - sglang - INFO - hf_folder = download_weights_from_hf(
- 2025-05-17 22:08:15,389 - __main__ - INFO - hf_folder = download_weights_from_hf(
- 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-05-17 22:08:15,389 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-05-17 22:08:15,389 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-05-17 22:08:15,389 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-05-17 22:08:15,389 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-05-17 22:08:15,389 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-05-17 22:08:15,390 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-05-17 22:08:15,390 - sglang - INFO - self._api.repo_info(
- 2025-05-17 22:08:15,390 - __main__ - INFO - self._api.repo_info(
- 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:08:15,390 - sglang - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:08:15,390 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-05-17 22:08:15,390 - sglang - INFO - return method(
- 2025-05-17 22:08:15,390 - __main__ - INFO - return method(
- 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^
- 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^
- 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:08:15,390 - sglang - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:08:15,390 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-05-17 22:08:15,390 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-05-17 22:08:15,390 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-05-17 22:08:15,391 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-05-17 22:08:15,391 - sglang - INFO - return self.request("GET", url, **kwargs)
- 2025-05-17 22:08:15,391 - __main__ - INFO - return self.request("GET", url, **kwargs)
- 2025-05-17 22:08:15,391 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,391 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-05-17 22:08:15,391 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-05-17 22:08:15,391 - sglang - INFO - resp = self.send(prep, **send_kwargs)
- 2025-05-17 22:08:15,391 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
- 2025-05-17 22:08:15,391 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,391 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-05-17 22:08:15,391 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-05-17 22:08:15,391 - sglang - INFO - r = adapter.send(request, **kwargs)
- 2025-05-17 22:08:15,391 - __main__ - INFO - r = adapter.send(request, **kwargs)
- 2025-05-17 22:08:15,391 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,391 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-05-17 22:08:15,391 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-05-17 22:08:15,391 - sglang - INFO - return super().send(request, *args, **kwargs)
- 2025-05-17 22:08:15,391 - __main__ - INFO - return super().send(request, *args, **kwargs)
- 2025-05-17 22:08:15,392 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,392 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:15,392 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-05-17 22:08:15,392 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-05-17 22:08:15,392 - sglang - INFO - raise ConnectionError(e, request=request)
- 2025-05-17 22:08:15,392 - __main__ - INFO - raise ConnectionError(e, request=request)
- 2025-05-17 22:08:15,392 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: ae2f99d1-1701-487a-8bb1-775aa4b85868)')
- 2025-05-17 22:08:15,392 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: ae2f99d1-1701-487a-8bb1-775aa4b85868)')
- 2025-05-17 22:08:15,392 - sglang - INFO -
- 2025-05-17 22:08:15,392 - __main__ - INFO -
- 2025-05-17 22:08:15,393 - sglang - INFO - [2025-05-17 22:08:15] Received sigquit from a child proces. It usually means the child failed.
- 2025-05-17 22:08:15,393 - __main__ - INFO - [2025-05-17 22:08:15] Received sigquit from a child proces. It usually means the child failed.
- 2025-05-17 22:08:15,556 - __main__ - WARNING - SGLang server task ended
- 2025-05-17 22:08:16,284 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 22:08:17,351 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 22:08:18,418 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 22:08:19,486 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 22:08:20,554 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 22:08:21,617 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 22:08:21,653 - sglang - INFO - [2025-05-17 22:08:21] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=245788542, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:08:21,653 - __main__ - INFO - [2025-05-17 22:08:21] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=245788542, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:08:22,679 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 22:08:23,765 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 22:08:24,823 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 22:08:25,890 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 22:08:27,034 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-17 22:08:28,099 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-05-17 22:08:28,528 - sglang - INFO - [2025-05-17 22:08:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:08:28,528 - __main__ - INFO - [2025-05-17 22:08:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:08:29,176 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-05-17 22:08:30,243 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-05-17 22:08:31,310 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-05-17 22:08:32,378 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-05-17 22:08:33,446 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-05-17 22:08:34,428 - sglang - INFO - [2025-05-17 22:08:34 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:08:34,428 - __main__ - INFO - [2025-05-17 22:08:34 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:08:34,522 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-05-17 22:08:34,610 - sglang - INFO - [2025-05-17 22:08:34 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:08:34,610 - __main__ - INFO - [2025-05-17 22:08:34 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:08:34,610 - sglang - INFO - [2025-05-17 22:08:34 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:08:34,610 - __main__ - INFO - [2025-05-17 22:08:34 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:08:34,610 - sglang - INFO - [2025-05-17 22:08:34 TP0] Init torch distributed begin.
- 2025-05-17 22:08:34,611 - __main__ - INFO - [2025-05-17 22:08:34 TP0] Init torch distributed begin.
- 2025-05-17 22:08:35,599 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-05-17 22:08:36,667 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-05-17 22:08:37,722 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-05-17 22:08:38,789 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-05-17 22:08:39,857 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-05-17 22:08:39,992 - sglang - INFO - [2025-05-17 22:08:39 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:08:39,992 - __main__ - INFO - [2025-05-17 22:08:39 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:08:40,668 - sglang - INFO - [2025-05-17 22:08:40 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-05-17 22:08:40,668 - __main__ - INFO - [2025-05-17 22:08:40 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-05-17 22:08:40,669 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-05-17 22:08:40,669 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-05-17 22:08:40,669 - sglang - INFO - sock = connection.create_connection(
- 2025-05-17 22:08:40,669 - __main__ - INFO - sock = connection.create_connection(
- 2025-05-17 22:08:40,669 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,669 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,669 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-05-17 22:08:40,669 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-05-17 22:08:40,669 - sglang - INFO - raise err
- 2025-05-17 22:08:40,669 - __main__ - INFO - raise err
- 2025-05-17 22:08:40,669 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-05-17 22:08:40,669 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-05-17 22:08:40,669 - sglang - INFO - sock.connect(sa)
- 2025-05-17 22:08:40,669 - __main__ - INFO - sock.connect(sa)
- 2025-05-17 22:08:40,670 - sglang - INFO - OSError: [Errno 101] Network is unreachable
- 2025-05-17 22:08:40,670 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
- 2025-05-17 22:08:40,670 - sglang - INFO -
- 2025-05-17 22:08:40,670 - __main__ - INFO -
- 2025-05-17 22:08:40,670 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:08:40,670 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:08:40,670 - sglang - INFO -
- 2025-05-17 22:08:40,670 - __main__ - INFO -
- 2025-05-17 22:08:40,670 - sglang - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:40,670 - __main__ - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:40,670 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-05-17 22:08:40,670 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-05-17 22:08:40,670 - sglang - INFO - response = self._make_request(
- 2025-05-17 22:08:40,670 - __main__ - INFO - response = self._make_request(
- 2025-05-17 22:08:40,670 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,670 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,671 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-05-17 22:08:40,671 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-05-17 22:08:40,671 - sglang - INFO - raise new_e
- 2025-05-17 22:08:40,671 - __main__ - INFO - raise new_e
- 2025-05-17 22:08:40,671 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-05-17 22:08:40,671 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-05-17 22:08:40,671 - sglang - INFO - self._validate_conn(conn)
- 2025-05-17 22:08:40,671 - __main__ - INFO - self._validate_conn(conn)
- 2025-05-17 22:08:40,671 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-05-17 22:08:40,671 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-05-17 22:08:40,671 - sglang - INFO - conn.connect()
- 2025-05-17 22:08:40,671 - __main__ - INFO - conn.connect()
- 2025-05-17 22:08:40,671 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-05-17 22:08:40,671 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-05-17 22:08:40,671 - sglang - INFO - self.sock = sock = self._new_conn()
- 2025-05-17 22:08:40,671 - __main__ - INFO - self.sock = sock = self._new_conn()
- 2025-05-17 22:08:40,671 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,672 - __main__ - INFO - ^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,672 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-05-17 22:08:40,672 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-05-17 22:08:40,672 - sglang - INFO - raise NewConnectionError(
- 2025-05-17 22:08:40,672 - __main__ - INFO - raise NewConnectionError(
- 2025-05-17 22:08:40,672 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-05-17 22:08:40,672 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-05-17 22:08:40,672 - sglang - INFO -
- 2025-05-17 22:08:40,672 - __main__ - INFO -
- 2025-05-17 22:08:40,672 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:08:40,672 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:08:40,672 - sglang - INFO -
- 2025-05-17 22:08:40,672 - __main__ - INFO -
- 2025-05-17 22:08:40,672 - sglang - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:40,672 - __main__ - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:40,672 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-05-17 22:08:40,672 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-05-17 22:08:40,673 - sglang - INFO - resp = conn.urlopen(
- 2025-05-17 22:08:40,673 - __main__ - INFO - resp = conn.urlopen(
- 2025-05-17 22:08:40,673 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-05-17 22:08:40,673 - __main__ - INFO - ^^^^^^^^^^^^^
- 2025-05-17 22:08:40,673 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-05-17 22:08:40,673 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-05-17 22:08:40,673 - sglang - INFO - retries = retries.increment(
- 2025-05-17 22:08:40,673 - __main__ - INFO - retries = retries.increment(
- 2025-05-17 22:08:40,673 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,673 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,673 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-05-17 22:08:40,673 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-05-17 22:08:40,673 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-05-17 22:08:40,673 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-05-17 22:08:40,673 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,673 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,674 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-05-17 22:08:40,674 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-05-17 22:08:40,674 - sglang - INFO -
- 2025-05-17 22:08:40,674 - __main__ - INFO -
- 2025-05-17 22:08:40,674 - sglang - INFO - During handling of the above exception, another exception occurred:
- 2025-05-17 22:08:40,674 - __main__ - INFO - During handling of the above exception, another exception occurred:
- 2025-05-17 22:08:40,674 - sglang - INFO -
- 2025-05-17 22:08:40,674 - __main__ - INFO -
- 2025-05-17 22:08:40,674 - sglang - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:40,674 - __main__ - INFO - Traceback (most recent call last):
- 2025-05-17 22:08:40,674 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-05-17 22:08:40,674 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-05-17 22:08:40,674 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-05-17 22:08:40,674 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-05-17 22:08:40,674 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,674 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,674 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-05-17 22:08:40,675 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-05-17 22:08:40,675 - sglang - INFO - self.tp_worker = TpWorkerClass(
- 2025-05-17 22:08:40,675 - __main__ - INFO - self.tp_worker = TpWorkerClass(
- 2025-05-17 22:08:40,675 - sglang - INFO - ^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,675 - __main__ - INFO - ^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,675 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-05-17 22:08:40,675 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-05-17 22:08:40,675 - sglang - INFO - self.model_runner = ModelRunner(
- 2025-05-17 22:08:40,675 - __main__ - INFO - self.model_runner = ModelRunner(
- 2025-05-17 22:08:40,675 - sglang - INFO - ^^^^^^^^^^^^
- 2025-05-17 22:08:40,675 - __main__ - INFO - ^^^^^^^^^^^^
- 2025-05-17 22:08:40,675 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-05-17 22:08:40,675 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-05-17 22:08:40,675 - sglang - INFO - self.load_model()
- 2025-05-17 22:08:40,675 - __main__ - INFO - self.load_model()
- 2025-05-17 22:08:40,675 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-05-17 22:08:40,675 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-05-17 22:08:40,676 - sglang - INFO - self.model = get_model(
- 2025-05-17 22:08:40,676 - __main__ - INFO - self.model = get_model(
- 2025-05-17 22:08:40,676 - sglang - INFO - ^^^^^^^^^^
- 2025-05-17 22:08:40,676 - __main__ - INFO - ^^^^^^^^^^
- 2025-05-17 22:08:40,676 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-05-17 22:08:40,676 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-05-17 22:08:40,676 - sglang - INFO - return loader.load_model(
- 2025-05-17 22:08:40,676 - __main__ - INFO - return loader.load_model(
- 2025-05-17 22:08:40,676 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,676 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,676 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-05-17 22:08:40,676 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-05-17 22:08:40,676 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-05-17 22:08:40,676 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-05-17 22:08:40,676 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-05-17 22:08:40,676 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-05-17 22:08:40,677 - sglang - INFO - for name, loaded_weight in weights:
- 2025-05-17 22:08:40,677 - __main__ - INFO - for name, loaded_weight in weights:
- 2025-05-17 22:08:40,677 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-05-17 22:08:40,677 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-05-17 22:08:40,677 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-05-17 22:08:40,677 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-05-17 22:08:40,677 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,677 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,677 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-05-17 22:08:40,677 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-05-17 22:08:40,677 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-05-17 22:08:40,677 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-05-17 22:08:40,677 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,677 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,677 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-05-17 22:08:40,677 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-05-17 22:08:40,677 - sglang - INFO - hf_folder = download_weights_from_hf(
- 2025-05-17 22:08:40,678 - __main__ - INFO - hf_folder = download_weights_from_hf(
- 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-05-17 22:08:40,678 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-05-17 22:08:40,678 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-05-17 22:08:40,678 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-05-17 22:08:40,678 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-05-17 22:08:40,678 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-05-17 22:08:40,678 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-05-17 22:08:40,678 - sglang - INFO - self._api.repo_info(
- 2025-05-17 22:08:40,678 - __main__ - INFO - self._api.repo_info(
- 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:08:40,678 - sglang - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:08:40,678 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-05-17 22:08:40,678 - sglang - INFO - return method(
- 2025-05-17 22:08:40,678 - __main__ - INFO - return method(
- 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^
- 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^
- 2025-05-17 22:08:40,679 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:08:40,679 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:08:40,679 - sglang - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:08:40,679 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,679 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-05-17 22:08:40,679 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-05-17 22:08:40,679 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-05-17 22:08:40,679 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,679 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-05-17 22:08:40,679 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-05-17 22:08:40,679 - sglang - INFO - return self.request("GET", url, **kwargs)
- 2025-05-17 22:08:40,679 - __main__ - INFO - return self.request("GET", url, **kwargs)
- 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,679 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-05-17 22:08:40,679 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-05-17 22:08:40,679 - sglang - INFO - resp = self.send(prep, **send_kwargs)
- 2025-05-17 22:08:40,679 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
- 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,680 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-05-17 22:08:40,680 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-05-17 22:08:40,680 - sglang - INFO - r = adapter.send(request, **kwargs)
- 2025-05-17 22:08:40,680 - __main__ - INFO - r = adapter.send(request, **kwargs)
- 2025-05-17 22:08:40,680 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,680 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,680 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-05-17 22:08:40,680 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-05-17 22:08:40,680 - sglang - INFO - return super().send(request, *args, **kwargs)
- 2025-05-17 22:08:40,680 - __main__ - INFO - return super().send(request, *args, **kwargs)
- 2025-05-17 22:08:40,680 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,680 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:08:40,680 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-05-17 22:08:40,680 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-05-17 22:08:40,680 - sglang - INFO - raise ConnectionError(e, request=request)
- 2025-05-17 22:08:40,680 - __main__ - INFO - raise ConnectionError(e, request=request)
- 2025-05-17 22:08:40,680 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 1d8994fa-4819-4677-98a0-06dfaeccb18c)')
- 2025-05-17 22:08:40,680 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 1d8994fa-4819-4677-98a0-06dfaeccb18c)')
- 2025-05-17 22:08:40,680 - sglang - INFO -
- 2025-05-17 22:08:40,680 - __main__ - INFO -
- 2025-05-17 22:08:40,680 - sglang - INFO - [2025-05-17 22:08:40] Received sigquit from a child proces. It usually means the child failed.
- 2025-05-17 22:08:40,680 - __main__ - INFO - [2025-05-17 22:08:40] Received sigquit from a child proces. It usually means the child failed.
- 2025-05-17 22:08:40,856 - __main__ - WARNING - SGLang server task ended
- 2025-05-17 22:08:40,981 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-05-17 22:08:42,018 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-05-17 22:08:43,075 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-05-17 22:08:44,141 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-05-17 22:08:45,208 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-05-17 22:08:46,343 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-05-17 22:08:47,414 - sglang - INFO - [2025-05-17 22:08:47] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=803034972, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:08:47,414 - __main__ - INFO - [2025-05-17 22:08:47] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=803034972, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:08:47,415 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-05-17 22:08:48,487 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-05-17 22:08:49,520 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-05-17 22:08:50,581 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-05-17 22:08:51,650 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-05-17 22:08:52,707 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-05-17 22:08:53,775 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-05-17 22:08:54,258 - sglang - INFO - [2025-05-17 22:08:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:08:54,258 - __main__ - INFO - [2025-05-17 22:08:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:08:54,852 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-05-17 22:08:55,919 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-05-17 22:08:56,988 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-05-17 22:08:58,051 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-05-17 22:08:59,104 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-05-17 22:08:59,768 - sglang - INFO - [2025-05-17 22:08:59 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:08:59,769 - __main__ - INFO - [2025-05-17 22:08:59 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:08:59,945 - sglang - INFO - [2025-05-17 22:08:59 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:08:59,945 - __main__ - INFO - [2025-05-17 22:08:59 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:08:59,945 - sglang - INFO - [2025-05-17 22:08:59 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:08:59,945 - __main__ - INFO - [2025-05-17 22:08:59 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:08:59,945 - sglang - INFO - [2025-05-17 22:08:59 TP0] Init torch distributed begin.
- 2025-05-17 22:08:59,946 - __main__ - INFO - [2025-05-17 22:08:59 TP0] Init torch distributed begin.
- 2025-05-17 22:09:00,182 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
- 2025-05-17 22:09:01,248 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
- 2025-05-17 22:09:02,312 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
- 2025-05-17 22:09:03,380 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
- 2025-05-17 22:09:04,448 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
- 2025-05-17 22:09:05,299 - sglang - INFO - [2025-05-17 22:09:05 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:09:05,299 - __main__ - INFO - [2025-05-17 22:09:05 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:09:05,523 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
- 2025-05-17 22:09:05,902 - sglang - INFO - [2025-05-17 22:09:05 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-05-17 22:09:05,902 - __main__ - INFO - [2025-05-17 22:09:05 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-05-17 22:09:05,903 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-05-17 22:09:05,903 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-05-17 22:09:05,903 - sglang - INFO - sock = connection.create_connection(
- 2025-05-17 22:09:05,903 - __main__ - INFO - sock = connection.create_connection(
- 2025-05-17 22:09:05,903 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,903 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,903 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-05-17 22:09:05,903 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-05-17 22:09:05,903 - sglang - INFO - raise err
- 2025-05-17 22:09:05,903 - __main__ - INFO - raise err
- 2025-05-17 22:09:05,903 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-05-17 22:09:05,903 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-05-17 22:09:05,903 - sglang - INFO - sock.connect(sa)
- 2025-05-17 22:09:05,903 - __main__ - INFO - sock.connect(sa)
- 2025-05-17 22:09:05,903 - sglang - INFO - OSError: [Errno 101] Network is unreachable
- 2025-05-17 22:09:05,903 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
- 2025-05-17 22:09:05,903 - sglang - INFO -
- 2025-05-17 22:09:05,903 - __main__ - INFO -
- 2025-05-17 22:09:05,903 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:09:05,903 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:09:05,904 - sglang - INFO -
- 2025-05-17 22:09:05,904 - __main__ - INFO -
- 2025-05-17 22:09:05,904 - sglang - INFO - Traceback (most recent call last):
- 2025-05-17 22:09:05,904 - __main__ - INFO - Traceback (most recent call last):
- 2025-05-17 22:09:05,904 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-05-17 22:09:05,904 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-05-17 22:09:05,904 - sglang - INFO - response = self._make_request(
- 2025-05-17 22:09:05,904 - __main__ - INFO - response = self._make_request(
- 2025-05-17 22:09:05,904 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,904 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,904 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-05-17 22:09:05,904 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-05-17 22:09:05,904 - sglang - INFO - raise new_e
- 2025-05-17 22:09:05,904 - __main__ - INFO - raise new_e
- 2025-05-17 22:09:05,904 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-05-17 22:09:05,904 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-05-17 22:09:05,904 - sglang - INFO - self._validate_conn(conn)
- 2025-05-17 22:09:05,904 - __main__ - INFO - self._validate_conn(conn)
- 2025-05-17 22:09:05,904 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-05-17 22:09:05,904 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-05-17 22:09:05,904 - sglang - INFO - conn.connect()
- 2025-05-17 22:09:05,904 - __main__ - INFO - conn.connect()
- 2025-05-17 22:09:05,905 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-05-17 22:09:05,905 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-05-17 22:09:05,905 - sglang - INFO - self.sock = sock = self._new_conn()
- 2025-05-17 22:09:05,905 - __main__ - INFO - self.sock = sock = self._new_conn()
- 2025-05-17 22:09:05,905 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,905 - __main__ - INFO - ^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,905 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-05-17 22:09:05,905 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-05-17 22:09:05,905 - sglang - INFO - raise NewConnectionError(
- 2025-05-17 22:09:05,905 - __main__ - INFO - raise NewConnectionError(
- 2025-05-17 22:09:05,905 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-05-17 22:09:05,905 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-05-17 22:09:05,905 - sglang - INFO -
- 2025-05-17 22:09:05,905 - __main__ - INFO -
- 2025-05-17 22:09:05,905 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:09:05,905 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-05-17 22:09:05,905 - sglang - INFO -
- 2025-05-17 22:09:05,905 - __main__ - INFO -
- 2025-05-17 22:09:05,905 - sglang - INFO - Traceback (most recent call last):
- 2025-05-17 22:09:05,905 - __main__ - INFO - Traceback (most recent call last):
- 2025-05-17 22:09:05,905 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-05-17 22:09:05,906 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-05-17 22:09:05,906 - sglang - INFO - resp = conn.urlopen(
- 2025-05-17 22:09:05,906 - __main__ - INFO - resp = conn.urlopen(
- 2025-05-17 22:09:05,906 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-05-17 22:09:05,906 - __main__ - INFO - ^^^^^^^^^^^^^
- 2025-05-17 22:09:05,906 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-05-17 22:09:05,906 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-05-17 22:09:05,906 - sglang - INFO - retries = retries.increment(
- 2025-05-17 22:09:05,906 - __main__ - INFO - retries = retries.increment(
- 2025-05-17 22:09:05,906 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,906 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,906 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-05-17 22:09:05,906 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-05-17 22:09:05,906 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-05-17 22:09:05,906 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-05-17 22:09:05,906 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,906 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,906 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-05-17 22:09:05,906 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-05-17 22:09:05,906 - sglang - INFO -
- 2025-05-17 22:09:05,906 - __main__ - INFO -
- 2025-05-17 22:09:05,907 - sglang - INFO - During handling of the above exception, another exception occurred:
- 2025-05-17 22:09:05,907 - __main__ - INFO - During handling of the above exception, another exception occurred:
- 2025-05-17 22:09:05,907 - sglang - INFO -
- 2025-05-17 22:09:05,907 - __main__ - INFO -
- 2025-05-17 22:09:05,907 - sglang - INFO - Traceback (most recent call last):
- 2025-05-17 22:09:05,907 - __main__ - INFO - Traceback (most recent call last):
- 2025-05-17 22:09:05,907 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-05-17 22:09:05,907 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-05-17 22:09:05,907 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-05-17 22:09:05,907 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-05-17 22:09:05,907 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,907 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,907 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-05-17 22:09:05,907 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-05-17 22:09:05,907 - sglang - INFO - self.tp_worker = TpWorkerClass(
- 2025-05-17 22:09:05,907 - __main__ - INFO - self.tp_worker = TpWorkerClass(
- 2025-05-17 22:09:05,907 - sglang - INFO - ^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,907 - __main__ - INFO - ^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,907 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-05-17 22:09:05,907 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-05-17 22:09:05,907 - sglang - INFO - self.model_runner = ModelRunner(
- 2025-05-17 22:09:05,907 - __main__ - INFO - self.model_runner = ModelRunner(
- 2025-05-17 22:09:05,908 - sglang - INFO - ^^^^^^^^^^^^
- 2025-05-17 22:09:05,908 - __main__ - INFO - ^^^^^^^^^^^^
- 2025-05-17 22:09:05,908 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-05-17 22:09:05,908 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-05-17 22:09:05,908 - sglang - INFO - self.load_model()
- 2025-05-17 22:09:05,908 - __main__ - INFO - self.load_model()
- 2025-05-17 22:09:05,908 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-05-17 22:09:05,908 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-05-17 22:09:05,908 - sglang - INFO - self.model = get_model(
- 2025-05-17 22:09:05,908 - __main__ - INFO - self.model = get_model(
- 2025-05-17 22:09:05,908 - sglang - INFO - ^^^^^^^^^^
- 2025-05-17 22:09:05,908 - __main__ - INFO - ^^^^^^^^^^
- 2025-05-17 22:09:05,908 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-05-17 22:09:05,908 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-05-17 22:09:05,908 - sglang - INFO - return loader.load_model(
- 2025-05-17 22:09:05,908 - __main__ - INFO - return loader.load_model(
- 2025-05-17 22:09:05,908 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,908 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,908 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-05-17 22:09:05,908 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-05-17 22:09:05,908 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-05-17 22:09:05,908 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-05-17 22:09:05,909 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-05-17 22:09:05,909 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-05-17 22:09:05,909 - sglang - INFO - for name, loaded_weight in weights:
- 2025-05-17 22:09:05,909 - __main__ - INFO - for name, loaded_weight in weights:
- 2025-05-17 22:09:05,909 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-05-17 22:09:05,909 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-05-17 22:09:05,909 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-05-17 22:09:05,909 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-05-17 22:09:05,909 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,909 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,909 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-05-17 22:09:05,909 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-05-17 22:09:05,909 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-05-17 22:09:05,909 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-05-17 22:09:05,909 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,909 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,909 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-05-17 22:09:05,909 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-05-17 22:09:05,909 - sglang - INFO - hf_folder = download_weights_from_hf(
- 2025-05-17 22:09:05,909 - __main__ - INFO - hf_folder = download_weights_from_hf(
- 2025-05-17 22:09:05,909 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,909 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,910 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-05-17 22:09:05,910 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-05-17 22:09:05,910 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-05-17 22:09:05,910 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-05-17 22:09:05,910 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,910 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,910 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-05-17 22:09:05,910 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-05-17 22:09:05,910 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-05-17 22:09:05,910 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-05-17 22:09:05,910 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,910 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,910 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-05-17 22:09:05,910 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-05-17 22:09:05,910 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-05-17 22:09:05,910 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-05-17 22:09:05,910 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,910 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,910 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-05-17 22:09:05,910 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-05-17 22:09:05,910 - sglang - INFO - self._api.repo_info(
- 2025-05-17 22:09:05,910 - __main__ - INFO - self._api.repo_info(
- 2025-05-17 22:09:05,911 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:09:05,911 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:09:05,911 - sglang - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:09:05,911 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:09:05,911 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,911 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,911 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-05-17 22:09:05,911 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-05-17 22:09:05,911 - sglang - INFO - return method(
- 2025-05-17 22:09:05,911 - __main__ - INFO - return method(
- 2025-05-17 22:09:05,911 - sglang - INFO - ^^^^^^^
- 2025-05-17 22:09:05,911 - __main__ - INFO - ^^^^^^^
- 2025-05-17 22:09:05,911 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:09:05,911 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-05-17 22:09:05,911 - sglang - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:09:05,911 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-05-17 22:09:05,911 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,911 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,911 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-05-17 22:09:05,911 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-05-17 22:09:05,911 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-05-17 22:09:05,912 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-05-17 22:09:05,912 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,912 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,912 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-05-17 22:09:05,912 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-05-17 22:09:05,912 - sglang - INFO - return self.request("GET", url, **kwargs)
- 2025-05-17 22:09:05,912 - __main__ - INFO - return self.request("GET", url, **kwargs)
- 2025-05-17 22:09:05,912 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,912 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,912 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-05-17 22:09:05,912 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-05-17 22:09:05,912 - sglang - INFO - resp = self.send(prep, **send_kwargs)
- 2025-05-17 22:09:05,912 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
- 2025-05-17 22:09:05,912 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,912 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,912 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-05-17 22:09:05,912 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-05-17 22:09:05,912 - sglang - INFO - r = adapter.send(request, **kwargs)
- 2025-05-17 22:09:05,912 - __main__ - INFO - r = adapter.send(request, **kwargs)
- 2025-05-17 22:09:05,912 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,912 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,912 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-05-17 22:09:05,912 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-05-17 22:09:05,913 - sglang - INFO - return super().send(request, *args, **kwargs)
- 2025-05-17 22:09:05,913 - __main__ - INFO - return super().send(request, *args, **kwargs)
- 2025-05-17 22:09:05,913 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,913 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-05-17 22:09:05,913 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-05-17 22:09:05,913 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-05-17 22:09:05,913 - sglang - INFO - raise ConnectionError(e, request=request)
- 2025-05-17 22:09:05,913 - __main__ - INFO - raise ConnectionError(e, request=request)
- 2025-05-17 22:09:05,913 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 385785cb-3404-4e73-aefc-9a748405b66f)')
- 2025-05-17 22:09:05,913 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 385785cb-3404-4e73-aefc-9a748405b66f)')
- 2025-05-17 22:09:05,913 - sglang - INFO -
- 2025-05-17 22:09:05,913 - __main__ - INFO -
- 2025-05-17 22:09:05,913 - sglang - INFO - [2025-05-17 22:09:05] Received sigquit from a child proces. It usually means the child failed.
- 2025-05-17 22:09:05,913 - __main__ - INFO - [2025-05-17 22:09:05] Received sigquit from a child proces. It usually means the child failed.
- 2025-05-17 22:09:06,062 - __main__ - WARNING - SGLang server task ended
- 2025-05-17 22:09:06,600 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
- 2025-05-17 22:09:07,669 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
- 2025-05-17 22:09:08,728 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
- 2025-05-17 22:09:09,792 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
- 2025-05-17 22:09:10,394 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 22:09:30,402 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:09:30,402 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 22:09:30,402 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:09:30,405 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:09:30,623 - __main__ - INFO - Starting pipeline with PID 401355
- 2025-05-17 22:09:30,623 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:09:35,724 - __main__ - INFO - No work to do, exiting
- 2025-05-17 22:10:16,045 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:10:16,045 - __main__ - INFO - Loading file at olmocr_workspace/job_1747491009/input.pdf as PDF document
- 2025-05-17 22:10:16,045 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:10:16,048 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:10:16,256 - __main__ - INFO - Starting pipeline with PID 401510
- 2025-05-17 22:10:16,256 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:10:21,469 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 22:10:22,511 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 22:10:23,556 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 22:10:24,608 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 22:10:25,675 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 22:10:26,816 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 22:10:27,868 - sglang - INFO - [2025-05-17 22:10:27] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=907351504, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:10:27,868 - __main__ - INFO - [2025-05-17 22:10:27] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=907351504, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:10:27,869 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 22:10:28,903 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 22:10:29,963 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 22:10:31,030 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 22:10:32,098 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 22:10:33,166 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 22:10:34,232 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 22:10:35,266 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 22:10:36,326 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 22:10:37,398 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 22:10:38,466 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 22:10:39,529 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 22:10:40,585 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 22:10:41,651 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 22:10:42,718 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 22:10:43,747 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 22:10:44,782 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 22:10:45,845 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 22:10:46,913 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 22:10:47,981 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 22:10:49,050 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 22:10:50,118 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 22:10:51,187 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 22:10:52,255 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 22:10:53,323 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 22:10:54,391 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 22:10:55,459 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 22:10:56,527 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 22:10:57,590 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 22:10:58,647 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-17 22:10:59,714 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-05-17 22:11:00,781 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-05-17 22:11:01,848 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-05-17 22:11:02,919 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-05-17 22:11:03,954 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-05-17 22:11:05,021 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-05-17 22:11:06,088 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-05-17 22:11:07,156 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-05-17 22:11:08,223 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-05-17 22:11:09,291 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-05-17 22:11:10,363 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-05-17 22:11:11,419 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-05-17 22:11:12,487 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-05-17 22:11:13,555 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-05-17 22:11:14,628 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-05-17 22:11:15,684 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-05-17 22:11:16,747 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-05-17 22:11:17,814 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-05-17 22:11:18,880 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-05-17 22:11:19,948 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-05-17 22:11:21,015 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-05-17 22:11:22,091 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-05-17 22:11:23,155 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-05-17 22:11:24,228 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-05-17 22:11:25,295 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-05-17 22:11:26,362 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-05-17 22:11:27,430 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-05-17 22:11:28,498 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-05-17 22:11:29,562 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-05-17 22:11:30,629 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-05-17 22:11:31,693 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
- 2025-05-17 22:11:32,745 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
- 2025-05-17 22:11:33,811 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
- 2025-05-17 22:11:34,880 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
- 2025-05-17 22:11:35,947 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
- 2025-05-17 22:11:37,014 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
- 2025-05-17 22:11:38,082 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
- 2025-05-17 22:11:39,150 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
- 2025-05-17 22:11:40,218 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
- 2025-05-17 22:11:41,287 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
- 2025-05-17 22:11:42,354 - __main__ - WARNING - Attempt 77: Please wait for sglang server to become ready...
- 2025-05-17 22:11:43,423 - __main__ - WARNING - Attempt 78: Please wait for sglang server to become ready...
- 2025-05-17 22:11:44,495 - __main__ - WARNING - Attempt 79: Please wait for sglang server to become ready...
- 2025-05-17 22:11:45,567 - __main__ - WARNING - Attempt 80: Please wait for sglang server to become ready...
- 2025-05-17 22:11:46,639 - __main__ - WARNING - Attempt 81: Please wait for sglang server to become ready...
- 2025-05-17 22:11:47,707 - __main__ - WARNING - Attempt 82: Please wait for sglang server to become ready...
- 2025-05-17 22:11:48,771 - __main__ - WARNING - Attempt 83: Please wait for sglang server to become ready...
- 2025-05-17 22:11:49,826 - __main__ - WARNING - Attempt 84: Please wait for sglang server to become ready...
- 2025-05-17 22:11:50,893 - __main__ - WARNING - Attempt 85: Please wait for sglang server to become ready...
- 2025-05-17 22:11:51,962 - __main__ - WARNING - Attempt 86: Please wait for sglang server to become ready...
- 2025-05-17 22:11:53,028 - __main__ - WARNING - Attempt 87: Please wait for sglang server to become ready...
- 2025-05-17 22:11:54,096 - __main__ - WARNING - Attempt 88: Please wait for sglang server to become ready...
- 2025-05-17 22:11:55,163 - __main__ - WARNING - Attempt 89: Please wait for sglang server to become ready...
- 2025-05-17 22:11:56,231 - __main__ - WARNING - Attempt 90: Please wait for sglang server to become ready...
- 2025-05-17 22:11:57,304 - __main__ - WARNING - Attempt 91: Please wait for sglang server to become ready...
- 2025-05-17 22:11:58,372 - __main__ - WARNING - Attempt 92: Please wait for sglang server to become ready...
- 2025-05-17 22:11:59,441 - __main__ - WARNING - Attempt 93: Please wait for sglang server to become ready...
- 2025-05-17 22:12:00,509 - __main__ - WARNING - Attempt 94: Please wait for sglang server to become ready...
- 2025-05-17 22:12:01,577 - __main__ - WARNING - Attempt 95: Please wait for sglang server to become ready...
- 2025-05-17 22:12:02,645 - __main__ - WARNING - Attempt 96: Please wait for sglang server to become ready...
- 2025-05-17 22:12:03,713 - __main__ - WARNING - Attempt 97: Please wait for sglang server to become ready...
- 2025-05-17 22:12:04,323 - sglang - INFO - [2025-05-17 22:12:04] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:12:04,323 - __main__ - INFO - [2025-05-17 22:12:04] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:12:04,790 - __main__ - WARNING - Attempt 98: Please wait for sglang server to become ready...
- 2025-05-17 22:12:05,857 - __main__ - WARNING - Attempt 99: Please wait for sglang server to become ready...
- 2025-05-17 22:12:06,909 - __main__ - WARNING - Attempt 100: Please wait for sglang server to become ready...
- 2025-05-17 22:12:07,976 - __main__ - WARNING - Attempt 101: Please wait for sglang server to become ready...
- 2025-05-17 22:12:09,043 - __main__ - WARNING - Attempt 102: Please wait for sglang server to become ready...
- 2025-05-17 22:12:10,111 - __main__ - WARNING - Attempt 103: Please wait for sglang server to become ready...
- 2025-05-17 22:12:10,225 - sglang - INFO - [2025-05-17 22:12:10 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:12:10,225 - __main__ - INFO - [2025-05-17 22:12:10 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:12:11,188 - __main__ - WARNING - Attempt 104: Please wait for sglang server to become ready...
- 2025-05-17 22:12:12,256 - __main__ - WARNING - Attempt 105: Please wait for sglang server to become ready...
- 2025-05-17 22:12:13,325 - __main__ - WARNING - Attempt 106: Please wait for sglang server to become ready...
- 2025-05-17 22:12:14,393 - __main__ - WARNING - Attempt 107: Please wait for sglang server to become ready...
- 2025-05-17 22:12:15,462 - __main__ - WARNING - Attempt 108: Please wait for sglang server to become ready...
- 2025-05-17 22:12:16,531 - __main__ - WARNING - Attempt 109: Please wait for sglang server to become ready...
- 2025-05-17 22:12:17,604 - __main__ - WARNING - Attempt 110: Please wait for sglang server to become ready...
- 2025-05-17 22:12:18,672 - __main__ - WARNING - Attempt 111: Please wait for sglang server to become ready...
- 2025-05-17 22:12:19,740 - __main__ - WARNING - Attempt 112: Please wait for sglang server to become ready...
- 2025-05-17 22:12:20,809 - __main__ - WARNING - Attempt 113: Please wait for sglang server to become ready...
- 2025-05-17 22:12:21,875 - __main__ - WARNING - Attempt 114: Please wait for sglang server to become ready...
- 2025-05-17 22:12:22,936 - __main__ - WARNING - Attempt 115: Please wait for sglang server to become ready...
- 2025-05-17 22:12:23,989 - __main__ - WARNING - Attempt 116: Please wait for sglang server to become ready...
- 2025-05-17 22:12:25,051 - __main__ - WARNING - Attempt 117: Please wait for sglang server to become ready...
- 2025-05-17 22:12:26,120 - __main__ - WARNING - Attempt 118: Please wait for sglang server to become ready...
- 2025-05-17 22:12:27,188 - __main__ - WARNING - Attempt 119: Please wait for sglang server to become ready...
- 2025-05-17 22:12:28,152 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 22:13:07,270 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:13:07,271 - __main__ - INFO - Loading file at olmocr_workspace/job_1747491180/input.pdf as PDF document
- 2025-05-17 22:13:07,271 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:13:07,273 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:13:07,567 - __main__ - INFO - Starting pipeline with PID 402318
- 2025-05-17 22:13:07,567 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:15:43,322 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:15:43,322 - __main__ - INFO - Loading file at olmocr_workspace/job_1747491337/input.pdf as PDF document
- 2025-05-17 22:15:43,322 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:15:43,324 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:15:43,625 - __main__ - INFO - Starting pipeline with PID 402524
- 2025-05-17 22:15:43,625 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:17:59,962 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 22:18:01,003 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 22:18:01,262 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 22:21:02,917 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:21:02,918 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 22:21:02,918 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:21:02,920 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:21:03,172 - __main__ - INFO - Starting pipeline with PID 404165
- 2025-05-17 22:21:03,172 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:23:19,332 - __main__ - INFO - No work to do, exiting
- 2025-05-17 22:27:55,694 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:27:55,694 - __main__ - INFO - Loading file at olmocr_workspace/job_1747492069/input.pdf as PDF document
- 2025-05-17 22:27:55,694 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:27:55,696 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:27:55,922 - __main__ - INFO - Starting pipeline with PID 404544
- 2025-05-17 22:27:55,922 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:30:11,099 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 22:30:12,145 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 22:30:13,201 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 22:30:14,262 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 22:30:15,329 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 22:30:16,395 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 22:30:17,281 - sglang - INFO - [2025-05-17 22:30:17] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=403061725, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:30:17,281 - __main__ - INFO - [2025-05-17 22:30:17] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=403061725, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:30:17,553 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 22:30:18,621 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 22:30:19,665 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 22:30:20,711 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 22:30:21,758 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 22:30:22,798 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 22:30:23,842 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 22:30:24,878 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 22:30:25,941 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 22:30:27,008 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 22:30:28,077 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 22:30:29,145 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 22:30:30,209 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 22:30:31,261 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 22:30:32,328 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 22:30:33,391 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 22:30:34,459 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 22:30:35,526 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 22:30:36,595 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 22:30:37,663 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 22:30:38,736 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 22:30:39,803 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 22:30:40,873 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 22:30:41,941 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 22:30:43,011 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 22:30:44,084 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 22:30:45,152 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 22:30:46,217 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 22:30:47,277 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 22:30:48,330 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-17 22:30:49,393 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-05-17 22:30:50,460 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-05-17 22:30:51,527 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-05-17 22:30:52,600 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-05-17 22:30:53,668 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-05-17 22:30:54,735 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-05-17 22:30:55,804 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-05-17 22:30:56,872 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-05-17 22:30:57,940 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-05-17 22:30:59,004 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-05-17 22:31:00,072 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-05-17 22:31:01,140 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-05-17 22:31:02,208 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-05-17 22:31:03,275 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-05-17 22:31:04,339 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-05-17 22:31:05,392 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-05-17 22:31:06,455 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-05-17 22:31:07,527 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-05-17 22:31:08,595 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-05-17 22:31:09,667 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-05-17 22:31:10,735 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-05-17 22:31:11,808 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-05-17 22:31:12,879 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-05-17 22:31:13,952 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-05-17 22:31:15,020 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-05-17 22:31:16,088 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-05-17 22:31:17,156 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-05-17 22:31:18,224 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-05-17 22:31:19,292 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-05-17 22:31:20,356 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-05-17 22:31:21,416 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
- 2025-05-17 22:31:22,469 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
- 2025-05-17 22:31:23,532 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
- 2025-05-17 22:31:24,599 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
- 2025-05-17 22:31:25,671 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
- 2025-05-17 22:31:26,739 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
- 2025-05-17 22:31:27,807 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
- 2025-05-17 22:31:28,876 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
- 2025-05-17 22:31:29,945 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
- 2025-05-17 22:31:31,013 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
- 2025-05-17 22:31:31,749 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 22:31:40,102 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:31:40,102 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 22:31:40,102 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:31:40,106 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:31:40,324 - __main__ - INFO - Starting pipeline with PID 405667
- 2025-05-17 22:31:40,324 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:34:59,610 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:34:59,610 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 22:34:59,610 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:34:59,613 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:34:59,876 - __main__ - INFO - Starting pipeline with PID 405958
- 2025-05-17 22:34:59,876 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:37:14,916 - __main__ - INFO - No work to do, exiting
- 2025-05-17 22:45:58,117 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:45:58,118 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 22:45:58,118 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:45:58,121 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:45:58,379 - __main__ - INFO - Starting pipeline with PID 406610
- 2025-05-17 22:45:58,379 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:48:14,372 - __main__ - INFO - No work to do, exiting
- 2025-05-17 22:48:42,758 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:48:42,758 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 22:48:42,758 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:48:42,762 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:48:43,037 - __main__ - INFO - Starting pipeline with PID 407353
- 2025-05-17 22:48:43,037 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:51:38,663 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:51:38,663 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 22:51:38,663 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:51:38,666 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:51:38,887 - __main__ - INFO - Starting pipeline with PID 407920
- 2025-05-17 22:51:38,887 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:51:44,965 - __main__ - INFO - No work to do, exiting
- 2025-05-17 22:52:53,522 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:52:53,522 - __main__ - INFO - Loading file at olmocr_workspace/job_1747493567/input.pdf as PDF document
- 2025-05-17 22:52:53,522 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:52:53,524 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:52:53,743 - __main__ - INFO - Starting pipeline with PID 408294
- 2025-05-17 22:52:53,743 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:52:59,333 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 22:53:00,367 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 22:53:01,421 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 22:53:02,488 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 22:53:03,554 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 22:53:04,629 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 22:53:05,307 - sglang - INFO - [2025-05-17 22:53:05] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=6928412, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:53:05,307 - __main__ - INFO - [2025-05-17 22:53:05] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=6928412, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:53:05,699 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 22:53:06,767 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 22:53:07,823 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 22:53:08,864 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 22:53:09,904 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 22:53:10,937 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 22:53:11,990 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 22:53:13,053 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 22:53:14,099 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 22:53:14,330 - sglang - INFO - [2025-05-17 22:53:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:53:14,330 - __main__ - INFO - [2025-05-17 22:53:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:53:15,167 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 22:53:16,235 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 22:53:17,295 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 22:53:18,366 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 22:53:19,429 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 22:53:20,484 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 22:53:20,626 - sglang - INFO - [2025-05-17 22:53:20 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:53:20,626 - __main__ - INFO - [2025-05-17 22:53:20 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:53:21,154 - sglang - INFO - [2025-05-17 22:53:21 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:53:21,154 - __main__ - INFO - [2025-05-17 22:53:21 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:53:21,155 - sglang - INFO - [2025-05-17 22:53:21 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:53:21,155 - __main__ - INFO - [2025-05-17 22:53:21 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:53:21,155 - sglang - INFO - [2025-05-17 22:53:21 TP0] Init torch distributed begin.
- 2025-05-17 22:53:21,155 - __main__ - INFO - [2025-05-17 22:53:21 TP0] Init torch distributed begin.
- 2025-05-17 22:53:21,562 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 22:53:22,629 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 22:53:23,706 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 22:53:24,766 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 22:53:25,840 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 22:53:26,579 - sglang - INFO - [2025-05-17 22:53:26 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:53:26,580 - __main__ - INFO - [2025-05-17 22:53:26 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:53:26,916 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 22:53:27,637 - sglang - INFO - [2025-05-17 22:53:27 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 22:53:27,637 - __main__ - INFO - [2025-05-17 22:53:27 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 22:53:27,992 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 22:53:28,214 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 22:53:28,214 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 22:53:28,505 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
- 2025-05-17 22:53:28,506 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
- 2025-05-17 22:53:29,069 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 22:53:29,476 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.45it/s]
- 2025-05-17 22:53:29,476 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.45it/s]
- 2025-05-17 22:53:30,140 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 22:53:30,484 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.20it/s]
- 2025-05-17 22:53:30,484 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.20it/s]
- 2025-05-17 22:53:31,217 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 22:53:31,433 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
- 2025-05-17 22:53:31,434 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
- 2025-05-17 22:53:31,434 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.24it/s]
- 2025-05-17 22:53:31,434 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.24it/s]
- 2025-05-17 22:53:31,434 - sglang - INFO -
- 2025-05-17 22:53:31,434 - __main__ - INFO -
- 2025-05-17 22:53:31,581 - sglang - INFO - [2025-05-17 22:53:31 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 22:53:31,582 - __main__ - INFO - [2025-05-17 22:53:31 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 22:53:31,589 - sglang - INFO - [2025-05-17 22:53:31 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 22:53:31,589 - __main__ - INFO - [2025-05-17 22:53:31 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 22:53:31,589 - sglang - INFO - [2025-05-17 22:53:31 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 22:53:31,590 - __main__ - INFO - [2025-05-17 22:53:31 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 22:53:31,754 - sglang - INFO - [2025-05-17 22:53:31 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 22:53:31,755 - __main__ - INFO - [2025-05-17 22:53:31 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 22:53:32,294 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 22:53:33,328 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 22:53:33,770 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.04s/it]
50%|█████ | 2/4 [00:01<00:01, 1.63it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.07it/s]
100%|██████████| 4/4 [00:02<00:00, 2.36it/s]
100%|██████████| 4/4 [00:02<00:00, 1.99it/s]
- 2025-05-17 22:53:33,771 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.04s/it]
50%|█████ | 2/4 [00:01<00:01, 1.63it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.07it/s]
100%|██████████| 4/4 [00:02<00:00, 2.36it/s]
100%|██████████| 4/4 [00:02<00:00, 1.99it/s]
- 2025-05-17 22:53:33,771 - sglang - INFO - [2025-05-17 22:53:33 TP0] Capture cuda graph end. Time elapsed: 2.02 s
- 2025-05-17 22:53:33,771 - __main__ - INFO - [2025-05-17 22:53:33 TP0] Capture cuda graph end. Time elapsed: 2.02 s
- 2025-05-17 22:53:34,364 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 22:53:35,424 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 22:53:36,307 - sglang - INFO - [2025-05-17 22:53:36 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 22:53:36,308 - __main__ - INFO - [2025-05-17 22:53:36 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 22:53:36,506 - __main__ - INFO - sglang server is ready.
- 2025-05-17 22:53:36,506 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 22:53:36,506 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:53:36,506 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 22:53:36,506 - __main__ - INFO - Worker 0 processing work item 1689b5b4ef8b4f3a7193fb04a81a958bc3bccb78
- 2025-05-17 22:53:36,507 - __main__ - INFO - Created all tasks for 1689b5b4ef8b4f3a7193fb04a81a958bc3bccb78
- 2025-05-17 22:53:36,509 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747493567/input.pdf in worker 0
- 2025-05-17 22:53:37,399 - sglang - INFO - [2025-05-17 22:53:37 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 22:53:37,400 - __main__ - INFO - [2025-05-17 22:53:37 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 22:53:37,400 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 22:53:37,970 - sglang - INFO - [2025-05-17 22:53:37] The server is fired up and ready to roll!
- 2025-05-17 22:53:37,970 - __main__ - INFO - [2025-05-17 22:53:37] The server is fired up and ready to roll!
- 2025-05-17 22:53:42,840 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493567/input.pdf-1
- 2025-05-17 22:53:46,579 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:53:46,579 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:53:46,579 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 22:53:56,581 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:53:56,581 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:53:56,582 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 22:54:03,354 - sglang - INFO - [2025-05-17 22:54:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 22:54:03,355 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 22:54:04,789 - sglang - INFO - [2025-05-17 22:54:04 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.40, #queue-req: 0
- 2025-05-17 22:54:04,789 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 22:54:05,619 - sglang - INFO - [2025-05-17 22:54:05 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.18, #queue-req: 0
- 2025-05-17 22:54:05,619 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 22:54:06,442 - sglang - INFO - [2025-05-17 22:54:06 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.58, #queue-req: 0
- 2025-05-17 22:54:06,442 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 22:54:06,583 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:54:06,583 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:54:06,583 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 22:54:07,266 - sglang - INFO - [2025-05-17 22:54:07 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.58, #queue-req: 0
- 2025-05-17 22:54:07,266 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 22:54:08,088 - sglang - INFO - [2025-05-17 22:54:08 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.61, #queue-req: 0
- 2025-05-17 22:54:08,089 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 22:54:08,688 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 22:54:08,688 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 22:54:08,688 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 22:54:08,688 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 22:54:08,688 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 22:54:08,689 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 22:54:08,689 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 22:54:08,689 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 22:54:08,924 - sglang - INFO - [2025-05-17 22:54:08 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 47.89, #queue-req: 0
- 2025-05-17 22:54:08,924 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 22:54:09,551 - __main__ - INFO - Finished TaskGroup for worker on 1689b5b4ef8b4f3a7193fb04a81a958bc3bccb78
- 2025-05-17 22:54:09,551 - __main__ - INFO - Got 1 docs for 1689b5b4ef8b4f3a7193fb04a81a958bc3bccb78
- 2025-05-17 22:54:09,552 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 22:54:09,553 - __main__ - INFO - Work done
- 2025-05-17 22:54:09,553 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 22:55:54,079 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:55:54,079 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-05-17 22:55:54,079 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:55:54,081 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 22:55:54,300 - __main__ - INFO - Starting pipeline with PID 410354
- 2025-05-17 22:55:54,300 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:55:54,768 - __main__ - INFO - No work to do, exiting
- 2025-05-17 22:55:55,833 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:55:55,833 - __main__ - INFO - Loading file at olmocr_workspace/job_1747493749/input.pdf as PDF document
- 2025-05-17 22:55:55,833 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:55:55,837 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 22:55:56,045 - __main__ - INFO - Starting pipeline with PID 410436
- 2025-05-17 22:55:56,045 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:55:56,664 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 22:55:57,702 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 22:55:58,747 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 22:55:59,810 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 22:56:00,876 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 22:56:02,020 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 22:56:02,743 - sglang - INFO - [2025-05-17 22:56:02] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=902798133, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:56:02,743 - __main__ - INFO - [2025-05-17 22:56:02] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=902798133, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:56:03,096 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 22:56:04,164 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 22:56:05,207 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 22:56:06,252 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 22:56:07,297 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 22:56:08,328 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 22:56:09,378 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 22:56:10,443 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 22:56:11,510 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 22:56:12,059 - sglang - INFO - [2025-05-17 22:56:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:56:12,059 - __main__ - INFO - [2025-05-17 22:56:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:56:12,508 - sglang - INFO - [2025-05-17 22:56:12 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:56:12,508 - __main__ - INFO - [2025-05-17 22:56:12 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:56:12,585 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 22:56:13,003 - sglang - INFO - [2025-05-17 22:56:13 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:56:13,003 - __main__ - INFO - [2025-05-17 22:56:13 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:56:13,003 - sglang - INFO - [2025-05-17 22:56:13 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:56:13,003 - __main__ - INFO - [2025-05-17 22:56:13 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:56:13,003 - sglang - INFO - [2025-05-17 22:56:13 TP0] Init torch distributed begin.
- 2025-05-17 22:56:13,003 - __main__ - INFO - [2025-05-17 22:56:13 TP0] Init torch distributed begin.
- 2025-05-17 22:56:13,663 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 22:56:14,732 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 22:56:15,781 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 22:56:16,844 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 22:56:17,910 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 22:56:18,327 - sglang - INFO - [2025-05-17 22:56:18 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:56:18,328 - __main__ - INFO - [2025-05-17 22:56:18 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:56:18,987 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 22:56:19,825 - sglang - INFO - [2025-05-17 22:56:19 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 22:56:19,825 - __main__ - INFO - [2025-05-17 22:56:19 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 22:56:20,063 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 22:56:20,343 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 22:56:20,344 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 22:56:20,620 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.62it/s]
- 2025-05-17 22:56:20,620 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.62it/s]
- 2025-05-17 22:56:21,141 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 22:56:21,516 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.56it/s]
- 2025-05-17 22:56:21,516 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.56it/s]
- 2025-05-17 22:56:22,218 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 22:56:22,418 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.32it/s]
- 2025-05-17 22:56:22,418 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.32it/s]
- 2025-05-17 22:56:23,296 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 22:56:23,310 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.23it/s]
- 2025-05-17 22:56:23,310 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.23it/s]
- 2025-05-17 22:56:23,310 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.35it/s]
- 2025-05-17 22:56:23,310 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.35it/s]
- 2025-05-17 22:56:23,310 - sglang - INFO -
- 2025-05-17 22:56:23,310 - __main__ - INFO -
- 2025-05-17 22:56:23,446 - sglang - INFO - [2025-05-17 22:56:23 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 22:56:23,446 - __main__ - INFO - [2025-05-17 22:56:23 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 22:56:23,452 - sglang - INFO - [2025-05-17 22:56:23 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 22:56:23,452 - __main__ - INFO - [2025-05-17 22:56:23 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 22:56:23,452 - sglang - INFO - [2025-05-17 22:56:23 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 22:56:23,452 - __main__ - INFO - [2025-05-17 22:56:23 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 22:56:23,607 - sglang - INFO - [2025-05-17 22:56:23 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 22:56:23,607 - __main__ - INFO - [2025-05-17 22:56:23 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 22:56:24,374 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 22:56:25,374 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.80it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.36it/s]
100%|██████████| 4/4 [00:01<00:00, 2.79it/s]
100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
- 2025-05-17 22:56:25,374 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.80it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.36it/s]
100%|██████████| 4/4 [00:01<00:00, 2.79it/s]
100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
- 2025-05-17 22:56:25,448 - sglang - INFO - [2025-05-17 22:56:25 TP0] Capture cuda graph end. Time elapsed: 1.77 s
- 2025-05-17 22:56:25,448 - __main__ - INFO - [2025-05-17 22:56:25 TP0] Capture cuda graph end. Time elapsed: 1.77 s
- 2025-05-17 22:56:25,450 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 22:56:26,518 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 22:56:27,586 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 22:56:28,248 - sglang - INFO - [2025-05-17 22:56:28 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 22:56:28,249 - __main__ - INFO - [2025-05-17 22:56:28 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 22:56:28,677 - __main__ - INFO - sglang server is ready.
- 2025-05-17 22:56:28,678 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 22:56:28,678 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:56:28,678 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 22:56:28,678 - __main__ - INFO - Worker 0 processing work item a118967b13fa84e22675b237c5a5c55c4e2ce2bc
- 2025-05-17 22:56:28,678 - __main__ - INFO - Created all tasks for a118967b13fa84e22675b237c5a5c55c4e2ce2bc
- 2025-05-17 22:56:28,684 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747493749/input.pdf in worker 0
- 2025-05-17 22:56:29,342 - sglang - INFO - [2025-05-17 22:56:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 22:56:29,342 - __main__ - INFO - [2025-05-17 22:56:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 22:56:29,342 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 22:56:30,276 - sglang - INFO - [2025-05-17 22:56:30] The server is fired up and ready to roll!
- 2025-05-17 22:56:30,276 - __main__ - INFO - [2025-05-17 22:56:30] The server is fired up and ready to roll!
- 2025-05-17 22:56:35,358 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-1
- 2025-05-17 22:56:35,363 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-2
- 2025-05-17 22:56:35,372 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-3
- 2025-05-17 22:56:35,383 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-4
- 2025-05-17 22:56:35,391 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-5
- 2025-05-17 22:56:38,680 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:56:38,680 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:56:38,680 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 22:56:48,682 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:56:48,683 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:56:48,683 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 22:56:55,994 - sglang - INFO - [2025-05-17 22:56:55 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 22:56:55,995 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 22:56:56,799 - sglang - INFO - [2025-05-17 22:56:56 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 22:56:56,799 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 22:56:58,685 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:56:58,685 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:56:58,685 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 22:57:00,260 - sglang - INFO - [2025-05-17 22:57:00 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.37, #queue-req: 0
- 2025-05-17 22:57:00,260 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 22:57:00,988 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 22:57:00,988 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 22:57:01,123 - sglang - INFO - [2025-05-17 22:57:01 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 231.83, #queue-req: 0
- 2025-05-17 22:57:01,123 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 22:57:01,981 - sglang - INFO - [2025-05-17 22:57:01 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 232.99, #queue-req: 0
- 2025-05-17 22:57:01,981 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 22:57:02,840 - sglang - INFO - [2025-05-17 22:57:02 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 232.71, #queue-req: 0
- 2025-05-17 22:57:02,841 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 22:57:03,705 - sglang - INFO - [2025-05-17 22:57:03 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 231.30, #queue-req: 0
- 2025-05-17 22:57:03,705 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 22:57:04,570 - sglang - INFO - [2025-05-17 22:57:04 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 231.17, #queue-req: 0
- 2025-05-17 22:57:04,570 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 22:57:05,435 - sglang - INFO - [2025-05-17 22:57:05 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.16, #queue-req: 0
- 2025-05-17 22:57:05,435 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 22:57:06,299 - sglang - INFO - [2025-05-17 22:57:06 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 231.68, #queue-req: 0
- 2025-05-17 22:57:06,299 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 22:57:07,163 - sglang - INFO - [2025-05-17 22:57:07 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 215.18, #queue-req: 0
- 2025-05-17 22:57:07,163 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 22:57:08,012 - sglang - INFO - [2025-05-17 22:57:08 TP0] Decode batch. #running-req: 3, #token: 5146, token usage: 0.14, gen throughput (token/s): 141.30, #queue-req: 0
- 2025-05-17 22:57:08,012 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 22:57:08,687 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:57:08,687 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 106.67 106.67
- sglang_output_tokens 20.49 20.49
- 2025-05-17 22:57:08,687 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 5
- 2025-05-17 22:57:08,844 - sglang - INFO - [2025-05-17 22:57:08 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 61.35, #queue-req: 0
- 2025-05-17 22:57:08,844 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 22:57:09,672 - sglang - INFO - [2025-05-17 22:57:09 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.30, #queue-req: 0
- 2025-05-17 22:57:09,672 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 22:57:10,463 - __main__ - INFO - Finished TaskGroup for worker on a118967b13fa84e22675b237c5a5c55c4e2ce2bc
- 2025-05-17 22:57:10,463 - __main__ - INFO - Got 1 docs for a118967b13fa84e22675b237c5a5c55c4e2ce2bc
- 2025-05-17 22:57:10,464 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 22:57:10,465 - __main__ - INFO - Work done
- 2025-05-17 22:57:10,465 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 22:58:44,026 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 22:58:44,026 - __main__ - INFO - Loading file at olmocr_workspace/job_1747493917/input.pdf as PDF document
- 2025-05-17 22:58:44,026 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 22:58:44,032 - __main__ - INFO - Calculated items_per_group: 33 based on average pages per PDF: 15.00
- 2025-05-17 22:58:44,333 - __main__ - INFO - Starting pipeline with PID 412467
- 2025-05-17 22:58:44,333 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 22:58:50,937 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 22:58:51,978 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 22:58:53,023 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 22:58:54,089 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 22:58:55,158 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 22:58:56,228 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 22:58:56,597 - sglang - INFO - [2025-05-17 22:58:56] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=807558455, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:58:56,597 - __main__ - INFO - [2025-05-17 22:58:56] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=807558455, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 22:58:57,298 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 22:58:58,370 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 22:58:59,436 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 22:59:00,505 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 22:59:01,552 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 22:59:02,617 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 22:59:03,683 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 22:59:04,745 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 22:59:05,781 - sglang - INFO - [2025-05-17 22:59:05] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:59:05,782 - __main__ - INFO - [2025-05-17 22:59:05] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 22:59:05,783 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 22:59:06,821 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 22:59:07,880 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 22:59:08,918 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 22:59:09,977 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 22:59:11,033 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 22:59:11,318 - sglang - INFO - [2025-05-17 22:59:11 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:59:11,318 - __main__ - INFO - [2025-05-17 22:59:11 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 22:59:12,112 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 22:59:12,129 - sglang - INFO - [2025-05-17 22:59:12 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:59:12,130 - __main__ - INFO - [2025-05-17 22:59:12 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 22:59:12,130 - sglang - INFO - [2025-05-17 22:59:12 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:59:12,130 - __main__ - INFO - [2025-05-17 22:59:12 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 22:59:12,130 - sglang - INFO - [2025-05-17 22:59:12 TP0] Init torch distributed begin.
- 2025-05-17 22:59:12,130 - __main__ - INFO - [2025-05-17 22:59:12 TP0] Init torch distributed begin.
- 2025-05-17 22:59:13,187 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 22:59:14,258 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 22:59:15,325 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 22:59:16,392 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 22:59:17,459 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 22:59:17,497 - sglang - INFO - [2025-05-17 22:59:17 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:59:17,497 - __main__ - INFO - [2025-05-17 22:59:17 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 22:59:18,534 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 22:59:19,026 - sglang - INFO - [2025-05-17 22:59:19 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 22:59:19,026 - __main__ - INFO - [2025-05-17 22:59:19 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 22:59:19,612 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 22:59:19,612 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 22:59:19,614 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 22:59:19,821 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.56it/s]
- 2025-05-17 22:59:19,821 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.56it/s]
- 2025-05-17 22:59:20,693 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 22:59:20,764 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
- 2025-05-17 22:59:20,764 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
- 2025-05-17 22:59:21,773 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.26it/s]
- 2025-05-17 22:59:21,773 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.26it/s]
- 2025-05-17 22:59:21,774 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 22:59:22,606 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 22:59:22,607 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 22:59:22,607 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.30it/s]
- 2025-05-17 22:59:22,607 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.30it/s]
- 2025-05-17 22:59:22,607 - sglang - INFO -
- 2025-05-17 22:59:22,607 - __main__ - INFO -
- 2025-05-17 22:59:22,738 - sglang - INFO - [2025-05-17 22:59:22 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 22:59:22,738 - __main__ - INFO - [2025-05-17 22:59:22 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 22:59:22,744 - sglang - INFO - [2025-05-17 22:59:22 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 22:59:22,744 - __main__ - INFO - [2025-05-17 22:59:22 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 22:59:22,744 - sglang - INFO - [2025-05-17 22:59:22 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 22:59:22,744 - __main__ - INFO - [2025-05-17 22:59:22 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 22:59:22,854 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 22:59:22,898 - sglang - INFO - [2025-05-17 22:59:22 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 22:59:22,898 - __main__ - INFO - [2025-05-17 22:59:22 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 22:59:23,933 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 22:59:24,663 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.03it/s]
50%|█████ | 2/4 [00:01<00:01, 1.82it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.38it/s]
100%|██████████| 4/4 [00:01<00:00, 2.77it/s]
100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
- 2025-05-17 22:59:24,663 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.03it/s]
50%|█████ | 2/4 [00:01<00:01, 1.82it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.38it/s]
100%|██████████| 4/4 [00:01<00:00, 2.77it/s]
100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
- 2025-05-17 22:59:24,663 - sglang - INFO - [2025-05-17 22:59:24 TP0] Capture cuda graph end. Time elapsed: 1.76 s
- 2025-05-17 22:59:24,663 - __main__ - INFO - [2025-05-17 22:59:24 TP0] Capture cuda graph end. Time elapsed: 1.76 s
- 2025-05-17 22:59:25,014 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 22:59:26,070 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 22:59:27,135 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 22:59:27,961 - sglang - INFO - [2025-05-17 22:59:27 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 22:59:27,961 - __main__ - INFO - [2025-05-17 22:59:27 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 22:59:28,225 - __main__ - INFO - sglang server is ready.
- 2025-05-17 22:59:28,226 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 22:59:28,226 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:59:28,226 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 22:59:28,226 - __main__ - INFO - Worker 0 processing work item 02907a3ba6226f0399bbf3080296d8a1a280e502
- 2025-05-17 22:59:28,226 - __main__ - INFO - Created all tasks for 02907a3ba6226f0399bbf3080296d8a1a280e502
- 2025-05-17 22:59:28,235 - __main__ - INFO - Got 15 pages to do for olmocr_workspace/job_1747493917/input.pdf in worker 0
- 2025-05-17 22:59:29,054 - sglang - INFO - [2025-05-17 22:59:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 22:59:29,054 - __main__ - INFO - [2025-05-17 22:59:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 22:59:29,054 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 22:59:30,511 - sglang - INFO - [2025-05-17 22:59:30] The server is fired up and ready to roll!
- 2025-05-17 22:59:30,511 - __main__ - INFO - [2025-05-17 22:59:30] The server is fired up and ready to roll!
- 2025-05-17 22:59:35,607 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-1
- 2025-05-17 22:59:35,625 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-3
- 2025-05-17 22:59:35,631 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-2
- 2025-05-17 22:59:35,647 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-4
- 2025-05-17 22:59:35,653 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-5
- 2025-05-17 22:59:35,664 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-6
- 2025-05-17 22:59:35,670 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-7
- 2025-05-17 22:59:35,674 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-8
- 2025-05-17 22:59:35,680 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-9
- 2025-05-17 22:59:35,688 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-12
- 2025-05-17 22:59:35,689 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-10
- 2025-05-17 22:59:35,695 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-13
- 2025-05-17 22:59:35,696 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-14
- 2025-05-17 22:59:35,698 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-11
- 2025-05-17 22:59:35,704 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-15
- 2025-05-17 22:59:38,226 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:59:38,227 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:59:38,227 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 15
- 2025-05-17 22:59:48,279 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:59:48,279 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:59:48,279 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 15
- 2025-05-17 22:59:57,415 - sglang - INFO - [2025-05-17 22:59:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2470, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 22:59:57,415 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 22:59:58,280 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 22:59:58,280 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 22:59:58,280 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 15
- 2025-05-17 22:59:58,497 - sglang - INFO - [2025-05-17 22:59:58 TP0] Prefill batch. #new-seq: 6, #new-token: 13288, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.07, #running-req: 1, #queue-req: 8
- 2025-05-17 22:59:58,498 - __main__ - INFO - sglang running req: 1 queue req: 8
- 2025-05-17 23:00:03,556 - sglang - INFO - [2025-05-17 23:00:03 TP0] Decode batch. #running-req: 7, #token: 15989, token usage: 0.42, gen throughput (token/s): 6.69, #queue-req: 8
- 2025-05-17 23:00:03,556 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:04,435 - sglang - INFO - [2025-05-17 23:00:04 TP0] Decode batch. #running-req: 7, #token: 16269, token usage: 0.43, gen throughput (token/s): 318.57, #queue-req: 8
- 2025-05-17 23:00:04,435 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:05,312 - sglang - INFO - [2025-05-17 23:00:05 TP0] Decode batch. #running-req: 7, #token: 16549, token usage: 0.44, gen throughput (token/s): 319.02, #queue-req: 8
- 2025-05-17 23:00:05,313 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:06,190 - sglang - INFO - [2025-05-17 23:00:06 TP0] Decode batch. #running-req: 7, #token: 16829, token usage: 0.44, gen throughput (token/s): 319.03, #queue-req: 8
- 2025-05-17 23:00:06,190 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:07,076 - sglang - INFO - [2025-05-17 23:00:07 TP0] Decode batch. #running-req: 7, #token: 17109, token usage: 0.45, gen throughput (token/s): 316.06, #queue-req: 8
- 2025-05-17 23:00:07,076 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:07,956 - sglang - INFO - [2025-05-17 23:00:07 TP0] Decode batch. #running-req: 7, #token: 17389, token usage: 0.46, gen throughput (token/s): 318.18, #queue-req: 8
- 2025-05-17 23:00:07,956 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:08,281 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:00:08,282 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:00:08,282 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 15
- 2025-05-17 23:00:08,836 - sglang - INFO - [2025-05-17 23:00:08 TP0] Decode batch. #running-req: 7, #token: 17669, token usage: 0.47, gen throughput (token/s): 318.06, #queue-req: 8
- 2025-05-17 23:00:08,837 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:09,717 - sglang - INFO - [2025-05-17 23:00:09 TP0] Decode batch. #running-req: 7, #token: 17949, token usage: 0.47, gen throughput (token/s): 317.94, #queue-req: 8
- 2025-05-17 23:00:09,717 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:10,605 - sglang - INFO - [2025-05-17 23:00:10 TP0] Decode batch. #running-req: 7, #token: 18229, token usage: 0.48, gen throughput (token/s): 315.17, #queue-req: 8
- 2025-05-17 23:00:10,606 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:11,490 - sglang - INFO - [2025-05-17 23:00:11 TP0] Decode batch. #running-req: 7, #token: 18509, token usage: 0.49, gen throughput (token/s): 316.33, #queue-req: 8
- 2025-05-17 23:00:11,491 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:12,374 - sglang - INFO - [2025-05-17 23:00:12 TP0] Decode batch. #running-req: 7, #token: 18789, token usage: 0.49, gen throughput (token/s): 317.03, #queue-req: 8
- 2025-05-17 23:00:12,374 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:13,257 - sglang - INFO - [2025-05-17 23:00:13 TP0] Decode batch. #running-req: 7, #token: 19069, token usage: 0.50, gen throughput (token/s): 316.92, #queue-req: 8
- 2025-05-17 23:00:13,257 - __main__ - INFO - sglang running req: 7 queue req: 8
- 2025-05-17 23:00:13,523 - sglang - INFO - [2025-05-17 23:00:13 TP0] Prefill batch. #new-seq: 3, #new-token: 6276, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.44, #running-req: 6, #queue-req: 5
- 2025-05-17 23:00:13,523 - __main__ - INFO - sglang running req: 6 queue req: 5
- 2025-05-17 23:00:16,343 - sglang - INFO - [2025-05-17 23:00:16 TP0] Decode batch. #running-req: 9, #token: 23262, token usage: 0.61, gen throughput (token/s): 108.57, #queue-req: 5
- 2025-05-17 23:00:16,343 - __main__ - INFO - sglang running req: 9 queue req: 5
- 2025-05-17 23:00:17,294 - sglang - INFO - [2025-05-17 23:00:17 TP0] Decode batch. #running-req: 9, #token: 23622, token usage: 0.62, gen throughput (token/s): 378.50, #queue-req: 5
- 2025-05-17 23:00:17,294 - __main__ - INFO - sglang running req: 9 queue req: 5
- 2025-05-17 23:00:17,460 - sglang - INFO - [2025-05-17 23:00:17 TP0] Prefill batch. #new-seq: 2, #new-token: 3818, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 8, #queue-req: 3
- 2025-05-17 23:00:17,460 - __main__ - INFO - sglang running req: 8 queue req: 3
- 2025-05-17 23:00:18,282 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:00:18,283 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 46.72 46.72
- sglang_output_tokens 11.09 11.09
- 2025-05-17 23:00:18,283 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 15
- 2025-05-17 23:00:19,133 - sglang - INFO - [2025-05-17 23:00:19 TP0] Prefill batch. #new-seq: 2, #new-token: 3881, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.58, #running-req: 9, #queue-req: 1
- 2025-05-17 23:00:19,133 - __main__ - INFO - sglang running req: 9 queue req: 1
- 2025-05-17 23:00:21,030 - sglang - INFO - [2025-05-17 23:00:21 TP0] Decode batch. #running-req: 11, #token: 23317, token usage: 0.61, gen throughput (token/s): 110.28, #queue-req: 1
- 2025-05-17 23:00:21,030 - __main__ - INFO - sglang running req: 11 queue req: 1
- 2025-05-17 23:00:21,054 - sglang - INFO - [2025-05-17 23:00:21 TP0] Prefill batch. #new-seq: 1, #new-token: 1868, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 10, #queue-req: 0
- 2025-05-17 23:00:21,054 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-05-17 23:00:21,215 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:00:21,217 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:00:21,217 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:00:22,713 - sglang - INFO - [2025-05-17 23:00:22 TP0] Decode batch. #running-req: 10, #token: 24380, token usage: 0.64, gen throughput (token/s): 245.41, #queue-req: 0
- 2025-05-17 23:00:22,713 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-05-17 23:00:23,666 - sglang - INFO - [2025-05-17 23:00:23 TP0] Decode batch. #running-req: 9, #token: 23456, token usage: 0.62, gen throughput (token/s): 396.29, #queue-req: 0
- 2025-05-17 23:00:23,667 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-05-17 23:00:24,620 - sglang - INFO - [2025-05-17 23:00:24 TP0] Decode batch. #running-req: 9, #token: 23816, token usage: 0.63, gen throughput (token/s): 377.68, #queue-req: 0
- 2025-05-17 23:00:24,620 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-05-17 23:00:25,579 - sglang - INFO - [2025-05-17 23:00:25 TP0] Decode batch. #running-req: 9, #token: 24176, token usage: 0.64, gen throughput (token/s): 375.28, #queue-req: 0
- 2025-05-17 23:00:25,579 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-05-17 23:00:26,481 - sglang - INFO - [2025-05-17 23:00:26 TP0] Decode batch. #running-req: 7, #token: 19555, token usage: 0.51, gen throughput (token/s): 328.25, #queue-req: 0
- 2025-05-17 23:00:26,481 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-05-17 23:00:27,373 - sglang - INFO - [2025-05-17 23:00:27 TP0] Decode batch. #running-req: 6, #token: 16590, token usage: 0.44, gen throughput (token/s): 308.05, #queue-req: 0
- 2025-05-17 23:00:27,374 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 23:00:28,248 - sglang - INFO - [2025-05-17 23:00:28 TP0] Decode batch. #running-req: 5, #token: 13573, token usage: 0.36, gen throughput (token/s): 229.72, #queue-req: 0
- 2025-05-17 23:00:28,249 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:00:28,284 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:00:28,284 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 188.90 188.90
- sglang_output_tokens 48.36 48.36
- 2025-05-17 23:00:28,285 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 15
- 2025-05-17 23:00:29,130 - sglang - INFO - [2025-05-17 23:00:29 TP0] Decode batch. #running-req: 5, #token: 13773, token usage: 0.36, gen throughput (token/s): 226.73, #queue-req: 0
- 2025-05-17 23:00:29,131 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:00:30,014 - sglang - INFO - [2025-05-17 23:00:30 TP0] Decode batch. #running-req: 5, #token: 13973, token usage: 0.37, gen throughput (token/s): 226.49, #queue-req: 0
- 2025-05-17 23:00:30,014 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:00:30,894 - sglang - INFO - [2025-05-17 23:00:30 TP0] Decode batch. #running-req: 5, #token: 14173, token usage: 0.37, gen throughput (token/s): 227.18, #queue-req: 0
- 2025-05-17 23:00:30,894 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:00:31,778 - sglang - INFO - [2025-05-17 23:00:31 TP0] Decode batch. #running-req: 5, #token: 14373, token usage: 0.38, gen throughput (token/s): 226.30, #queue-req: 0
- 2025-05-17 23:00:31,778 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:00:32,659 - sglang - INFO - [2025-05-17 23:00:32 TP0] Decode batch. #running-req: 5, #token: 14573, token usage: 0.38, gen throughput (token/s): 226.91, #queue-req: 0
- 2025-05-17 23:00:32,659 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:00:33,155 - __main__ - INFO - Got invalid_page rotation for olmocr_workspace/job_1747493917/input.pdf-15 attempt 0, retrying with 90 rotation
- 2025-05-17 23:00:33,155 - __main__ - WARNING - ValueError on attempt 0 for olmocr_workspace/job_1747493917/input.pdf-15: <class 'ValueError'> - invalid_page rotation for olmocr_workspace/job_1747493917/input.pdf-15
- 2025-05-17 23:00:33,430 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-15
- 2025-05-17 23:00:33,547 - sglang - INFO - [2025-05-17 23:00:33 TP0] Decode batch. #running-req: 3, #token: 9596, token usage: 0.25, gen throughput (token/s): 199.32, #queue-req: 0
- 2025-05-17 23:00:33,547 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:00:33,648 - sglang - INFO - [2025-05-17 23:00:33 TP0] Prefill batch. #new-seq: 1, #new-token: 1868, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.25, #running-req: 3, #queue-req: 0
- 2025-05-17 23:00:33,648 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:00:35,157 - sglang - INFO - [2025-05-17 23:00:35 TP0] Decode batch. #running-req: 4, #token: 11620, token usage: 0.31, gen throughput (token/s): 96.88, #queue-req: 0
- 2025-05-17 23:00:35,158 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:00:36,027 - sglang - INFO - [2025-05-17 23:00:36 TP0] Decode batch. #running-req: 4, #token: 11780, token usage: 0.31, gen throughput (token/s): 183.98, #queue-req: 0
- 2025-05-17 23:00:36,027 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:00:36,903 - sglang - INFO - [2025-05-17 23:00:36 TP0] Decode batch. #running-req: 4, #token: 11940, token usage: 0.31, gen throughput (token/s): 182.74, #queue-req: 0
- 2025-05-17 23:00:36,903 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:00:37,775 - sglang - INFO - [2025-05-17 23:00:37 TP0] Decode batch. #running-req: 4, #token: 12100, token usage: 0.32, gen throughput (token/s): 183.27, #queue-req: 0
- 2025-05-17 23:00:37,776 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:00:38,285 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:00:38,285 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 230.12 230.12
- sglang_output_tokens 60.90 60.90
- 2025-05-17 23:00:38,286 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 12 | 15
- 2025-05-17 23:00:38,641 - sglang - INFO - [2025-05-17 23:00:38 TP0] Decode batch. #running-req: 3, #token: 8863, token usage: 0.23, gen throughput (token/s): 141.03, #queue-req: 0
- 2025-05-17 23:00:38,641 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:00:39,505 - sglang - INFO - [2025-05-17 23:00:39 TP0] Decode batch. #running-req: 3, #token: 8983, token usage: 0.24, gen throughput (token/s): 138.86, #queue-req: 0
- 2025-05-17 23:00:39,505 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:00:40,367 - sglang - INFO - [2025-05-17 23:00:40 TP0] Decode batch. #running-req: 2, #token: 5623, token usage: 0.15, gen throughput (token/s): 138.00, #queue-req: 0
- 2025-05-17 23:00:40,367 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-05-17 23:00:41,219 - sglang - INFO - [2025-05-17 23:00:41 TP0] Decode batch. #running-req: 1, #token: 2184, token usage: 0.06, gen throughput (token/s): 79.84, #queue-req: 0
- 2025-05-17 23:00:41,219 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:00:42,057 - sglang - INFO - [2025-05-17 23:00:42 TP0] Decode batch. #running-req: 1, #token: 2224, token usage: 0.06, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-05-17 23:00:42,057 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:00:42,892 - sglang - INFO - [2025-05-17 23:00:42 TP0] Decode batch. #running-req: 1, #token: 2264, token usage: 0.06, gen throughput (token/s): 47.90, #queue-req: 0
- 2025-05-17 23:00:42,892 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:00:43,727 - sglang - INFO - [2025-05-17 23:00:43 TP0] Decode batch. #running-req: 1, #token: 2304, token usage: 0.06, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-05-17 23:00:43,728 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:00:44,566 - sglang - INFO - [2025-05-17 23:00:44 TP0] Decode batch. #running-req: 1, #token: 2344, token usage: 0.06, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-05-17 23:00:44,566 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:00:45,409 - sglang - INFO - [2025-05-17 23:00:45 TP0] Decode batch. #running-req: 1, #token: 2384, token usage: 0.06, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-05-17 23:00:45,409 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:00:46,004 - __main__ - INFO - Finished TaskGroup for worker on 02907a3ba6226f0399bbf3080296d8a1a280e502
- 2025-05-17 23:00:46,004 - __main__ - INFO - Got 1 docs for 02907a3ba6226f0399bbf3080296d8a1a280e502
- 2025-05-17 23:00:46,006 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:00:46,006 - __main__ - INFO - Work done
- 2025-05-17 23:00:46,007 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:06:23,302 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:06:23,302 - __main__ - INFO - Loading file at tests/gnarly_pdfs/badlines.pdf as PDF document
- 2025-05-17 23:06:23,302 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:06:23,309 - __main__ - INFO - Calculated items_per_group: 50 based on average pages per PDF: 10.00
- 2025-05-17 23:06:23,548 - __main__ - INFO - Starting pipeline with PID 416546
- 2025-05-17 23:06:23,549 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:06:29,154 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:06:30,200 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:06:31,251 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:06:32,316 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:06:33,381 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:06:34,451 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:06:35,178 - sglang - INFO - [2025-05-17 23:06:35] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=153903282, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:06:35,178 - __main__ - INFO - [2025-05-17 23:06:35] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=153903282, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:06:35,496 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:06:36,562 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:06:37,628 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:06:38,698 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:06:39,768 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:06:40,834 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:06:41,900 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:06:42,966 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:06:43,533 - sglang - INFO - [2025-05-17 23:06:43] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:06:43,533 - __main__ - INFO - [2025-05-17 23:06:43] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:06:44,045 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:06:45,117 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:06:46,182 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:06:47,236 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:06:48,297 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:06:49,361 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:06:49,771 - sglang - INFO - [2025-05-17 23:06:49 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:06:49,771 - __main__ - INFO - [2025-05-17 23:06:49 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:06:50,440 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:06:50,582 - sglang - INFO - [2025-05-17 23:06:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:06:50,582 - __main__ - INFO - [2025-05-17 23:06:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:06:50,583 - sglang - INFO - [2025-05-17 23:06:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:06:50,583 - __main__ - INFO - [2025-05-17 23:06:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:06:50,583 - sglang - INFO - [2025-05-17 23:06:50 TP0] Init torch distributed begin.
- 2025-05-17 23:06:50,583 - __main__ - INFO - [2025-05-17 23:06:50 TP0] Init torch distributed begin.
- 2025-05-17 23:06:51,520 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:06:52,595 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:06:53,661 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:06:54,727 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:06:55,780 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:06:55,881 - sglang - INFO - [2025-05-17 23:06:55 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:06:55,881 - __main__ - INFO - [2025-05-17 23:06:55 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:06:56,854 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:06:56,906 - sglang - INFO - [2025-05-17 23:06:56 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:06:56,906 - __main__ - INFO - [2025-05-17 23:06:56 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:06:57,540 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:06:57,540 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:06:57,816 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.63it/s]
- 2025-05-17 23:06:57,816 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.63it/s]
- 2025-05-17 23:06:57,933 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:06:58,742 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
- 2025-05-17 23:06:58,742 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
- 2025-05-17 23:06:59,013 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:06:59,678 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.27it/s]
- 2025-05-17 23:06:59,678 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.27it/s]
- 2025-05-17 23:07:00,093 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:07:00,590 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 23:07:00,590 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 23:07:00,590 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
- 2025-05-17 23:07:00,590 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
- 2025-05-17 23:07:00,590 - sglang - INFO -
- 2025-05-17 23:07:00,590 - __main__ - INFO -
- 2025-05-17 23:07:00,736 - sglang - INFO - [2025-05-17 23:07:00 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:07:00,737 - __main__ - INFO - [2025-05-17 23:07:00 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:07:00,743 - sglang - INFO - [2025-05-17 23:07:00 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:07:00,743 - __main__ - INFO - [2025-05-17 23:07:00 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:07:00,743 - sglang - INFO - [2025-05-17 23:07:00 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:07:00,743 - __main__ - INFO - [2025-05-17 23:07:00 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:07:00,920 - sglang - INFO - [2025-05-17 23:07:00 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:07:00,920 - __main__ - INFO - [2025-05-17 23:07:00 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:07:01,172 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 23:07:02,252 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 23:07:02,669 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.04it/s]
50%|█████ | 2/4 [00:01<00:01, 1.84it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.40it/s]
100%|██████████| 4/4 [00:01<00:00, 2.80it/s]
100%|██████████| 4/4 [00:01<00:00, 2.29it/s]
- 2025-05-17 23:07:02,669 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.04it/s]
50%|█████ | 2/4 [00:01<00:01, 1.84it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.40it/s]
100%|██████████| 4/4 [00:01<00:00, 2.80it/s]
100%|██████████| 4/4 [00:01<00:00, 2.29it/s]
- 2025-05-17 23:07:02,670 - sglang - INFO - [2025-05-17 23:07:02 TP0] Capture cuda graph end. Time elapsed: 1.75 s
- 2025-05-17 23:07:02,670 - __main__ - INFO - [2025-05-17 23:07:02 TP0] Capture cuda graph end. Time elapsed: 1.75 s
- 2025-05-17 23:07:03,288 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 23:07:04,325 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 23:07:05,386 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 23:07:05,965 - sglang - INFO - [2025-05-17 23:07:05 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:07:05,965 - __main__ - INFO - [2025-05-17 23:07:05 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:07:06,481 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:07:06,481 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:07:06,481 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:07:06,481 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:07:06,482 - __main__ - INFO - Worker 0 processing work item 9135f55c864185c3e61b48277b842dd16a718eb8
- 2025-05-17 23:07:06,482 - __main__ - INFO - Created all tasks for 9135f55c864185c3e61b48277b842dd16a718eb8
- 2025-05-17 23:07:06,494 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/badlines.pdf in worker 0
- 2025-05-17 23:07:07,040 - sglang - INFO - [2025-05-17 23:07:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:07:07,040 - __main__ - INFO - [2025-05-17 23:07:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:07:07,040 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:07:08,155 - sglang - INFO - [2025-05-17 23:07:08] The server is fired up and ready to roll!
- 2025-05-17 23:07:08,155 - __main__ - INFO - [2025-05-17 23:07:08] The server is fired up and ready to roll!
- 2025-05-17 23:07:13,262 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-1
- 2025-05-17 23:07:13,278 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-2
- 2025-05-17 23:07:13,344 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-4
- 2025-05-17 23:07:13,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-3
- 2025-05-17 23:07:13,373 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-5
- 2025-05-17 23:07:13,381 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-6
- 2025-05-17 23:07:13,392 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-7
- 2025-05-17 23:07:13,413 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-10
- 2025-05-17 23:07:13,414 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-9
- 2025-05-17 23:07:13,423 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-8
- 2025-05-17 23:07:16,482 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:07:16,483 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:07:16,483 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 10
- 2025-05-17 23:07:26,485 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:07:26,486 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:07:26,486 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 10
- 2025-05-17 23:07:35,638 - sglang - INFO - [2025-05-17 23:07:35 TP0] Prefill batch. #new-seq: 1, #new-token: 3115, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:07:35,638 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:07:36,487 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:07:36,487 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:07:36,488 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 10
- 2025-05-17 23:07:36,779 - sglang - INFO - [2025-05-17 23:07:36 TP0] Prefill batch. #new-seq: 3, #new-token: 12922, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.08, #running-req: 1, #queue-req: 6
- 2025-05-17 23:07:36,779 - __main__ - INFO - sglang running req: 1 queue req: 6
- 2025-05-17 23:07:40,138 - sglang - INFO - [2025-05-17 23:07:40 TP0] Prefill batch. #new-seq: 1, #new-token: 3999, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.42, #running-req: 4, #queue-req: 5
- 2025-05-17 23:07:40,138 - __main__ - INFO - sglang running req: 4 queue req: 5
- 2025-05-17 23:07:42,131 - sglang - INFO - [2025-05-17 23:07:42 TP0] Decode batch. #running-req: 5, #token: 20201, token usage: 0.53, gen throughput (token/s): 4.76, #queue-req: 5
- 2025-05-17 23:07:42,131 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:43,021 - sglang - INFO - [2025-05-17 23:07:43 TP0] Decode batch. #running-req: 5, #token: 20401, token usage: 0.54, gen throughput (token/s): 224.78, #queue-req: 5
- 2025-05-17 23:07:43,021 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:43,905 - sglang - INFO - [2025-05-17 23:07:43 TP0] Decode batch. #running-req: 5, #token: 20601, token usage: 0.54, gen throughput (token/s): 226.09, #queue-req: 5
- 2025-05-17 23:07:43,906 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:44,792 - sglang - INFO - [2025-05-17 23:07:44 TP0] Decode batch. #running-req: 5, #token: 20801, token usage: 0.55, gen throughput (token/s): 225.65, #queue-req: 5
- 2025-05-17 23:07:44,792 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:45,678 - sglang - INFO - [2025-05-17 23:07:45 TP0] Decode batch. #running-req: 5, #token: 21001, token usage: 0.55, gen throughput (token/s): 225.76, #queue-req: 5
- 2025-05-17 23:07:45,678 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:46,488 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:07:46,488 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:07:46,488 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 10
- 2025-05-17 23:07:46,564 - sglang - INFO - [2025-05-17 23:07:46 TP0] Decode batch. #running-req: 5, #token: 21201, token usage: 0.56, gen throughput (token/s): 225.46, #queue-req: 5
- 2025-05-17 23:07:46,565 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:47,452 - sglang - INFO - [2025-05-17 23:07:47 TP0] Decode batch. #running-req: 5, #token: 21401, token usage: 0.56, gen throughput (token/s): 225.28, #queue-req: 5
- 2025-05-17 23:07:47,452 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:48,343 - sglang - INFO - [2025-05-17 23:07:48 TP0] Decode batch. #running-req: 5, #token: 21601, token usage: 0.57, gen throughput (token/s): 224.53, #queue-req: 5
- 2025-05-17 23:07:48,343 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:49,240 - sglang - INFO - [2025-05-17 23:07:49 TP0] Decode batch. #running-req: 5, #token: 21801, token usage: 0.57, gen throughput (token/s): 222.99, #queue-req: 5
- 2025-05-17 23:07:49,240 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:50,130 - sglang - INFO - [2025-05-17 23:07:50 TP0] Decode batch. #running-req: 5, #token: 22001, token usage: 0.58, gen throughput (token/s): 224.64, #queue-req: 5
- 2025-05-17 23:07:50,130 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:51,021 - sglang - INFO - [2025-05-17 23:07:51 TP0] Decode batch. #running-req: 5, #token: 22201, token usage: 0.58, gen throughput (token/s): 224.44, #queue-req: 5
- 2025-05-17 23:07:51,022 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:51,912 - sglang - INFO - [2025-05-17 23:07:51 TP0] Decode batch. #running-req: 5, #token: 22401, token usage: 0.59, gen throughput (token/s): 224.45, #queue-req: 5
- 2025-05-17 23:07:51,913 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:52,813 - sglang - INFO - [2025-05-17 23:07:52 TP0] Decode batch. #running-req: 5, #token: 22601, token usage: 0.59, gen throughput (token/s): 222.20, #queue-req: 5
- 2025-05-17 23:07:52,813 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:53,709 - sglang - INFO - [2025-05-17 23:07:53 TP0] Decode batch. #running-req: 5, #token: 22801, token usage: 0.60, gen throughput (token/s): 223.16, #queue-req: 5
- 2025-05-17 23:07:53,709 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:54,605 - sglang - INFO - [2025-05-17 23:07:54 TP0] Decode batch. #running-req: 5, #token: 23001, token usage: 0.61, gen throughput (token/s): 223.07, #queue-req: 5
- 2025-05-17 23:07:54,606 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:55,500 - sglang - INFO - [2025-05-17 23:07:55 TP0] Decode batch. #running-req: 5, #token: 23201, token usage: 0.61, gen throughput (token/s): 223.62, #queue-req: 5
- 2025-05-17 23:07:55,500 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:56,397 - sglang - INFO - [2025-05-17 23:07:56 TP0] Decode batch. #running-req: 5, #token: 23401, token usage: 0.62, gen throughput (token/s): 222.92, #queue-req: 5
- 2025-05-17 23:07:56,397 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:56,490 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:07:56,490 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:07:56,491 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 10
- 2025-05-17 23:07:57,300 - sglang - INFO - [2025-05-17 23:07:57 TP0] Decode batch. #running-req: 5, #token: 23601, token usage: 0.62, gen throughput (token/s): 221.44, #queue-req: 5
- 2025-05-17 23:07:57,301 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:58,201 - sglang - INFO - [2025-05-17 23:07:58 TP0] Decode batch. #running-req: 5, #token: 23801, token usage: 0.63, gen throughput (token/s): 221.99, #queue-req: 5
- 2025-05-17 23:07:58,201 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:59,102 - sglang - INFO - [2025-05-17 23:07:59 TP0] Decode batch. #running-req: 5, #token: 24001, token usage: 0.63, gen throughput (token/s): 222.08, #queue-req: 5
- 2025-05-17 23:07:59,102 - __main__ - INFO - sglang running req: 5 queue req: 5
- 2025-05-17 23:07:59,666 - sglang - INFO - [2025-05-17 23:07:59 TP0] Prefill batch. #new-seq: 2, #new-token: 7388, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.53, #running-req: 4, #queue-req: 3
- 2025-05-17 23:07:59,666 - __main__ - INFO - sglang running req: 4 queue req: 3
- 2025-05-17 23:08:02,168 - sglang - INFO - [2025-05-17 23:08:02 TP0] Decode batch. #running-req: 6, #token: 27671, token usage: 0.73, gen throughput (token/s): 69.79, #queue-req: 3
- 2025-05-17 23:08:02,169 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-05-17 23:08:03,081 - sglang - INFO - [2025-05-17 23:08:03 TP0] Decode batch. #running-req: 6, #token: 27911, token usage: 0.73, gen throughput (token/s): 262.85, #queue-req: 3
- 2025-05-17 23:08:03,082 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-05-17 23:08:03,997 - sglang - INFO - [2025-05-17 23:08:03 TP0] Decode batch. #running-req: 6, #token: 28151, token usage: 0.74, gen throughput (token/s): 262.18, #queue-req: 3
- 2025-05-17 23:08:03,997 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-05-17 23:08:04,917 - sglang - INFO - [2025-05-17 23:08:04 TP0] Decode batch. #running-req: 6, #token: 28391, token usage: 0.75, gen throughput (token/s): 260.73, #queue-req: 3
- 2025-05-17 23:08:04,917 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-05-17 23:08:05,835 - sglang - INFO - [2025-05-17 23:08:05 TP0] Decode batch. #running-req: 6, #token: 28631, token usage: 0.75, gen throughput (token/s): 261.54, #queue-req: 3
- 2025-05-17 23:08:05,835 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-05-17 23:08:06,491 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:08:06,492 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 30.18 30.18
- sglang_output_tokens 7.93 7.93
- 2025-05-17 23:08:06,492 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 10
- 2025-05-17 23:08:06,753 - sglang - INFO - [2025-05-17 23:08:06 TP0] Decode batch. #running-req: 6, #token: 28871, token usage: 0.76, gen throughput (token/s): 261.22, #queue-req: 3
- 2025-05-17 23:08:06,754 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-05-17 23:08:07,672 - sglang - INFO - [2025-05-17 23:08:07 TP0] Decode batch. #running-req: 6, #token: 29111, token usage: 0.77, gen throughput (token/s): 261.24, #queue-req: 3
- 2025-05-17 23:08:07,672 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-05-17 23:08:08,600 - sglang - INFO - [2025-05-17 23:08:08 TP0] Decode batch. #running-req: 6, #token: 29351, token usage: 0.77, gen throughput (token/s): 258.69, #queue-req: 3
- 2025-05-17 23:08:08,600 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-05-17 23:08:09,522 - sglang - INFO - [2025-05-17 23:08:09 TP0] Decode batch. #running-req: 6, #token: 29591, token usage: 0.78, gen throughput (token/s): 260.24, #queue-req: 3
- 2025-05-17 23:08:09,523 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-05-17 23:08:10,078 - sglang - INFO - [2025-05-17 23:08:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 5, #queue-req: 2
- 2025-05-17 23:08:10,079 - __main__ - INFO - sglang running req: 5 queue req: 2
- 2025-05-17 23:08:11,267 - sglang - INFO - [2025-05-17 23:08:11 TP0] Prefill batch. #new-seq: 2, #new-token: 8214, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.58, #running-req: 5, #queue-req: 0
- 2025-05-17 23:08:11,267 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:11,437 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:08:11,437 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:08:11,437 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:08:11,437 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:08:11,438 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:08:11,438 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:08:11,438 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:08:11,438 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:08:13,811 - sglang - INFO - [2025-05-17 23:08:13 TP0] Decode batch. #running-req: 7, #token: 30217, token usage: 0.80, gen throughput (token/s): 56.66, #queue-req: 0
- 2025-05-17 23:08:13,811 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-05-17 23:08:14,740 - sglang - INFO - [2025-05-17 23:08:14 TP0] Decode batch. #running-req: 7, #token: 30497, token usage: 0.80, gen throughput (token/s): 301.38, #queue-req: 0
- 2025-05-17 23:08:14,740 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-05-17 23:08:15,674 - sglang - INFO - [2025-05-17 23:08:15 TP0] Decode batch. #running-req: 7, #token: 30777, token usage: 0.81, gen throughput (token/s): 299.88, #queue-req: 0
- 2025-05-17 23:08:15,674 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-05-17 23:08:16,492 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:08:16,493 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 101.61 101.61
- sglang_output_tokens 28.12 28.12
- 2025-05-17 23:08:16,493 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 3 | 10
- 2025-05-17 23:08:16,605 - sglang - INFO - [2025-05-17 23:08:16 TP0] Decode batch. #running-req: 7, #token: 31057, token usage: 0.82, gen throughput (token/s): 300.57, #queue-req: 0
- 2025-05-17 23:08:16,605 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-05-17 23:08:17,535 - sglang - INFO - [2025-05-17 23:08:17 TP0] Decode batch. #running-req: 7, #token: 31337, token usage: 0.82, gen throughput (token/s): 301.25, #queue-req: 0
- 2025-05-17 23:08:17,535 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-05-17 23:08:18,459 - sglang - INFO - [2025-05-17 23:08:18 TP0] Decode batch. #running-req: 6, #token: 26755, token usage: 0.70, gen throughput (token/s): 262.99, #queue-req: 0
- 2025-05-17 23:08:18,459 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 23:08:19,386 - sglang - INFO - [2025-05-17 23:08:19 TP0] Decode batch. #running-req: 6, #token: 26995, token usage: 0.71, gen throughput (token/s): 258.64, #queue-req: 0
- 2025-05-17 23:08:19,387 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-05-17 23:08:20,307 - sglang - INFO - [2025-05-17 23:08:20 TP0] Decode batch. #running-req: 5, #token: 20697, token usage: 0.54, gen throughput (token/s): 250.92, #queue-req: 0
- 2025-05-17 23:08:20,307 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:21,214 - sglang - INFO - [2025-05-17 23:08:21 TP0] Decode batch. #running-req: 5, #token: 20897, token usage: 0.55, gen throughput (token/s): 220.58, #queue-req: 0
- 2025-05-17 23:08:21,214 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:22,119 - sglang - INFO - [2025-05-17 23:08:22 TP0] Decode batch. #running-req: 5, #token: 21097, token usage: 0.56, gen throughput (token/s): 220.91, #queue-req: 0
- 2025-05-17 23:08:22,120 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:23,038 - sglang - INFO - [2025-05-17 23:08:23 TP0] Decode batch. #running-req: 5, #token: 21297, token usage: 0.56, gen throughput (token/s): 217.73, #queue-req: 0
- 2025-05-17 23:08:23,038 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:23,954 - sglang - INFO - [2025-05-17 23:08:23 TP0] Decode batch. #running-req: 5, #token: 21497, token usage: 0.57, gen throughput (token/s): 218.32, #queue-req: 0
- 2025-05-17 23:08:23,954 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:24,869 - sglang - INFO - [2025-05-17 23:08:24 TP0] Decode batch. #running-req: 5, #token: 21697, token usage: 0.57, gen throughput (token/s): 218.43, #queue-req: 0
- 2025-05-17 23:08:24,870 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:25,785 - sglang - INFO - [2025-05-17 23:08:25 TP0] Decode batch. #running-req: 5, #token: 21897, token usage: 0.58, gen throughput (token/s): 218.38, #queue-req: 0
- 2025-05-17 23:08:25,786 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:26,494 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:08:26,494 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 162.63 162.63
- sglang_output_tokens 48.74 48.74
- 2025-05-17 23:08:26,494 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 5 | 10
- 2025-05-17 23:08:26,705 - sglang - INFO - [2025-05-17 23:08:26 TP0] Decode batch. #running-req: 5, #token: 22097, token usage: 0.58, gen throughput (token/s): 217.36, #queue-req: 0
- 2025-05-17 23:08:26,706 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:27,633 - sglang - INFO - [2025-05-17 23:08:27 TP0] Decode batch. #running-req: 5, #token: 22297, token usage: 0.59, gen throughput (token/s): 215.52, #queue-req: 0
- 2025-05-17 23:08:27,634 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:28,553 - sglang - INFO - [2025-05-17 23:08:28 TP0] Decode batch. #running-req: 5, #token: 22497, token usage: 0.59, gen throughput (token/s): 217.45, #queue-req: 0
- 2025-05-17 23:08:28,554 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:29,469 - sglang - INFO - [2025-05-17 23:08:29 TP0] Decode batch. #running-req: 5, #token: 22697, token usage: 0.60, gen throughput (token/s): 218.41, #queue-req: 0
- 2025-05-17 23:08:29,469 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:30,389 - sglang - INFO - [2025-05-17 23:08:30 TP0] Decode batch. #running-req: 5, #token: 22897, token usage: 0.60, gen throughput (token/s): 217.36, #queue-req: 0
- 2025-05-17 23:08:30,389 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:08:31,296 - sglang - INFO - [2025-05-17 23:08:31 TP0] Decode batch. #running-req: 4, #token: 18516, token usage: 0.49, gen throughput (token/s): 189.67, #queue-req: 0
- 2025-05-17 23:08:31,296 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:08:32,196 - sglang - INFO - [2025-05-17 23:08:32 TP0] Decode batch. #running-req: 4, #token: 18676, token usage: 0.49, gen throughput (token/s): 177.81, #queue-req: 0
- 2025-05-17 23:08:32,196 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:08:33,094 - sglang - INFO - [2025-05-17 23:08:33 TP0] Decode batch. #running-req: 4, #token: 18836, token usage: 0.50, gen throughput (token/s): 178.20, #queue-req: 0
- 2025-05-17 23:08:33,094 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:08:33,995 - sglang - INFO - [2025-05-17 23:08:33 TP0] Decode batch. #running-req: 4, #token: 18996, token usage: 0.50, gen throughput (token/s): 177.43, #queue-req: 0
- 2025-05-17 23:08:33,995 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:08:34,904 - sglang - INFO - [2025-05-17 23:08:34 TP0] Decode batch. #running-req: 4, #token: 19156, token usage: 0.50, gen throughput (token/s): 176.02, #queue-req: 0
- 2025-05-17 23:08:34,905 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:08:35,808 - sglang - INFO - [2025-05-17 23:08:35 TP0] Decode batch. #running-req: 4, #token: 19316, token usage: 0.51, gen throughput (token/s): 176.97, #queue-req: 0
- 2025-05-17 23:08:35,809 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:08:36,495 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:08:36,495 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 206.00 206.00
- sglang_output_tokens 60.74 60.74
- 2025-05-17 23:08:36,495 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 7 | 10
- 2025-05-17 23:08:36,702 - sglang - INFO - [2025-05-17 23:08:36 TP0] Decode batch. #running-req: 3, #token: 14513, token usage: 0.38, gen throughput (token/s): 147.71, #queue-req: 0
- 2025-05-17 23:08:36,702 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:08:37,581 - sglang - INFO - [2025-05-17 23:08:37 TP0] Decode batch. #running-req: 2, #token: 10658, token usage: 0.28, gen throughput (token/s): 102.41, #queue-req: 0
- 2025-05-17 23:08:37,581 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-05-17 23:08:38,453 - sglang - INFO - [2025-05-17 23:08:38 TP0] Decode batch. #running-req: 1, #token: 5341, token usage: 0.14, gen throughput (token/s): 79.15, #queue-req: 0
- 2025-05-17 23:08:38,453 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:39,310 - sglang - INFO - [2025-05-17 23:08:39 TP0] Decode batch. #running-req: 1, #token: 5381, token usage: 0.14, gen throughput (token/s): 46.64, #queue-req: 0
- 2025-05-17 23:08:39,310 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:40,163 - sglang - INFO - [2025-05-17 23:08:40 TP0] Decode batch. #running-req: 1, #token: 5421, token usage: 0.14, gen throughput (token/s): 46.89, #queue-req: 0
- 2025-05-17 23:08:40,164 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:41,016 - sglang - INFO - [2025-05-17 23:08:41 TP0] Decode batch. #running-req: 1, #token: 5461, token usage: 0.14, gen throughput (token/s): 46.90, #queue-req: 0
- 2025-05-17 23:08:41,017 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:41,871 - sglang - INFO - [2025-05-17 23:08:41 TP0] Decode batch. #running-req: 1, #token: 5501, token usage: 0.14, gen throughput (token/s): 46.77, #queue-req: 0
- 2025-05-17 23:08:41,872 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:42,728 - sglang - INFO - [2025-05-17 23:08:42 TP0] Decode batch. #running-req: 1, #token: 5541, token usage: 0.15, gen throughput (token/s): 46.68, #queue-req: 0
- 2025-05-17 23:08:42,728 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:43,584 - sglang - INFO - [2025-05-17 23:08:43 TP0] Decode batch. #running-req: 1, #token: 5581, token usage: 0.15, gen throughput (token/s): 46.74, #queue-req: 0
- 2025-05-17 23:08:43,584 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:44,439 - sglang - INFO - [2025-05-17 23:08:44 TP0] Decode batch. #running-req: 1, #token: 5621, token usage: 0.15, gen throughput (token/s): 46.81, #queue-req: 0
- 2025-05-17 23:08:44,439 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:45,301 - sglang - INFO - [2025-05-17 23:08:45 TP0] Decode batch. #running-req: 1, #token: 5661, token usage: 0.15, gen throughput (token/s): 46.36, #queue-req: 0
- 2025-05-17 23:08:45,302 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:46,161 - sglang - INFO - [2025-05-17 23:08:46 TP0] Decode batch. #running-req: 1, #token: 5701, token usage: 0.15, gen throughput (token/s): 46.56, #queue-req: 0
- 2025-05-17 23:08:46,161 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:46,497 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:08:46,497 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 239.52 239.52
- sglang_output_tokens 73.76 73.76
- 2025-05-17 23:08:46,497 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 9 | 10
- 2025-05-17 23:08:47,017 - sglang - INFO - [2025-05-17 23:08:47 TP0] Decode batch. #running-req: 1, #token: 5741, token usage: 0.15, gen throughput (token/s): 46.69, #queue-req: 0
- 2025-05-17 23:08:47,017 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:47,874 - sglang - INFO - [2025-05-17 23:08:47 TP0] Decode batch. #running-req: 1, #token: 5781, token usage: 0.15, gen throughput (token/s): 46.65, #queue-req: 0
- 2025-05-17 23:08:47,875 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:48,734 - sglang - INFO - [2025-05-17 23:08:48 TP0] Decode batch. #running-req: 1, #token: 5821, token usage: 0.15, gen throughput (token/s): 46.56, #queue-req: 0
- 2025-05-17 23:08:48,734 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:49,598 - sglang - INFO - [2025-05-17 23:08:49 TP0] Decode batch. #running-req: 1, #token: 5861, token usage: 0.15, gen throughput (token/s): 46.29, #queue-req: 0
- 2025-05-17 23:08:49,598 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:50,456 - sglang - INFO - [2025-05-17 23:08:50 TP0] Decode batch. #running-req: 1, #token: 5901, token usage: 0.16, gen throughput (token/s): 46.63, #queue-req: 0
- 2025-05-17 23:08:50,456 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:51,313 - sglang - INFO - [2025-05-17 23:08:51 TP0] Decode batch. #running-req: 1, #token: 5941, token usage: 0.16, gen throughput (token/s): 46.65, #queue-req: 0
- 2025-05-17 23:08:51,313 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:52,176 - sglang - INFO - [2025-05-17 23:08:52 TP0] Decode batch. #running-req: 1, #token: 5981, token usage: 0.16, gen throughput (token/s): 46.38, #queue-req: 0
- 2025-05-17 23:08:52,176 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:53,038 - sglang - INFO - [2025-05-17 23:08:53 TP0] Decode batch. #running-req: 1, #token: 6021, token usage: 0.16, gen throughput (token/s): 46.39, #queue-req: 0
- 2025-05-17 23:08:53,038 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:53,902 - sglang - INFO - [2025-05-17 23:08:53 TP0] Decode batch. #running-req: 1, #token: 6061, token usage: 0.16, gen throughput (token/s): 46.29, #queue-req: 0
- 2025-05-17 23:08:53,902 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:54,763 - sglang - INFO - [2025-05-17 23:08:54 TP0] Decode batch. #running-req: 1, #token: 6101, token usage: 0.16, gen throughput (token/s): 46.45, #queue-req: 0
- 2025-05-17 23:08:54,763 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:55,624 - sglang - INFO - [2025-05-17 23:08:55 TP0] Decode batch. #running-req: 1, #token: 6141, token usage: 0.16, gen throughput (token/s): 46.46, #queue-req: 0
- 2025-05-17 23:08:55,624 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:56,487 - sglang - INFO - [2025-05-17 23:08:56 TP0] Decode batch. #running-req: 1, #token: 6181, token usage: 0.16, gen throughput (token/s): 46.36, #queue-req: 0
- 2025-05-17 23:08:56,487 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:56,498 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:08:56,498 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 223.89 223.89
- sglang_output_tokens 68.95 68.95
- 2025-05-17 23:08:56,498 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 9 | 10
- 2025-05-17 23:08:57,350 - sglang - INFO - [2025-05-17 23:08:57 TP0] Decode batch. #running-req: 1, #token: 6221, token usage: 0.16, gen throughput (token/s): 46.34, #queue-req: 0
- 2025-05-17 23:08:57,350 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:58,212 - sglang - INFO - [2025-05-17 23:08:58 TP0] Decode batch. #running-req: 1, #token: 6261, token usage: 0.16, gen throughput (token/s): 46.40, #queue-req: 0
- 2025-05-17 23:08:58,212 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:59,076 - sglang - INFO - [2025-05-17 23:08:59 TP0] Decode batch. #running-req: 1, #token: 6301, token usage: 0.17, gen throughput (token/s): 46.30, #queue-req: 0
- 2025-05-17 23:08:59,076 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:08:59,937 - sglang - INFO - [2025-05-17 23:08:59 TP0] Decode batch. #running-req: 1, #token: 6341, token usage: 0.17, gen throughput (token/s): 46.46, #queue-req: 0
- 2025-05-17 23:08:59,937 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:09:00,801 - sglang - INFO - [2025-05-17 23:09:00 TP0] Decode batch. #running-req: 1, #token: 6381, token usage: 0.17, gen throughput (token/s): 46.26, #queue-req: 0
- 2025-05-17 23:09:00,802 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:09:01,665 - sglang - INFO - [2025-05-17 23:09:01 TP0] Decode batch. #running-req: 1, #token: 6421, token usage: 0.17, gen throughput (token/s): 46.32, #queue-req: 0
- 2025-05-17 23:09:01,665 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:09:02,528 - sglang - INFO - [2025-05-17 23:09:02 TP0] Decode batch. #running-req: 1, #token: 6461, token usage: 0.17, gen throughput (token/s): 46.33, #queue-req: 0
- 2025-05-17 23:09:02,528 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:09:03,390 - sglang - INFO - [2025-05-17 23:09:03 TP0] Decode batch. #running-req: 1, #token: 6501, token usage: 0.17, gen throughput (token/s): 46.42, #queue-req: 0
- 2025-05-17 23:09:03,390 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:09:04,254 - sglang - INFO - [2025-05-17 23:09:04 TP0] Decode batch. #running-req: 1, #token: 6541, token usage: 0.17, gen throughput (token/s): 46.32, #queue-req: 0
- 2025-05-17 23:09:04,254 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:09:04,284 - __main__ - INFO - Finished TaskGroup for worker on 9135f55c864185c3e61b48277b842dd16a718eb8
- 2025-05-17 23:09:04,284 - __main__ - INFO - Got 1 docs for 9135f55c864185c3e61b48277b842dd16a718eb8
- 2025-05-17 23:09:04,286 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:09:04,286 - __main__ - INFO - Work done
- 2025-05-17 23:09:04,287 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:17:22,986 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:17:22,986 - __main__ - INFO - Loading file at tests/gnarly_pdfs/delivery.pdf as PDF document
- 2025-05-17 23:17:22,986 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:17:22,990 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:17:23,256 - __main__ - INFO - Starting pipeline with PID 438495
- 2025-05-17 23:17:23,256 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:17:25,309 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:17:26,355 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:17:27,398 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:17:28,431 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:17:29,466 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:17:30,512 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:17:31,180 - sglang - INFO - [2025-05-17 23:17:31] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1021188320, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:17:31,180 - __main__ - INFO - [2025-05-17 23:17:31] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1021188320, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:17:31,559 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:17:32,605 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:17:33,651 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:17:34,698 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:17:35,746 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:17:36,787 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:17:37,845 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:17:38,916 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:17:39,980 - sglang - INFO - [2025-05-17 23:17:39] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:17:39,980 - __main__ - INFO - [2025-05-17 23:17:39] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:17:39,982 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:17:41,048 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:17:42,118 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:17:43,188 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:17:44,258 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:17:45,324 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:17:46,388 - sglang - INFO - [2025-05-17 23:17:46 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:17:46,389 - __main__ - INFO - [2025-05-17 23:17:46 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:17:46,390 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:17:46,921 - sglang - INFO - [2025-05-17 23:17:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:17:46,921 - __main__ - INFO - [2025-05-17 23:17:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:17:46,921 - sglang - INFO - [2025-05-17 23:17:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:17:46,922 - __main__ - INFO - [2025-05-17 23:17:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:17:46,922 - sglang - INFO - [2025-05-17 23:17:46 TP0] Init torch distributed begin.
- 2025-05-17 23:17:46,922 - __main__ - INFO - [2025-05-17 23:17:46 TP0] Init torch distributed begin.
- 2025-05-17 23:17:47,469 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:17:48,527 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:17:49,593 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:17:50,662 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:17:51,732 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:17:52,271 - sglang - INFO - [2025-05-17 23:17:52 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:17:52,271 - __main__ - INFO - [2025-05-17 23:17:52 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:17:52,811 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:17:53,877 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:17:53,922 - sglang - INFO - [2025-05-17 23:17:53 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:17:53,922 - __main__ - INFO - [2025-05-17 23:17:53 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:17:54,483 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:17:54,483 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:17:54,759 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.63it/s]
- 2025-05-17 23:17:54,759 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.63it/s]
- 2025-05-17 23:17:54,958 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:17:55,683 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
- 2025-05-17 23:17:55,683 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
- 2025-05-17 23:17:56,038 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:17:56,614 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
- 2025-05-17 23:17:56,614 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
- 2025-05-17 23:17:57,117 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 23:17:57,520 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 23:17:57,520 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 23:17:57,520 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
- 2025-05-17 23:17:57,520 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
- 2025-05-17 23:17:57,520 - sglang - INFO -
- 2025-05-17 23:17:57,520 - __main__ - INFO -
- 2025-05-17 23:17:57,667 - sglang - INFO - [2025-05-17 23:17:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:17:57,667 - __main__ - INFO - [2025-05-17 23:17:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:17:57,702 - sglang - INFO - [2025-05-17 23:17:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:17:57,702 - __main__ - INFO - [2025-05-17 23:17:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:17:57,702 - sglang - INFO - [2025-05-17 23:17:57 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:17:57,702 - __main__ - INFO - [2025-05-17 23:17:57 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:17:57,869 - sglang - INFO - [2025-05-17 23:17:57 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:17:57,869 - __main__ - INFO - [2025-05-17 23:17:57 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:17:58,196 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 23:17:59,276 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 23:17:59,675 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.02s/it]
50%|█████ | 2/4 [00:01<00:01, 1.76it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.32it/s]
100%|██████████| 4/4 [00:01<00:00, 2.74it/s]
100%|██████████| 4/4 [00:01<00:00, 2.22it/s]
- 2025-05-17 23:17:59,675 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.02s/it]
50%|█████ | 2/4 [00:01<00:01, 1.76it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.32it/s]
100%|██████████| 4/4 [00:01<00:00, 2.74it/s]
100%|██████████| 4/4 [00:01<00:00, 2.22it/s]
- 2025-05-17 23:17:59,675 - sglang - INFO - [2025-05-17 23:17:59 TP0] Capture cuda graph end. Time elapsed: 1.81 s
- 2025-05-17 23:17:59,675 - __main__ - INFO - [2025-05-17 23:17:59 TP0] Capture cuda graph end. Time elapsed: 1.81 s
- 2025-05-17 23:18:00,356 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 23:18:01,426 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 23:18:02,493 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-17 23:18:02,593 - sglang - INFO - [2025-05-17 23:18:02 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:18:02,593 - __main__ - INFO - [2025-05-17 23:18:02 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:18:03,575 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:18:03,575 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:18:03,575 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:18:03,576 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:18:03,576 - __main__ - INFO - Worker 0 processing work item 9cfcb1fe084c9aada33c41b2707ba7baa495d3ef
- 2025-05-17 23:18:03,576 - __main__ - INFO - Created all tasks for 9cfcb1fe084c9aada33c41b2707ba7baa495d3ef
- 2025-05-17 23:18:03,582 - __main__ - INFO - Got 5 pages to do for tests/gnarly_pdfs/delivery.pdf in worker 0
- 2025-05-17 23:18:03,674 - sglang - INFO - [2025-05-17 23:18:03 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:18:03,674 - __main__ - INFO - [2025-05-17 23:18:03 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:18:03,674 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:18:04,263 - sglang - INFO - [2025-05-17 23:18:04] The server is fired up and ready to roll!
- 2025-05-17 23:18:04,263 - __main__ - INFO - [2025-05-17 23:18:04] The server is fired up and ready to roll!
- 2025-05-17 23:18:09,894 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-1
- 2025-05-17 23:18:09,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-2
- 2025-05-17 23:18:09,973 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-3
- 2025-05-17 23:18:10,002 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-4
- 2025-05-17 23:18:10,037 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-5
- 2025-05-17 23:18:13,579 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:18:13,579 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:18:13,579 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:18:23,581 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:18:23,581 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:18:23,581 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:18:31,593 - sglang - INFO - [2025-05-17 23:18:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:18:31,593 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:18:32,439 - sglang - INFO - [2025-05-17 23:18:32 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 23:18:32,440 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:18:33,583 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:18:33,584 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:18:33,584 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:18:34,690 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:18:34,691 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:18:34,691 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:18:34,691 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:18:34,692 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:18:34,692 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:18:34,692 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:18:34,692 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:18:35,880 - sglang - INFO - [2025-05-17 23:18:35 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.17, #queue-req: 0
- 2025-05-17 23:18:35,881 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:18:36,743 - sglang - INFO - [2025-05-17 23:18:36 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 231.77, #queue-req: 0
- 2025-05-17 23:18:36,743 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:18:37,600 - sglang - INFO - [2025-05-17 23:18:37 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.38, #queue-req: 0
- 2025-05-17 23:18:37,600 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:18:38,457 - sglang - INFO - [2025-05-17 23:18:38 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.51, #queue-req: 0
- 2025-05-17 23:18:38,457 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:18:39,317 - sglang - INFO - [2025-05-17 23:18:39 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.53, #queue-req: 0
- 2025-05-17 23:18:39,317 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:18:40,184 - sglang - INFO - [2025-05-17 23:18:40 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 230.54, #queue-req: 0
- 2025-05-17 23:18:40,185 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:18:41,047 - sglang - INFO - [2025-05-17 23:18:41 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.74, #queue-req: 0
- 2025-05-17 23:18:41,048 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:18:41,911 - sglang - INFO - [2025-05-17 23:18:41 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 231.58, #queue-req: 0
- 2025-05-17 23:18:41,911 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:18:42,772 - sglang - INFO - [2025-05-17 23:18:42 TP0] Decode batch. #running-req: 4, #token: 7360, token usage: 0.19, gen throughput (token/s): 218.45, #queue-req: 0
- 2025-05-17 23:18:42,772 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:18:43,585 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:18:43,586 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 49.92 49.92
- sglang_output_tokens 8.63 8.63
- 2025-05-17 23:18:43,586 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 5
- 2025-05-17 23:18:43,622 - sglang - INFO - [2025-05-17 23:18:43 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 141.16, #queue-req: 0
- 2025-05-17 23:18:43,622 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:18:44,460 - sglang - INFO - [2025-05-17 23:18:44 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 82.31, #queue-req: 0
- 2025-05-17 23:18:44,460 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:18:45,287 - sglang - INFO - [2025-05-17 23:18:45 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.39, #queue-req: 0
- 2025-05-17 23:18:45,287 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:18:45,952 - __main__ - INFO - Finished TaskGroup for worker on 9cfcb1fe084c9aada33c41b2707ba7baa495d3ef
- 2025-05-17 23:18:45,952 - __main__ - INFO - Got 1 docs for 9cfcb1fe084c9aada33c41b2707ba7baa495d3ef
- 2025-05-17 23:18:45,954 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:18:45,954 - __main__ - INFO - Work done
- 2025-05-17 23:18:45,954 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:21:20,288 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:21:20,288 - __main__ - INFO - Loading file at olmocr_workspace/job_1747495273/input.pdf as PDF document
- 2025-05-17 23:21:20,288 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:21:20,292 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:21:20,495 - __main__ - INFO - Starting pipeline with PID 447964
- 2025-05-17 23:21:20,496 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:21:26,128 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:21:27,177 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:21:28,223 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:21:29,266 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:21:30,311 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:21:31,351 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:21:31,508 - sglang - INFO - [2025-05-17 23:21:31] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=842359968, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:21:31,508 - __main__ - INFO - [2025-05-17 23:21:31] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=842359968, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:21:32,420 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:21:33,486 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:21:34,552 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:21:35,619 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:21:36,689 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:21:37,760 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:21:38,829 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:21:39,898 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:21:40,727 - sglang - INFO - [2025-05-17 23:21:40 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:21:40,727 - __main__ - INFO - [2025-05-17 23:21:40 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:21:40,975 - sglang - INFO - [2025-05-17 23:21:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:21:40,976 - __main__ - INFO - [2025-05-17 23:21:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:21:40,977 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:21:41,212 - sglang - INFO - [2025-05-17 23:21:41 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:21:41,213 - __main__ - INFO - [2025-05-17 23:21:41 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:21:41,213 - sglang - INFO - [2025-05-17 23:21:41 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:21:41,213 - __main__ - INFO - [2025-05-17 23:21:41 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:21:41,213 - sglang - INFO - [2025-05-17 23:21:41 TP0] Init torch distributed begin.
- 2025-05-17 23:21:41,213 - __main__ - INFO - [2025-05-17 23:21:41 TP0] Init torch distributed begin.
- 2025-05-17 23:21:42,056 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:21:43,121 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:21:44,184 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:21:45,238 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:21:46,302 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:21:46,538 - sglang - INFO - [2025-05-17 23:21:46 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:21:46,538 - __main__ - INFO - [2025-05-17 23:21:46 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:21:47,381 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:21:47,564 - sglang - INFO - [2025-05-17 23:21:47 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:21:47,565 - __main__ - INFO - [2025-05-17 23:21:47 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:21:48,460 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:21:48,525 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:21:48,525 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:21:48,812 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.48it/s]
- 2025-05-17 23:21:48,813 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.48it/s]
- 2025-05-17 23:21:49,539 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:21:49,732 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
- 2025-05-17 23:21:49,732 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
- 2025-05-17 23:21:50,620 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:21:50,652 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
- 2025-05-17 23:21:50,652 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
- 2025-05-17 23:21:51,550 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
- 2025-05-17 23:21:51,550 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
- 2025-05-17 23:21:51,550 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
- 2025-05-17 23:21:51,550 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
- 2025-05-17 23:21:51,550 - sglang - INFO -
- 2025-05-17 23:21:51,550 - __main__ - INFO -
- 2025-05-17 23:21:51,696 - sglang - INFO - [2025-05-17 23:21:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:21:51,696 - __main__ - INFO - [2025-05-17 23:21:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:21:51,696 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:21:51,704 - sglang - INFO - [2025-05-17 23:21:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:21:51,704 - __main__ - INFO - [2025-05-17 23:21:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:21:51,704 - sglang - INFO - [2025-05-17 23:21:51 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:21:51,704 - __main__ - INFO - [2025-05-17 23:21:51 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:21:51,879 - sglang - INFO - [2025-05-17 23:21:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:21:51,879 - __main__ - INFO - [2025-05-17 23:21:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:21:52,776 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:21:53,582 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.04it/s]
50%|█████ | 2/4 [00:01<00:01, 1.85it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.47it/s]
100%|██████████| 4/4 [00:01<00:00, 2.92it/s]
100%|██████████| 4/4 [00:01<00:00, 2.36it/s]
- 2025-05-17 23:21:53,582 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.04it/s]
50%|█████ | 2/4 [00:01<00:01, 1.85it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.47it/s]
100%|██████████| 4/4 [00:01<00:00, 2.92it/s]
100%|██████████| 4/4 [00:01<00:00, 2.36it/s]
- 2025-05-17 23:21:53,583 - sglang - INFO - [2025-05-17 23:21:53 TP0] Capture cuda graph end. Time elapsed: 1.70 s
- 2025-05-17 23:21:53,583 - __main__ - INFO - [2025-05-17 23:21:53 TP0] Capture cuda graph end. Time elapsed: 1.70 s
- 2025-05-17 23:21:53,856 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:21:54,926 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:21:55,998 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:21:56,147 - sglang - INFO - [2025-05-17 23:21:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:21:56,147 - __main__ - INFO - [2025-05-17 23:21:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:21:57,090 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:21:57,091 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:21:57,091 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:21:57,091 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:21:57,091 - __main__ - INFO - Worker 0 processing work item 88731f2783fb8112f0205c218828d88dc213896f
- 2025-05-17 23:21:57,091 - __main__ - INFO - Created all tasks for 88731f2783fb8112f0205c218828d88dc213896f
- 2025-05-17 23:21:57,098 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747495273/input.pdf in worker 0
- 2025-05-17 23:21:57,223 - sglang - INFO - [2025-05-17 23:21:57 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:21:57,223 - __main__ - INFO - [2025-05-17 23:21:57 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:21:57,223 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:21:57,775 - sglang - INFO - [2025-05-17 23:21:57] The server is fired up and ready to roll!
- 2025-05-17 23:21:57,775 - __main__ - INFO - [2025-05-17 23:21:57] The server is fired up and ready to roll!
- 2025-05-17 23:22:03,589 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-1
- 2025-05-17 23:22:03,604 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-2
- 2025-05-17 23:22:03,616 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-3
- 2025-05-17 23:22:03,623 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-4
- 2025-05-17 23:22:03,644 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-5
- 2025-05-17 23:22:07,179 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:22:07,179 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:22:07,179 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:22:17,180 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:22:17,181 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:22:17,181 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:22:24,459 - sglang - INFO - [2025-05-17 23:22:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2017, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:22:24,459 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:22:25,256 - sglang - INFO - [2025-05-17 23:22:25 TP0] Prefill batch. #new-seq: 4, #new-token: 8308, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 23:22:25,257 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:22:27,182 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:22:27,183 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:22:27,183 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:22:28,396 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:22:28,693 - sglang - INFO - [2025-05-17 23:22:28 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.28, #queue-req: 0
- 2025-05-17 23:22:28,694 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:22:29,550 - sglang - INFO - [2025-05-17 23:22:29 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.36, #queue-req: 0
- 2025-05-17 23:22:29,551 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:22:30,406 - sglang - INFO - [2025-05-17 23:22:30 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.74, #queue-req: 0
- 2025-05-17 23:22:30,406 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:22:31,261 - sglang - INFO - [2025-05-17 23:22:31 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.82, #queue-req: 0
- 2025-05-17 23:22:31,261 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:22:32,117 - sglang - INFO - [2025-05-17 23:22:32 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.68, #queue-req: 0
- 2025-05-17 23:22:32,118 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:22:32,976 - sglang - INFO - [2025-05-17 23:22:32 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 233.04, #queue-req: 0
- 2025-05-17 23:22:32,976 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:22:33,836 - sglang - INFO - [2025-05-17 23:22:33 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.38, #queue-req: 0
- 2025-05-17 23:22:33,836 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:22:34,698 - sglang - INFO - [2025-05-17 23:22:34 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.19, #queue-req: 0
- 2025-05-17 23:22:34,698 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:22:35,556 - sglang - INFO - [2025-05-17 23:22:35 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.71, #queue-req: 0
- 2025-05-17 23:22:35,556 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:22:36,397 - sglang - INFO - [2025-05-17 23:22:36 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.61, #queue-req: 0
- 2025-05-17 23:22:36,398 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:22:37,184 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:22:37,184 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 101.07 101.07
- sglang_output_tokens 19.66 19.66
- 2025-05-17 23:22:37,185 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 5
- 2025-05-17 23:22:37,230 - sglang - INFO - [2025-05-17 23:22:37 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 84.06, #queue-req: 0
- 2025-05-17 23:22:37,230 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:22:38,054 - sglang - INFO - [2025-05-17 23:22:38 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.52, #queue-req: 0
- 2025-05-17 23:22:38,055 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:22:38,843 - __main__ - INFO - Finished TaskGroup for worker on 88731f2783fb8112f0205c218828d88dc213896f
- 2025-05-17 23:22:38,843 - __main__ - INFO - Got 1 docs for 88731f2783fb8112f0205c218828d88dc213896f
- 2025-05-17 23:22:38,844 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:22:38,845 - __main__ - INFO - Work done
- 2025-05-17 23:22:38,845 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:27:55,455 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:27:55,455 - __main__ - INFO - Loading file at olmocr_workspace/job_1747495669/input.pdf as PDF document
- 2025-05-17 23:27:55,455 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:27:55,457 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 23:27:55,725 - __main__ - INFO - Starting pipeline with PID 450922
- 2025-05-17 23:27:55,725 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:28:01,352 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:28:02,398 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:28:03,447 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:28:04,514 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:28:05,549 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:28:06,599 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:28:07,200 - sglang - INFO - [2025-05-17 23:28:07] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=817117709, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:28:07,201 - __main__ - INFO - [2025-05-17 23:28:07] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=817117709, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:28:07,673 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:28:08,739 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:28:09,806 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:28:10,874 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:28:11,941 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:28:13,012 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:28:14,075 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:28:15,145 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:28:16,212 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:28:16,415 - sglang - INFO - [2025-05-17 23:28:16 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:28:16,416 - __main__ - INFO - [2025-05-17 23:28:16 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:28:16,419 - sglang - INFO - [2025-05-17 23:28:16] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:28:16,420 - __main__ - INFO - [2025-05-17 23:28:16] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:28:16,894 - sglang - INFO - [2025-05-17 23:28:16 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:28:16,894 - __main__ - INFO - [2025-05-17 23:28:16 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:28:16,894 - sglang - INFO - [2025-05-17 23:28:16 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:28:16,894 - __main__ - INFO - [2025-05-17 23:28:16 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:28:16,895 - sglang - INFO - [2025-05-17 23:28:16 TP0] Init torch distributed begin.
- 2025-05-17 23:28:16,895 - __main__ - INFO - [2025-05-17 23:28:16 TP0] Init torch distributed begin.
- 2025-05-17 23:28:17,287 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:28:18,354 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:28:19,418 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:28:20,473 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:28:21,540 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:28:22,219 - sglang - INFO - [2025-05-17 23:28:22 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:28:22,219 - __main__ - INFO - [2025-05-17 23:28:22 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:28:22,618 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:28:23,283 - sglang - INFO - [2025-05-17 23:28:23 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:28:23,283 - __main__ - INFO - [2025-05-17 23:28:23 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:28:23,694 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:28:23,782 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:28:23,782 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:28:24,076 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.40it/s]
- 2025-05-17 23:28:24,076 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.40it/s]
- 2025-05-17 23:28:24,763 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:28:25,059 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.43it/s]
- 2025-05-17 23:28:25,060 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.43it/s]
- 2025-05-17 23:28:25,840 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:28:26,064 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.19it/s]
- 2025-05-17 23:28:26,065 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.19it/s]
- 2025-05-17 23:28:26,937 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:28:27,042 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
- 2025-05-17 23:28:27,042 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
- 2025-05-17 23:28:27,042 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.23it/s]
- 2025-05-17 23:28:27,042 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.23it/s]
- 2025-05-17 23:28:27,042 - sglang - INFO -
- 2025-05-17 23:28:27,042 - __main__ - INFO -
- 2025-05-17 23:28:27,192 - sglang - INFO - [2025-05-17 23:28:27 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:28:27,193 - __main__ - INFO - [2025-05-17 23:28:27 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:28:27,199 - sglang - INFO - [2025-05-17 23:28:27 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:28:27,200 - __main__ - INFO - [2025-05-17 23:28:27 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:28:27,200 - sglang - INFO - [2025-05-17 23:28:27 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:28:27,200 - __main__ - INFO - [2025-05-17 23:28:27 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:28:27,367 - sglang - INFO - [2025-05-17 23:28:27 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:28:27,368 - __main__ - INFO - [2025-05-17 23:28:27 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:28:28,015 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:28:29,091 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:28:29,145 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.77it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.33it/s]
100%|██████████| 4/4 [00:01<00:00, 2.78it/s]
100%|██████████| 4/4 [00:01<00:00, 2.25it/s]
- 2025-05-17 23:28:29,145 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.77it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.33it/s]
100%|██████████| 4/4 [00:01<00:00, 2.78it/s]
100%|██████████| 4/4 [00:01<00:00, 2.25it/s]
- 2025-05-17 23:28:29,145 - sglang - INFO - [2025-05-17 23:28:29 TP0] Capture cuda graph end. Time elapsed: 1.78 s
- 2025-05-17 23:28:29,145 - __main__ - INFO - [2025-05-17 23:28:29 TP0] Capture cuda graph end. Time elapsed: 1.78 s
- 2025-05-17 23:28:30,167 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:28:31,239 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:28:31,599 - sglang - INFO - [2025-05-17 23:28:31 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:28:31,599 - __main__ - INFO - [2025-05-17 23:28:31 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:28:32,331 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:28:32,331 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:28:32,331 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:28:32,331 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:28:32,332 - __main__ - INFO - Worker 0 processing work item 53fa84f8de7c7f6853d89db3cf39d246b300e93f
- 2025-05-17 23:28:32,332 - __main__ - INFO - Created all tasks for 53fa84f8de7c7f6853d89db3cf39d246b300e93f
- 2025-05-17 23:28:32,334 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747495669/input.pdf in worker 0
- 2025-05-17 23:28:32,676 - sglang - INFO - [2025-05-17 23:28:32 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:28:32,677 - __main__ - INFO - [2025-05-17 23:28:32 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:28:32,677 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:28:33,303 - sglang - INFO - [2025-05-17 23:28:33] The server is fired up and ready to roll!
- 2025-05-17 23:28:33,303 - __main__ - INFO - [2025-05-17 23:28:33] The server is fired up and ready to roll!
- 2025-05-17 23:28:38,667 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495669/input.pdf-1
- 2025-05-17 23:28:42,379 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:28:42,379 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:28:42,379 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 23:28:52,381 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:28:52,381 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:28:52,381 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 23:28:59,979 - sglang - INFO - [2025-05-17 23:28:59 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:28:59,979 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:29:01,385 - sglang - INFO - [2025-05-17 23:29:01 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.34, #queue-req: 0
- 2025-05-17 23:29:01,385 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:29:02,203 - sglang - INFO - [2025-05-17 23:29:02 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.87, #queue-req: 0
- 2025-05-17 23:29:02,204 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:29:02,382 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:29:02,382 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:29:02,383 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 23:29:03,021 - sglang - INFO - [2025-05-17 23:29:03 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.90, #queue-req: 0
- 2025-05-17 23:29:03,022 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:29:03,588 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:29:03,590 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:29:03,590 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:29:03,839 - sglang - INFO - [2025-05-17 23:29:03 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.93, #queue-req: 0
- 2025-05-17 23:29:03,839 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:29:04,658 - sglang - INFO - [2025-05-17 23:29:04 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.85, #queue-req: 0
- 2025-05-17 23:29:04,658 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:29:05,477 - sglang - INFO - [2025-05-17 23:29:05 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.80, #queue-req: 0
- 2025-05-17 23:29:05,478 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:29:06,297 - sglang - INFO - [2025-05-17 23:29:06 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.77, #queue-req: 0
- 2025-05-17 23:29:06,298 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:29:06,346 - __main__ - INFO - Finished TaskGroup for worker on 53fa84f8de7c7f6853d89db3cf39d246b300e93f
- 2025-05-17 23:29:06,346 - __main__ - INFO - Got 1 docs for 53fa84f8de7c7f6853d89db3cf39d246b300e93f
- 2025-05-17 23:29:06,347 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:29:06,348 - __main__ - INFO - Work done
- 2025-05-17 23:29:06,348 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:29:16,250 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:29:16,250 - __main__ - INFO - Loading file at olmocr_workspace/job_1747495750/input.pdf as PDF document
- 2025-05-17 23:29:16,250 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:29:16,254 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:29:16,522 - __main__ - INFO - Starting pipeline with PID 452130
- 2025-05-17 23:29:16,522 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:29:22,121 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:29:23,160 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:29:24,204 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:29:25,266 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:29:26,334 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:29:27,399 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:29:28,006 - sglang - INFO - [2025-05-17 23:29:28] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=166973500, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:29:28,006 - __main__ - INFO - [2025-05-17 23:29:28] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=166973500, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:29:28,475 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:29:29,553 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:29:30,621 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:29:31,689 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:29:32,756 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:29:33,823 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:29:34,891 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:29:35,958 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:29:37,027 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:29:37,409 - sglang - INFO - [2025-05-17 23:29:37] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:29:37,409 - __main__ - INFO - [2025-05-17 23:29:37] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:29:38,103 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:29:39,167 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:29:40,222 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:29:41,285 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:29:42,351 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:29:42,819 - sglang - INFO - [2025-05-17 23:29:42 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:29:42,819 - __main__ - INFO - [2025-05-17 23:29:42 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:29:43,317 - sglang - INFO - [2025-05-17 23:29:43 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:29:43,317 - __main__ - INFO - [2025-05-17 23:29:43 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:29:43,317 - sglang - INFO - [2025-05-17 23:29:43 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:29:43,317 - __main__ - INFO - [2025-05-17 23:29:43 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:29:43,317 - sglang - INFO - [2025-05-17 23:29:43 TP0] Init torch distributed begin.
- 2025-05-17 23:29:43,318 - __main__ - INFO - [2025-05-17 23:29:43 TP0] Init torch distributed begin.
- 2025-05-17 23:29:43,434 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:29:44,497 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:29:45,565 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:29:46,633 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:29:47,701 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:29:48,686 - sglang - INFO - [2025-05-17 23:29:48 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:29:48,686 - __main__ - INFO - [2025-05-17 23:29:48 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:29:48,771 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:29:49,830 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:29:50,698 - sglang - INFO - [2025-05-17 23:29:50 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:29:50,698 - __main__ - INFO - [2025-05-17 23:29:50 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:29:50,907 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:29:51,440 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:29:51,440 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:29:51,737 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.38it/s]
- 2025-05-17 23:29:51,737 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.38it/s]
- 2025-05-17 23:29:51,984 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:29:52,694 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.46it/s]
- 2025-05-17 23:29:52,695 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.46it/s]
- 2025-05-17 23:29:53,061 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:29:53,654 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.23it/s]
- 2025-05-17 23:29:53,654 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.23it/s]
- 2025-05-17 23:29:54,138 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 23:29:54,597 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.16it/s]
- 2025-05-17 23:29:54,598 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.16it/s]
- 2025-05-17 23:29:54,598 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
- 2025-05-17 23:29:54,598 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
- 2025-05-17 23:29:54,598 - sglang - INFO -
- 2025-05-17 23:29:54,598 - __main__ - INFO -
- 2025-05-17 23:29:54,729 - sglang - INFO - [2025-05-17 23:29:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:29:54,729 - __main__ - INFO - [2025-05-17 23:29:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:29:54,735 - sglang - INFO - [2025-05-17 23:29:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:29:54,735 - __main__ - INFO - [2025-05-17 23:29:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:29:54,736 - sglang - INFO - [2025-05-17 23:29:54 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:29:54,736 - __main__ - INFO - [2025-05-17 23:29:54 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:29:54,889 - sglang - INFO - [2025-05-17 23:29:54 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:29:54,889 - __main__ - INFO - [2025-05-17 23:29:54 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:29:55,215 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 23:29:56,291 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 23:29:56,581 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.87it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s]
100%|██████████| 4/4 [00:01<00:00, 2.91it/s]
100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
- 2025-05-17 23:29:56,581 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.87it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s]
100%|██████████| 4/4 [00:01<00:00, 2.91it/s]
100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
- 2025-05-17 23:29:56,581 - sglang - INFO - [2025-05-17 23:29:56 TP0] Capture cuda graph end. Time elapsed: 1.69 s
- 2025-05-17 23:29:56,581 - __main__ - INFO - [2025-05-17 23:29:56 TP0] Capture cuda graph end. Time elapsed: 1.69 s
- 2025-05-17 23:29:57,366 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 23:29:58,433 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 23:29:59,499 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-17 23:29:59,589 - sglang - INFO - [2025-05-17 23:29:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:29:59,589 - __main__ - INFO - [2025-05-17 23:29:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:30:00,591 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:30:00,592 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:30:00,592 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:30:00,592 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:30:00,592 - __main__ - INFO - Worker 0 processing work item d9af858998245f0877efab6a7aad7fa5652f8d23
- 2025-05-17 23:30:00,592 - __main__ - INFO - Created all tasks for d9af858998245f0877efab6a7aad7fa5652f8d23
- 2025-05-17 23:30:00,599 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747495750/input.pdf in worker 0
- 2025-05-17 23:30:00,680 - sglang - INFO - [2025-05-17 23:30:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:30:00,680 - __main__ - INFO - [2025-05-17 23:30:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:30:00,681 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:30:01,264 - sglang - INFO - [2025-05-17 23:30:01] The server is fired up and ready to roll!
- 2025-05-17 23:30:01,264 - __main__ - INFO - [2025-05-17 23:30:01] The server is fired up and ready to roll!
- 2025-05-17 23:30:06,929 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-1
- 2025-05-17 23:30:06,967 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-2
- 2025-05-17 23:30:07,009 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-3
- 2025-05-17 23:30:07,051 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-4
- 2025-05-17 23:30:07,085 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-5
- 2025-05-17 23:30:10,679 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:30:10,679 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:30:10,679 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:30:20,680 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:30:20,681 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:30:20,681 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:30:27,429 - sglang - INFO - [2025-05-17 23:30:27 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:30:27,430 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:30:28,255 - sglang - INFO - [2025-05-17 23:30:28 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 23:30:28,256 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:30:30,682 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:30:30,682 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:30:30,682 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:30:31,388 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:30:31,388 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:30:31,388 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:30:31,388 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:30:31,388 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:30:31,389 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:30:31,389 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:30:31,389 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:30:31,705 - sglang - INFO - [2025-05-17 23:30:31 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.36, #queue-req: 0
- 2025-05-17 23:30:31,705 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:30:32,564 - sglang - INFO - [2025-05-17 23:30:32 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 232.73, #queue-req: 0
- 2025-05-17 23:30:32,565 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:30:33,423 - sglang - INFO - [2025-05-17 23:30:33 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 232.90, #queue-req: 0
- 2025-05-17 23:30:33,423 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:30:34,282 - sglang - INFO - [2025-05-17 23:30:34 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 232.94, #queue-req: 0
- 2025-05-17 23:30:34,282 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:30:35,140 - sglang - INFO - [2025-05-17 23:30:35 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.96, #queue-req: 0
- 2025-05-17 23:30:35,140 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:30:36,001 - sglang - INFO - [2025-05-17 23:30:36 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.38, #queue-req: 0
- 2025-05-17 23:30:36,001 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:30:36,863 - sglang - INFO - [2025-05-17 23:30:36 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.91, #queue-req: 0
- 2025-05-17 23:30:36,863 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:30:37,725 - sglang - INFO - [2025-05-17 23:30:37 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.03, #queue-req: 0
- 2025-05-17 23:30:37,725 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:30:38,589 - sglang - INFO - [2025-05-17 23:30:38 TP0] Decode batch. #running-req: 4, #token: 9730, token usage: 0.26, gen throughput (token/s): 217.60, #queue-req: 0
- 2025-05-17 23:30:38,589 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:30:39,434 - sglang - INFO - [2025-05-17 23:30:39 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 145.61, #queue-req: 0
- 2025-05-17 23:30:39,434 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:30:40,271 - sglang - INFO - [2025-05-17 23:30:40 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 83.61, #queue-req: 0
- 2025-05-17 23:30:40,271 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:30:40,684 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:30:40,685 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 92.05 92.05
- sglang_output_tokens 17.96 17.96
- 2025-05-17 23:30:40,685 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 5
- 2025-05-17 23:30:41,099 - sglang - INFO - [2025-05-17 23:30:41 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.34, #queue-req: 0
- 2025-05-17 23:30:41,099 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:30:41,766 - __main__ - INFO - Finished TaskGroup for worker on d9af858998245f0877efab6a7aad7fa5652f8d23
- 2025-05-17 23:30:41,766 - __main__ - INFO - Got 1 docs for d9af858998245f0877efab6a7aad7fa5652f8d23
- 2025-05-17 23:30:41,768 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:30:41,768 - __main__ - INFO - Work done
- 2025-05-17 23:30:41,768 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:37:24,245 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:37:24,245 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496237/input.pdf as PDF document
- 2025-05-17 23:37:24,245 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:37:24,250 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:37:24,505 - __main__ - INFO - Starting pipeline with PID 455265
- 2025-05-17 23:37:24,505 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:37:30,487 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:37:31,529 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:37:32,586 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:37:33,631 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:37:34,677 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:37:35,709 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:37:36,253 - sglang - INFO - [2025-05-17 23:37:36] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=359813597, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:37:36,253 - __main__ - INFO - [2025-05-17 23:37:36] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=359813597, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:37:36,771 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:37:37,837 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:37:38,908 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:37:39,979 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:37:41,048 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:37:42,114 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:37:43,181 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:37:44,250 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:37:44,933 - sglang - INFO - [2025-05-17 23:37:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:37:44,933 - __main__ - INFO - [2025-05-17 23:37:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:37:45,329 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:37:46,400 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:37:47,465 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:37:48,532 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:37:49,587 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:37:50,469 - sglang - INFO - [2025-05-17 23:37:50 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:37:50,470 - __main__ - INFO - [2025-05-17 23:37:50 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:37:50,665 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:37:50,967 - sglang - INFO - [2025-05-17 23:37:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:37:50,967 - __main__ - INFO - [2025-05-17 23:37:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:37:50,968 - sglang - INFO - [2025-05-17 23:37:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:37:50,968 - __main__ - INFO - [2025-05-17 23:37:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:37:50,968 - sglang - INFO - [2025-05-17 23:37:50 TP0] Init torch distributed begin.
- 2025-05-17 23:37:50,968 - __main__ - INFO - [2025-05-17 23:37:50 TP0] Init torch distributed begin.
- 2025-05-17 23:37:51,744 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:37:52,814 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:37:53,884 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:37:54,954 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:37:56,021 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:37:56,348 - sglang - INFO - [2025-05-17 23:37:56 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:37:56,348 - __main__ - INFO - [2025-05-17 23:37:56 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:37:57,101 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:37:57,429 - sglang - INFO - [2025-05-17 23:37:57 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:37:57,430 - __main__ - INFO - [2025-05-17 23:37:57 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:37:58,182 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:37:58,390 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:37:58,390 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:37:58,681 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.44it/s]
- 2025-05-17 23:37:58,681 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.44it/s]
- 2025-05-17 23:37:59,261 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:37:59,632 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.47it/s]
- 2025-05-17 23:37:59,632 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.47it/s]
- 2025-05-17 23:38:00,342 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:38:00,586 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.24it/s]
- 2025-05-17 23:38:00,586 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.24it/s]
- 2025-05-17 23:38:01,422 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:38:01,515 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.17it/s]
- 2025-05-17 23:38:01,515 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.17it/s]
- 2025-05-17 23:38:01,515 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.28it/s]
- 2025-05-17 23:38:01,515 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.28it/s]
- 2025-05-17 23:38:01,515 - sglang - INFO -
- 2025-05-17 23:38:01,515 - __main__ - INFO -
- 2025-05-17 23:38:01,662 - sglang - INFO - [2025-05-17 23:38:01 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:38:01,663 - __main__ - INFO - [2025-05-17 23:38:01 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:38:01,669 - sglang - INFO - [2025-05-17 23:38:01 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:38:01,669 - __main__ - INFO - [2025-05-17 23:38:01 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:38:01,669 - sglang - INFO - [2025-05-17 23:38:01 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:38:01,669 - __main__ - INFO - [2025-05-17 23:38:01 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:38:01,847 - sglang - INFO - [2025-05-17 23:38:01 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:38:01,847 - __main__ - INFO - [2025-05-17 23:38:01 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:38:02,502 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 23:38:03,580 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.87it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.47it/s]
100%|██████████| 4/4 [00:01<00:00, 2.90it/s]
100%|██████████| 4/4 [00:01<00:00, 2.36it/s]
- 2025-05-17 23:38:03,581 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.87it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.47it/s]
100%|██████████| 4/4 [00:01<00:00, 2.90it/s]
100%|██████████| 4/4 [00:01<00:00, 2.36it/s]
- 2025-05-17 23:38:03,581 - sglang - INFO - [2025-05-17 23:38:03 TP0] Capture cuda graph end. Time elapsed: 1.70 s
- 2025-05-17 23:38:03,581 - __main__ - INFO - [2025-05-17 23:38:03 TP0] Capture cuda graph end. Time elapsed: 1.70 s
- 2025-05-17 23:38:03,582 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 23:38:04,636 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 23:38:05,702 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 23:38:05,961 - sglang - INFO - [2025-05-17 23:38:05 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:38:05,961 - __main__ - INFO - [2025-05-17 23:38:05 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:38:06,790 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:38:06,790 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:38:06,790 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:38:06,790 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:38:06,791 - __main__ - INFO - Worker 0 processing work item e48bab7ddc862bf0fbce5dbd44894d26f2a0404e
- 2025-05-17 23:38:06,791 - __main__ - INFO - Created all tasks for e48bab7ddc862bf0fbce5dbd44894d26f2a0404e
- 2025-05-17 23:38:06,795 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496237/input.pdf in worker 0
- 2025-05-17 23:38:07,030 - sglang - INFO - [2025-05-17 23:38:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:38:07,030 - __main__ - INFO - [2025-05-17 23:38:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:38:07,030 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:38:07,781 - sglang - INFO - [2025-05-17 23:38:07] The server is fired up and ready to roll!
- 2025-05-17 23:38:07,781 - __main__ - INFO - [2025-05-17 23:38:07] The server is fired up and ready to roll!
- 2025-05-17 23:38:13,176 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-1
- 2025-05-17 23:38:13,212 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-2
- 2025-05-17 23:38:13,217 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-3
- 2025-05-17 23:38:13,244 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-4
- 2025-05-17 23:38:13,248 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-5
- 2025-05-17 23:38:16,879 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:38:16,879 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:38:16,879 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:38:26,880 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:38:26,880 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:38:26,880 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:38:32,264 - sglang - INFO - [2025-05-17 23:38:32 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:38:32,264 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:38:33,067 - sglang - INFO - [2025-05-17 23:38:33 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 23:38:33,067 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:38:36,474 - sglang - INFO - [2025-05-17 23:38:36 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.64, #queue-req: 0
- 2025-05-17 23:38:36,474 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:38:36,881 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:38:36,881 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:38:36,882 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:38:37,329 - sglang - INFO - [2025-05-17 23:38:37 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.78, #queue-req: 0
- 2025-05-17 23:38:37,329 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:38:37,790 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:38:37,792 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:38:38,184 - sglang - INFO - [2025-05-17 23:38:38 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.89, #queue-req: 0
- 2025-05-17 23:38:38,185 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:38:39,039 - sglang - INFO - [2025-05-17 23:38:39 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.96, #queue-req: 0
- 2025-05-17 23:38:39,039 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:38:39,894 - sglang - INFO - [2025-05-17 23:38:39 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.90, #queue-req: 0
- 2025-05-17 23:38:39,894 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:38:40,751 - sglang - INFO - [2025-05-17 23:38:40 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 233.33, #queue-req: 0
- 2025-05-17 23:38:40,752 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:38:41,611 - sglang - INFO - [2025-05-17 23:38:41 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.67, #queue-req: 0
- 2025-05-17 23:38:41,611 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:38:42,472 - sglang - INFO - [2025-05-17 23:38:42 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.26, #queue-req: 0
- 2025-05-17 23:38:42,472 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:38:43,330 - sglang - INFO - [2025-05-17 23:38:43 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.73, #queue-req: 0
- 2025-05-17 23:38:43,330 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:38:44,170 - sglang - INFO - [2025-05-17 23:38:44 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.85, #queue-req: 0
- 2025-05-17 23:38:44,171 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:38:45,002 - sglang - INFO - [2025-05-17 23:38:45 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 84.14, #queue-req: 0
- 2025-05-17 23:38:45,002 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:38:45,826 - sglang - INFO - [2025-05-17 23:38:45 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.57, #queue-req: 0
- 2025-05-17 23:38:45,826 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:38:46,490 - __main__ - INFO - Finished TaskGroup for worker on e48bab7ddc862bf0fbce5dbd44894d26f2a0404e
- 2025-05-17 23:38:46,491 - __main__ - INFO - Got 1 docs for e48bab7ddc862bf0fbce5dbd44894d26f2a0404e
- 2025-05-17 23:38:46,492 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:38:46,492 - __main__ - INFO - Work done
- 2025-05-17 23:38:46,492 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:39:16,278 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:39:16,278 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496349/input.pdf as PDF document
- 2025-05-17 23:39:16,278 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:39:16,280 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 23:39:16,478 - __main__ - INFO - Starting pipeline with PID 456898
- 2025-05-17 23:39:16,478 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:39:22,084 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:39:23,125 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:39:24,182 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:39:25,216 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:39:26,267 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:39:27,345 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:39:28,420 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:39:28,442 - sglang - INFO - [2025-05-17 23:39:28] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=415081957, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:39:28,442 - __main__ - INFO - [2025-05-17 23:39:28] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=415081957, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:39:29,494 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:39:30,562 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:39:31,638 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:39:32,705 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:39:33,781 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:39:34,838 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:39:35,905 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:39:36,972 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:39:36,984 - sglang - INFO - [2025-05-17 23:39:36] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:39:36,984 - __main__ - INFO - [2025-05-17 23:39:36] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:39:38,048 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:39:39,114 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:39:40,170 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:39:41,236 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:39:42,303 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:39:43,369 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:39:43,699 - sglang - INFO - [2025-05-17 23:39:43 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:39:43,700 - __main__ - INFO - [2025-05-17 23:39:43 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:39:44,265 - sglang - INFO - [2025-05-17 23:39:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:39:44,265 - __main__ - INFO - [2025-05-17 23:39:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:39:44,265 - sglang - INFO - [2025-05-17 23:39:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:39:44,265 - __main__ - INFO - [2025-05-17 23:39:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:39:44,265 - sglang - INFO - [2025-05-17 23:39:44 TP0] Init torch distributed begin.
- 2025-05-17 23:39:44,266 - __main__ - INFO - [2025-05-17 23:39:44 TP0] Init torch distributed begin.
- 2025-05-17 23:39:44,450 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:39:45,523 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:39:46,590 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:39:47,657 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:39:48,728 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:39:49,593 - sglang - INFO - [2025-05-17 23:39:49 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:39:49,593 - __main__ - INFO - [2025-05-17 23:39:49 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:39:49,812 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:39:50,680 - sglang - INFO - [2025-05-17 23:39:50 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:39:50,681 - __main__ - INFO - [2025-05-17 23:39:50 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:39:50,891 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:39:51,250 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:39:51,250 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:39:51,542 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
- 2025-05-17 23:39:51,542 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
- 2025-05-17 23:39:51,973 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:39:52,500 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.46it/s]
- 2025-05-17 23:39:52,500 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.46it/s]
- 2025-05-17 23:39:53,054 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:39:53,459 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.24it/s]
- 2025-05-17 23:39:53,459 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.24it/s]
- 2025-05-17 23:39:54,134 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 23:39:54,407 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.16it/s]
- 2025-05-17 23:39:54,408 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.16it/s]
- 2025-05-17 23:39:54,408 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
- 2025-05-17 23:39:54,408 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
- 2025-05-17 23:39:54,408 - sglang - INFO -
- 2025-05-17 23:39:54,408 - __main__ - INFO -
- 2025-05-17 23:39:54,555 - sglang - INFO - [2025-05-17 23:39:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:39:54,555 - __main__ - INFO - [2025-05-17 23:39:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:39:54,561 - sglang - INFO - [2025-05-17 23:39:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:39:54,561 - __main__ - INFO - [2025-05-17 23:39:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:39:54,561 - sglang - INFO - [2025-05-17 23:39:54 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:39:54,561 - __main__ - INFO - [2025-05-17 23:39:54 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:39:54,714 - sglang - INFO - [2025-05-17 23:39:54 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:39:54,714 - __main__ - INFO - [2025-05-17 23:39:54 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:39:55,213 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 23:39:56,293 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 23:39:56,404 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.87it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s]
100%|██████████| 4/4 [00:01<00:00, 2.93it/s]
100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
- 2025-05-17 23:39:56,404 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.87it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s]
100%|██████████| 4/4 [00:01<00:00, 2.93it/s]
100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
- 2025-05-17 23:39:56,404 - sglang - INFO - [2025-05-17 23:39:56 TP0] Capture cuda graph end. Time elapsed: 1.69 s
- 2025-05-17 23:39:56,404 - __main__ - INFO - [2025-05-17 23:39:56 TP0] Capture cuda graph end. Time elapsed: 1.69 s
- 2025-05-17 23:39:57,372 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 23:39:58,442 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 23:39:59,508 - sglang - INFO - [2025-05-17 23:39:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:39:59,508 - __main__ - INFO - [2025-05-17 23:39:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:39:59,509 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-17 23:40:00,581 - sglang - INFO - [2025-05-17 23:40:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:40:00,581 - __main__ - INFO - [2025-05-17 23:40:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:40:00,581 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:40:00,616 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:40:00,616 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:40:00,617 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:40:00,617 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:40:00,617 - __main__ - INFO - Worker 0 processing work item cfa98586a6393f5105b98d306c800389907d0452
- 2025-05-17 23:40:00,617 - __main__ - INFO - Created all tasks for cfa98586a6393f5105b98d306c800389907d0452
- 2025-05-17 23:40:00,620 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747496349/input.pdf in worker 0
- 2025-05-17 23:40:01,096 - sglang - INFO - [2025-05-17 23:40:01] The server is fired up and ready to roll!
- 2025-05-17 23:40:01,096 - __main__ - INFO - [2025-05-17 23:40:01] The server is fired up and ready to roll!
- 2025-05-17 23:40:06,960 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496349/input.pdf-1
- 2025-05-17 23:40:10,679 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:40:10,679 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:40:10,680 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 23:40:20,681 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:40:20,681 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:40:20,682 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 23:40:27,865 - sglang - INFO - [2025-05-17 23:40:27 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:40:27,865 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:40:29,308 - sglang - INFO - [2025-05-17 23:40:29 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.34, #queue-req: 0
- 2025-05-17 23:40:29,309 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:40:30,130 - sglang - INFO - [2025-05-17 23:40:30 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.67, #queue-req: 0
- 2025-05-17 23:40:30,130 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:40:30,683 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:40:30,683 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:40:30,683 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 23:40:30,950 - sglang - INFO - [2025-05-17 23:40:30 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.79, #queue-req: 0
- 2025-05-17 23:40:30,950 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:40:31,390 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:40:31,390 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:40:31,769 - sglang - INFO - [2025-05-17 23:40:31 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.81, #queue-req: 0
- 2025-05-17 23:40:31,770 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:40:32,590 - sglang - INFO - [2025-05-17 23:40:32 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.73, #queue-req: 0
- 2025-05-17 23:40:32,590 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:40:33,412 - sglang - INFO - [2025-05-17 23:40:33 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.69, #queue-req: 0
- 2025-05-17 23:40:33,412 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:40:34,233 - sglang - INFO - [2025-05-17 23:40:34 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.71, #queue-req: 0
- 2025-05-17 23:40:34,233 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:40:34,281 - __main__ - INFO - Finished TaskGroup for worker on cfa98586a6393f5105b98d306c800389907d0452
- 2025-05-17 23:40:34,282 - __main__ - INFO - Got 1 docs for cfa98586a6393f5105b98d306c800389907d0452
- 2025-05-17 23:40:34,283 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:40:34,283 - __main__ - INFO - Work done
- 2025-05-17 23:40:34,284 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:41:02,747 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:41:02,747 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496456/input.pdf as PDF document
- 2025-05-17 23:41:02,747 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:41:02,751 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:41:02,980 - __main__ - INFO - Starting pipeline with PID 458294
- 2025-05-17 23:41:02,980 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:41:08,598 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:41:09,637 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:41:10,695 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:41:11,760 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:41:12,829 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:41:13,899 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:41:14,385 - sglang - INFO - [2025-05-17 23:41:14] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=289781152, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:41:14,385 - __main__ - INFO - [2025-05-17 23:41:14] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=289781152, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:41:14,978 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:41:16,045 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:41:17,111 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:41:17,736 - sglang - INFO - [2025-05-17 23:41:17] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:41:17,736 - __main__ - INFO - [2025-05-17 23:41:17] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:41:18,189 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:41:19,225 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:41:20,287 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:41:21,353 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:41:22,422 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:41:23,457 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:41:24,519 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:41:25,585 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:41:26,651 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:41:27,705 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:41:28,769 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:41:29,351 - sglang - INFO - [2025-05-17 23:41:29 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:41:29,351 - __main__ - INFO - [2025-05-17 23:41:29 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:41:29,848 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:41:29,893 - sglang - INFO - [2025-05-17 23:41:29 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:41:29,893 - __main__ - INFO - [2025-05-17 23:41:29 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:41:29,893 - sglang - INFO - [2025-05-17 23:41:29 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:41:29,893 - __main__ - INFO - [2025-05-17 23:41:29 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:41:29,893 - sglang - INFO - [2025-05-17 23:41:29 TP0] Init torch distributed begin.
- 2025-05-17 23:41:29,893 - __main__ - INFO - [2025-05-17 23:41:29 TP0] Init torch distributed begin.
- 2025-05-17 23:41:30,883 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:41:31,945 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:41:33,012 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:41:34,073 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:41:35,140 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:41:35,230 - sglang - INFO - [2025-05-17 23:41:35 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:41:35,230 - __main__ - INFO - [2025-05-17 23:41:35 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:41:36,218 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:41:36,278 - sglang - INFO - [2025-05-17 23:41:36 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:41:36,278 - __main__ - INFO - [2025-05-17 23:41:36 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:41:37,047 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:41:37,047 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:41:37,296 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:41:37,325 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.59it/s]
- 2025-05-17 23:41:37,326 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.59it/s]
- 2025-05-17 23:41:38,241 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.53it/s]
- 2025-05-17 23:41:38,241 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.53it/s]
- 2025-05-17 23:41:38,332 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:41:39,165 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.29it/s]
- 2025-05-17 23:41:39,166 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.29it/s]
- 2025-05-17 23:41:39,367 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:41:40,032 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.23it/s]
- 2025-05-17 23:41:40,033 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.23it/s]
- 2025-05-17 23:41:40,033 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.34it/s]
- 2025-05-17 23:41:40,033 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.34it/s]
- 2025-05-17 23:41:40,033 - sglang - INFO -
- 2025-05-17 23:41:40,033 - __main__ - INFO -
- 2025-05-17 23:41:40,162 - sglang - INFO - [2025-05-17 23:41:40 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:41:40,163 - __main__ - INFO - [2025-05-17 23:41:40 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:41:40,168 - sglang - INFO - [2025-05-17 23:41:40 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:41:40,169 - __main__ - INFO - [2025-05-17 23:41:40 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:41:40,169 - sglang - INFO - [2025-05-17 23:41:40 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:41:40,169 - __main__ - INFO - [2025-05-17 23:41:40 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:41:40,320 - sglang - INFO - [2025-05-17 23:41:40 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:41:40,320 - __main__ - INFO - [2025-05-17 23:41:40 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:41:40,445 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 23:41:41,517 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 23:41:41,948 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.12it/s]
50%|█████ | 2/4 [00:01<00:01, 1.96it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.57it/s]
100%|██████████| 4/4 [00:01<00:00, 3.01it/s]
100%|██████████| 4/4 [00:01<00:00, 2.46it/s]
- 2025-05-17 23:41:41,948 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.12it/s]
50%|█████ | 2/4 [00:01<00:01, 1.96it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.57it/s]
100%|██████████| 4/4 [00:01<00:00, 3.01it/s]
100%|██████████| 4/4 [00:01<00:00, 2.46it/s]
- 2025-05-17 23:41:41,949 - sglang - INFO - [2025-05-17 23:41:41 TP0] Capture cuda graph end. Time elapsed: 1.63 s
- 2025-05-17 23:41:41,949 - __main__ - INFO - [2025-05-17 23:41:41 TP0] Capture cuda graph end. Time elapsed: 1.63 s
- 2025-05-17 23:41:42,597 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 23:41:43,664 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 23:41:44,630 - sglang - INFO - [2025-05-17 23:41:44 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:41:44,630 - __main__ - INFO - [2025-05-17 23:41:44 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:41:44,744 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:41:44,744 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:41:44,744 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:41:44,745 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:41:44,745 - __main__ - INFO - Worker 0 processing work item 199e15fb97f71d3ed170b35970694c1935783252
- 2025-05-17 23:41:44,745 - __main__ - INFO - Created all tasks for 199e15fb97f71d3ed170b35970694c1935783252
- 2025-05-17 23:41:44,750 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496456/input.pdf in worker 0
- 2025-05-17 23:41:45,787 - sglang - INFO - [2025-05-17 23:41:45 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:41:45,787 - __main__ - INFO - [2025-05-17 23:41:45 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:41:45,787 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:41:46,565 - sglang - INFO - [2025-05-17 23:41:46] The server is fired up and ready to roll!
- 2025-05-17 23:41:46,565 - __main__ - INFO - [2025-05-17 23:41:46] The server is fired up and ready to roll!
- 2025-05-17 23:41:51,133 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-1
- 2025-05-17 23:41:51,169 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-2
- 2025-05-17 23:41:51,195 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-4
- 2025-05-17 23:41:51,213 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-3
- 2025-05-17 23:41:51,227 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-5
- 2025-05-17 23:41:54,747 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:41:54,747 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:41:54,747 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:42:04,395 - sglang - INFO - [2025-05-17 23:42:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:42:04,395 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:42:04,748 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:42:04,748 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:42:04,748 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:42:06,400 - sglang - INFO - [2025-05-17 23:42:06 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 23:42:06,400 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:42:10,279 - sglang - INFO - [2025-05-17 23:42:10 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.71, #queue-req: 0
- 2025-05-17 23:42:10,279 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:42:11,137 - sglang - INFO - [2025-05-17 23:42:11 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.21, #queue-req: 0
- 2025-05-17 23:42:11,137 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:42:11,994 - sglang - INFO - [2025-05-17 23:42:11 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.23, #queue-req: 0
- 2025-05-17 23:42:11,994 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:42:12,851 - sglang - INFO - [2025-05-17 23:42:12 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.32, #queue-req: 0
- 2025-05-17 23:42:12,852 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:42:13,708 - sglang - INFO - [2025-05-17 23:42:13 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.36, #queue-req: 0
- 2025-05-17 23:42:13,709 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:42:14,567 - sglang - INFO - [2025-05-17 23:42:14 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.92, #queue-req: 0
- 2025-05-17 23:42:14,567 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:42:14,749 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:42:14,749 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:42:14,749 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:42:15,428 - sglang - INFO - [2025-05-17 23:42:15 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.31, #queue-req: 0
- 2025-05-17 23:42:15,428 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:42:16,290 - sglang - INFO - [2025-05-17 23:42:16 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.15, #queue-req: 0
- 2025-05-17 23:42:16,290 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:42:16,813 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:42:17,147 - sglang - INFO - [2025-05-17 23:42:17 TP0] Decode batch. #running-req: 4, #token: 9730, token usage: 0.26, gen throughput (token/s): 217.99, #queue-req: 0
- 2025-05-17 23:42:17,147 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:42:17,990 - sglang - INFO - [2025-05-17 23:42:17 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 145.99, #queue-req: 0
- 2025-05-17 23:42:17,990 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:42:18,822 - sglang - INFO - [2025-05-17 23:42:18 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 79.30, #queue-req: 0
- 2025-05-17 23:42:18,822 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:42:19,647 - sglang - INFO - [2025-05-17 23:42:19 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.50, #queue-req: 0
- 2025-05-17 23:42:19,647 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:42:20,373 - __main__ - INFO - Finished TaskGroup for worker on 199e15fb97f71d3ed170b35970694c1935783252
- 2025-05-17 23:42:20,374 - __main__ - INFO - Got 1 docs for 199e15fb97f71d3ed170b35970694c1935783252
- 2025-05-17 23:42:20,375 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:42:20,375 - __main__ - INFO - Work done
- 2025-05-17 23:42:20,376 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:45:42,883 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:45:42,883 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496736/input.pdf as PDF document
- 2025-05-17 23:45:42,883 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:45:42,887 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:45:43,077 - __main__ - INFO - Starting pipeline with PID 460587
- 2025-05-17 23:45:43,077 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:45:48,716 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:45:49,762 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:45:50,819 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:45:51,864 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:45:52,912 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:45:53,957 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:45:54,317 - sglang - INFO - [2025-05-17 23:45:54] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=257394431, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:45:54,317 - __main__ - INFO - [2025-05-17 23:45:54] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=257394431, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:45:55,008 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:45:56,052 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:45:57,098 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:45:58,141 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:45:59,217 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:46:00,262 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:46:01,310 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:46:02,350 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:46:03,409 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:46:03,779 - sglang - INFO - [2025-05-17 23:46:03] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:46:03,779 - __main__ - INFO - [2025-05-17 23:46:03] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:46:03,784 - sglang - INFO - [2025-05-17 23:46:03 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:46:03,784 - __main__ - INFO - [2025-05-17 23:46:03 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:46:04,488 - sglang - INFO - [2025-05-17 23:46:04 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:46:04,488 - __main__ - INFO - [2025-05-17 23:46:04 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:46:04,488 - sglang - INFO - [2025-05-17 23:46:04 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:46:04,488 - __main__ - INFO - [2025-05-17 23:46:04 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:46:04,488 - sglang - INFO - [2025-05-17 23:46:04 TP0] Init torch distributed begin.
- 2025-05-17 23:46:04,488 - __main__ - INFO - [2025-05-17 23:46:04 TP0] Init torch distributed begin.
- 2025-05-17 23:46:04,489 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:46:05,559 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:46:06,630 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:46:07,696 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:46:08,764 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:46:09,817 - sglang - INFO - [2025-05-17 23:46:09 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:46:09,817 - __main__ - INFO - [2025-05-17 23:46:09 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:46:09,819 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:46:10,887 - sglang - INFO - [2025-05-17 23:46:10 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:46:10,888 - __main__ - INFO - [2025-05-17 23:46:10 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:46:10,889 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:46:11,701 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:46:11,701 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:46:11,968 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:46:11,978 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.61it/s]
- 2025-05-17 23:46:11,978 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.61it/s]
- 2025-05-17 23:46:12,904 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
- 2025-05-17 23:46:12,905 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
- 2025-05-17 23:46:13,047 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:46:13,837 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
- 2025-05-17 23:46:13,838 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
- 2025-05-17 23:46:14,126 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:46:14,746 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 23:46:14,747 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 23:46:14,747 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
- 2025-05-17 23:46:14,747 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
- 2025-05-17 23:46:14,747 - sglang - INFO -
- 2025-05-17 23:46:14,747 - __main__ - INFO -
- 2025-05-17 23:46:14,893 - sglang - INFO - [2025-05-17 23:46:14 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:46:14,893 - __main__ - INFO - [2025-05-17 23:46:14 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:46:14,900 - sglang - INFO - [2025-05-17 23:46:14 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:46:14,900 - __main__ - INFO - [2025-05-17 23:46:14 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:46:14,900 - sglang - INFO - [2025-05-17 23:46:14 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:46:14,900 - __main__ - INFO - [2025-05-17 23:46:14 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:46:15,079 - sglang - INFO - [2025-05-17 23:46:15 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:46:15,079 - __main__ - INFO - [2025-05-17 23:46:15 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:46:15,205 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:46:16,285 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:46:16,750 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.07it/s]
50%|█████ | 2/4 [00:01<00:01, 1.89it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.51it/s]
100%|██████████| 4/4 [00:01<00:00, 2.97it/s]
100%|██████████| 4/4 [00:01<00:00, 2.40it/s]
- 2025-05-17 23:46:16,750 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.07it/s]
50%|█████ | 2/4 [00:01<00:01, 1.89it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.51it/s]
100%|██████████| 4/4 [00:01<00:00, 2.97it/s]
100%|██████████| 4/4 [00:01<00:00, 2.40it/s]
- 2025-05-17 23:46:16,750 - sglang - INFO - [2025-05-17 23:46:16 TP0] Capture cuda graph end. Time elapsed: 1.67 s
- 2025-05-17 23:46:16,751 - __main__ - INFO - [2025-05-17 23:46:16 TP0] Capture cuda graph end. Time elapsed: 1.67 s
- 2025-05-17 23:46:17,365 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:46:18,435 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:46:19,506 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:46:19,632 - sglang - INFO - [2025-05-17 23:46:19 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:46:19,632 - __main__ - INFO - [2025-05-17 23:46:19 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:46:20,601 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:46:20,601 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:46:20,601 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:46:20,601 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:46:20,602 - __main__ - INFO - Worker 0 processing work item d7aa3200a01aa9ffa8f18aa1ecd0b8d69c60293b
- 2025-05-17 23:46:20,602 - __main__ - INFO - Created all tasks for d7aa3200a01aa9ffa8f18aa1ecd0b8d69c60293b
- 2025-05-17 23:46:20,608 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496736/input.pdf in worker 0
- 2025-05-17 23:46:20,705 - sglang - INFO - [2025-05-17 23:46:20 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:46:20,705 - __main__ - INFO - [2025-05-17 23:46:20 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:46:20,705 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:46:21,302 - sglang - INFO - [2025-05-17 23:46:21] The server is fired up and ready to roll!
- 2025-05-17 23:46:21,303 - __main__ - INFO - [2025-05-17 23:46:21] The server is fired up and ready to roll!
- 2025-05-17 23:46:26,848 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-1
- 2025-05-17 23:46:26,884 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-2
- 2025-05-17 23:46:26,919 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-3
- 2025-05-17 23:46:26,947 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-4
- 2025-05-17 23:46:26,982 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-5
- 2025-05-17 23:46:30,679 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:46:30,679 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:46:30,679 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:46:40,681 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:46:40,681 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:46:40,682 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:46:45,601 - sglang - INFO - [2025-05-17 23:46:45 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:46:45,601 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:46:46,399 - sglang - INFO - [2025-05-17 23:46:46 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 23:46:46,399 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:46:49,831 - sglang - INFO - [2025-05-17 23:46:49 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.70, #queue-req: 0
- 2025-05-17 23:46:49,831 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:46:50,684 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:46:50,684 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:46:50,684 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:46:50,688 - sglang - INFO - [2025-05-17 23:46:50 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.27, #queue-req: 0
- 2025-05-17 23:46:50,689 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:46:51,552 - sglang - INFO - [2025-05-17 23:46:51 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 231.64, #queue-req: 0
- 2025-05-17 23:46:51,552 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:46:51,988 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:46:51,988 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:46:52,413 - sglang - INFO - [2025-05-17 23:46:52 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 232.14, #queue-req: 0
- 2025-05-17 23:46:52,414 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:46:53,274 - sglang - INFO - [2025-05-17 23:46:53 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.48, #queue-req: 0
- 2025-05-17 23:46:53,274 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:46:54,135 - sglang - INFO - [2025-05-17 23:46:54 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.35, #queue-req: 0
- 2025-05-17 23:46:54,135 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:46:54,997 - sglang - INFO - [2025-05-17 23:46:54 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.81, #queue-req: 0
- 2025-05-17 23:46:54,997 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:46:55,860 - sglang - INFO - [2025-05-17 23:46:55 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 231.77, #queue-req: 0
- 2025-05-17 23:46:55,861 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:46:56,720 - sglang - INFO - [2025-05-17 23:46:56 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.26, #queue-req: 0
- 2025-05-17 23:46:56,720 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:46:57,563 - sglang - INFO - [2025-05-17 23:46:57 TP0] Decode batch. #running-req: 2, #token: 5146, token usage: 0.14, gen throughput (token/s): 137.59, #queue-req: 0
- 2025-05-17 23:46:57,564 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-05-17 23:46:58,391 - sglang - INFO - [2025-05-17 23:46:58 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 62.83, #queue-req: 0
- 2025-05-17 23:46:58,391 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:46:59,217 - sglang - INFO - [2025-05-17 23:46:59 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.45, #queue-req: 0
- 2025-05-17 23:46:59,217 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:46:59,884 - __main__ - INFO - Finished TaskGroup for worker on d7aa3200a01aa9ffa8f18aa1ecd0b8d69c60293b
- 2025-05-17 23:46:59,884 - __main__ - INFO - Got 1 docs for d7aa3200a01aa9ffa8f18aa1ecd0b8d69c60293b
- 2025-05-17 23:46:59,885 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:46:59,886 - __main__ - INFO - Work done
- 2025-05-17 23:46:59,886 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:47:46,541 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:47:46,542 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496860/input.pdf as PDF document
- 2025-05-17 23:47:46,542 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:47:46,546 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:47:46,785 - __main__ - INFO - Starting pipeline with PID 462397
- 2025-05-17 23:47:46,785 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:47:47,586 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:47:48,624 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:47:49,683 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:47:50,748 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:47:51,817 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:47:52,873 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:47:52,883 - sglang - INFO - [2025-05-17 23:47:52] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=791956393, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:47:52,883 - __main__ - INFO - [2025-05-17 23:47:52] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=791956393, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:47:53,946 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:47:55,001 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:47:56,068 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:47:57,139 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:47:58,189 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:47:58,316 - sglang - INFO - [2025-05-17 23:47:58] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:47:58,316 - __main__ - INFO - [2025-05-17 23:47:58] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:47:59,269 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:48:00,340 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:48:01,392 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:48:02,458 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:48:03,524 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:48:04,590 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:48:05,653 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:48:06,708 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:48:07,292 - sglang - INFO - [2025-05-17 23:48:07 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:48:07,292 - __main__ - INFO - [2025-05-17 23:48:07 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:48:07,786 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:48:07,810 - sglang - INFO - [2025-05-17 23:48:07 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:48:07,810 - __main__ - INFO - [2025-05-17 23:48:07 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:48:07,810 - sglang - INFO - [2025-05-17 23:48:07 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:48:07,811 - __main__ - INFO - [2025-05-17 23:48:07 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:48:07,811 - sglang - INFO - [2025-05-17 23:48:07 TP0] Init torch distributed begin.
- 2025-05-17 23:48:07,811 - __main__ - INFO - [2025-05-17 23:48:07 TP0] Init torch distributed begin.
- 2025-05-17 23:48:08,865 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:48:09,932 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:48:10,999 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:48:12,065 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:48:13,135 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:48:13,220 - sglang - INFO - [2025-05-17 23:48:13 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:48:13,221 - __main__ - INFO - [2025-05-17 23:48:13 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:48:14,215 - sglang - INFO - [2025-05-17 23:48:14 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:48:14,216 - __main__ - INFO - [2025-05-17 23:48:14 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:48:14,217 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:48:14,702 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:48:14,702 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:48:14,989 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.49it/s]
- 2025-05-17 23:48:14,989 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.49it/s]
- 2025-05-17 23:48:15,296 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:48:15,933 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
- 2025-05-17 23:48:15,933 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
- 2025-05-17 23:48:16,377 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:48:16,879 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.25it/s]
- 2025-05-17 23:48:16,879 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.25it/s]
- 2025-05-17 23:48:17,456 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:48:17,766 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 23:48:17,766 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-05-17 23:48:17,766 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
- 2025-05-17 23:48:17,767 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
- 2025-05-17 23:48:17,767 - sglang - INFO -
- 2025-05-17 23:48:17,767 - __main__ - INFO -
- 2025-05-17 23:48:17,898 - sglang - INFO - [2025-05-17 23:48:17 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:48:17,899 - __main__ - INFO - [2025-05-17 23:48:17 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:48:17,904 - sglang - INFO - [2025-05-17 23:48:17 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:48:17,904 - __main__ - INFO - [2025-05-17 23:48:17 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:48:17,905 - sglang - INFO - [2025-05-17 23:48:17 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:48:17,905 - __main__ - INFO - [2025-05-17 23:48:17 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:48:18,057 - sglang - INFO - [2025-05-17 23:48:18 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:48:18,057 - __main__ - INFO - [2025-05-17 23:48:18 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:48:18,534 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:48:19,613 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 23:48:19,717 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.09it/s]
50%|█████ | 2/4 [00:01<00:01, 1.91it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.52it/s]
100%|██████████| 4/4 [00:01<00:00, 2.96it/s]
100%|██████████| 4/4 [00:01<00:00, 2.41it/s]
- 2025-05-17 23:48:19,717 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.09it/s]
50%|█████ | 2/4 [00:01<00:01, 1.91it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.52it/s]
100%|██████████| 4/4 [00:01<00:00, 2.96it/s]
100%|██████████| 4/4 [00:01<00:00, 2.41it/s]
- 2025-05-17 23:48:19,718 - sglang - INFO - [2025-05-17 23:48:19 TP0] Capture cuda graph end. Time elapsed: 1.66 s
- 2025-05-17 23:48:19,718 - __main__ - INFO - [2025-05-17 23:48:19 TP0] Capture cuda graph end. Time elapsed: 1.66 s
- 2025-05-17 23:48:20,689 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 23:48:21,744 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 23:48:22,450 - sglang - INFO - [2025-05-17 23:48:22 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:48:22,451 - __main__ - INFO - [2025-05-17 23:48:22 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:48:22,837 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:48:22,837 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:48:22,838 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:48:22,838 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:48:22,838 - __main__ - INFO - Worker 0 processing work item 206e70c2ba138820c52d22ba8bfb11820a7b737b
- 2025-05-17 23:48:22,838 - __main__ - INFO - Created all tasks for 206e70c2ba138820c52d22ba8bfb11820a7b737b
- 2025-05-17 23:48:22,844 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496860/input.pdf in worker 0
- 2025-05-17 23:48:23,527 - sglang - INFO - [2025-05-17 23:48:23 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:48:23,528 - __main__ - INFO - [2025-05-17 23:48:23 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:48:23,528 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:48:24,409 - sglang - INFO - [2025-05-17 23:48:24] The server is fired up and ready to roll!
- 2025-05-17 23:48:24,409 - __main__ - INFO - [2025-05-17 23:48:24] The server is fired up and ready to roll!
- 2025-05-17 23:48:29,517 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-1
- 2025-05-17 23:48:29,542 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-2
- 2025-05-17 23:48:29,553 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-4
- 2025-05-17 23:48:29,560 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-3
- 2025-05-17 23:48:29,583 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-5
- 2025-05-17 23:48:32,838 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:48:32,839 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:48:32,839 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:48:40,487 - sglang - INFO - [2025-05-17 23:48:40 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:48:40,488 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:48:42,508 - sglang - INFO - [2025-05-17 23:48:42 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 23:48:42,508 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:48:42,839 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:48:42,840 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:48:42,840 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:48:47,326 - sglang - INFO - [2025-05-17 23:48:47 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.91, #queue-req: 0
- 2025-05-17 23:48:47,327 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:48:48,191 - sglang - INFO - [2025-05-17 23:48:48 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 231.36, #queue-req: 0
- 2025-05-17 23:48:48,191 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:48:49,054 - sglang - INFO - [2025-05-17 23:48:49 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 231.71, #queue-req: 0
- 2025-05-17 23:48:49,054 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:48:49,917 - sglang - INFO - [2025-05-17 23:48:49 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 231.67, #queue-req: 0
- 2025-05-17 23:48:49,917 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:48:50,779 - sglang - INFO - [2025-05-17 23:48:50 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.02, #queue-req: 0
- 2025-05-17 23:48:50,779 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:48:51,644 - sglang - INFO - [2025-05-17 23:48:51 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 231.35, #queue-req: 0
- 2025-05-17 23:48:51,644 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:48:52,506 - sglang - INFO - [2025-05-17 23:48:52 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.03, #queue-req: 0
- 2025-05-17 23:48:52,506 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:48:52,841 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:48:52,841 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:48:52,841 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:48:53,367 - sglang - INFO - [2025-05-17 23:48:53 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.19, #queue-req: 0
- 2025-05-17 23:48:53,367 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:48:54,227 - sglang - INFO - [2025-05-17 23:48:54 TP0] Decode batch. #running-req: 4, #token: 7360, token usage: 0.19, gen throughput (token/s): 218.53, #queue-req: 0
- 2025-05-17 23:48:54,227 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:48:54,815 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:48:55,070 - sglang - INFO - [2025-05-17 23:48:55 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.35, #queue-req: 0
- 2025-05-17 23:48:55,070 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:48:55,905 - sglang - INFO - [2025-05-17 23:48:55 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 83.86, #queue-req: 0
- 2025-05-17 23:48:55,905 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:48:56,731 - sglang - INFO - [2025-05-17 23:48:56 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.44, #queue-req: 0
- 2025-05-17 23:48:56,731 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:48:57,521 - __main__ - INFO - Finished TaskGroup for worker on 206e70c2ba138820c52d22ba8bfb11820a7b737b
- 2025-05-17 23:48:57,521 - __main__ - INFO - Got 1 docs for 206e70c2ba138820c52d22ba8bfb11820a7b737b
- 2025-05-17 23:48:57,523 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:48:57,523 - __main__ - INFO - Work done
- 2025-05-17 23:48:57,523 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:49:27,025 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:49:27,026 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496960/input.pdf as PDF document
- 2025-05-17 23:49:27,026 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:49:27,030 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:49:27,269 - __main__ - INFO - Starting pipeline with PID 464121
- 2025-05-17 23:49:27,269 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:49:33,982 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:49:35,028 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:49:36,085 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:49:37,143 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:49:38,201 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:49:39,258 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:49:40,248 - sglang - INFO - [2025-05-17 23:49:40] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=655980969, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:49:40,248 - __main__ - INFO - [2025-05-17 23:49:40] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=655980969, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:49:40,301 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:49:41,365 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:49:42,423 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:49:43,481 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:49:44,257 - sglang - INFO - [2025-05-17 23:49:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:49:44,257 - __main__ - INFO - [2025-05-17 23:49:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:49:44,540 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:49:45,585 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:49:46,634 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:49:47,701 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:49:48,772 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:49:49,842 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:49:50,908 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:49:51,974 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:49:53,036 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:49:54,091 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:49:55,157 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:49:56,225 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:49:57,290 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:49:58,355 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:49:59,429 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:50:00,500 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:50:01,570 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:50:02,641 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:50:03,711 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:50:03,784 - sglang - INFO - [2025-05-17 23:50:03 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:50:03,784 - __main__ - INFO - [2025-05-17 23:50:03 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:50:04,792 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:50:05,862 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 23:50:06,928 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 23:50:07,994 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 23:50:09,061 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 23:50:10,115 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 23:50:10,421 - sglang - INFO - [2025-05-17 23:50:10 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:50:10,421 - __main__ - INFO - [2025-05-17 23:50:10 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:50:10,421 - sglang - INFO - [2025-05-17 23:50:10 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:50:10,421 - __main__ - INFO - [2025-05-17 23:50:10 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:50:10,421 - sglang - INFO - [2025-05-17 23:50:10 TP0] Init torch distributed begin.
- 2025-05-17 23:50:10,421 - __main__ - INFO - [2025-05-17 23:50:10 TP0] Init torch distributed begin.
- 2025-05-17 23:50:11,193 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-17 23:50:12,262 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-05-17 23:50:13,329 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-05-17 23:50:14,394 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-05-17 23:50:15,461 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-05-17 23:50:15,784 - sglang - INFO - [2025-05-17 23:50:15 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:50:15,784 - __main__ - INFO - [2025-05-17 23:50:15 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:50:16,540 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-05-17 23:50:17,404 - sglang - INFO - [2025-05-17 23:50:17 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:50:17,404 - __main__ - INFO - [2025-05-17 23:50:17 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:50:17,618 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-05-17 23:50:17,922 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:50:17,922 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:50:18,292 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.70it/s]
- 2025-05-17 23:50:18,292 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.70it/s]
- 2025-05-17 23:50:18,697 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-05-17 23:50:19,427 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.22it/s]
- 2025-05-17 23:50:19,427 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.22it/s]
- 2025-05-17 23:50:19,776 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-05-17 23:50:20,591 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.02it/s]
- 2025-05-17 23:50:20,591 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.02it/s]
- 2025-05-17 23:50:20,856 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-05-17 23:50:21,685 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.02s/it]
- 2025-05-17 23:50:21,685 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.02s/it]
- 2025-05-17 23:50:21,686 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.06it/s]
- 2025-05-17 23:50:21,686 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.06it/s]
- 2025-05-17 23:50:21,686 - sglang - INFO -
- 2025-05-17 23:50:21,686 - __main__ - INFO -
- 2025-05-17 23:50:21,934 - sglang - INFO - [2025-05-17 23:50:21 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:50:21,934 - __main__ - INFO - [2025-05-17 23:50:21 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:50:21,935 - sglang - INFO - [2025-05-17 23:50:21 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:50:21,935 - __main__ - INFO - [2025-05-17 23:50:21 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:50:21,935 - sglang - INFO - [2025-05-17 23:50:21 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:50:21,935 - __main__ - INFO - [2025-05-17 23:50:21 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:50:21,936 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-05-17 23:50:22,026 - sglang - INFO - [2025-05-17 23:50:22 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:50:22,027 - __main__ - INFO - [2025-05-17 23:50:22 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:50:23,016 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-05-17 23:50:23,793 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.01s/it]
50%|█████ | 2/4 [00:01<00:01, 1.78it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.37it/s]
100%|██████████| 4/4 [00:01<00:00, 2.83it/s]
100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
- 2025-05-17 23:50:23,793 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.01s/it]
50%|█████ | 2/4 [00:01<00:01, 1.78it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.37it/s]
100%|██████████| 4/4 [00:01<00:00, 2.83it/s]
100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
- 2025-05-17 23:50:23,793 - sglang - INFO - [2025-05-17 23:50:23 TP0] Capture cuda graph end. Time elapsed: 1.77 s
- 2025-05-17 23:50:23,793 - __main__ - INFO - [2025-05-17 23:50:23 TP0] Capture cuda graph end. Time elapsed: 1.77 s
- 2025-05-17 23:50:24,094 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-05-17 23:50:25,149 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-05-17 23:50:26,214 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-05-17 23:50:26,513 - sglang - INFO - [2025-05-17 23:50:26 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:50:26,513 - __main__ - INFO - [2025-05-17 23:50:26 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:50:27,309 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:50:27,309 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:50:27,309 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:50:27,309 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:50:27,309 - __main__ - INFO - Worker 0 processing work item 9409ac69e0698bb53cba0d186d3996d4d9f95a62
- 2025-05-17 23:50:27,309 - __main__ - INFO - Created all tasks for 9409ac69e0698bb53cba0d186d3996d4d9f95a62
- 2025-05-17 23:50:27,315 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496960/input.pdf in worker 0
- 2025-05-17 23:50:27,589 - sglang - INFO - [2025-05-17 23:50:27 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:50:27,589 - __main__ - INFO - [2025-05-17 23:50:27 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:50:27,589 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:50:28,282 - sglang - INFO - [2025-05-17 23:50:28] The server is fired up and ready to roll!
- 2025-05-17 23:50:28,282 - __main__ - INFO - [2025-05-17 23:50:28] The server is fired up and ready to roll!
- 2025-05-17 23:50:33,560 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-1
- 2025-05-17 23:50:33,599 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-2
- 2025-05-17 23:50:33,601 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-3
- 2025-05-17 23:50:33,632 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-5
- 2025-05-17 23:50:33,633 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-4
- 2025-05-17 23:50:37,311 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:50:37,311 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:50:37,311 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:50:42,193 - sglang - INFO - [2025-05-17 23:50:42 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:50:42,194 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:50:44,294 - sglang - INFO - [2025-05-17 23:50:44 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 23:50:44,294 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:50:47,312 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:50:47,313 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:50:47,313 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:50:49,825 - sglang - INFO - [2025-05-17 23:50:49 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 7.38, #queue-req: 0
- 2025-05-17 23:50:49,826 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:50:50,684 - sglang - INFO - [2025-05-17 23:50:50 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 232.91, #queue-req: 0
- 2025-05-17 23:50:50,684 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:50:51,543 - sglang - INFO - [2025-05-17 23:50:51 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 232.78, #queue-req: 0
- 2025-05-17 23:50:51,543 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:50:52,401 - sglang - INFO - [2025-05-17 23:50:52 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.19, #queue-req: 0
- 2025-05-17 23:50:52,401 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:50:53,259 - sglang - INFO - [2025-05-17 23:50:53 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.05, #queue-req: 0
- 2025-05-17 23:50:53,259 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:50:54,119 - sglang - INFO - [2025-05-17 23:50:54 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.53, #queue-req: 0
- 2025-05-17 23:50:54,120 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:50:54,981 - sglang - INFO - [2025-05-17 23:50:54 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.07, #queue-req: 0
- 2025-05-17 23:50:54,981 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:50:55,843 - sglang - INFO - [2025-05-17 23:50:55 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.13, #queue-req: 0
- 2025-05-17 23:50:55,843 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:50:56,705 - sglang - INFO - [2025-05-17 23:50:56 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.89, #queue-req: 0
- 2025-05-17 23:50:56,705 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:50:57,314 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:50:57,314 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 44.56 44.56
- sglang_output_tokens 7.70 7.70
- 2025-05-17 23:50:57,314 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 5
- 2025-05-17 23:50:57,548 - sglang - INFO - [2025-05-17 23:50:57 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.28, #queue-req: 0
- 2025-05-17 23:50:57,548 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:50:58,382 - sglang - INFO - [2025-05-17 23:50:58 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 77.97, #queue-req: 0
- 2025-05-17 23:50:58,382 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:50:59,207 - sglang - INFO - [2025-05-17 23:50:59 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.49, #queue-req: 0
- 2025-05-17 23:50:59,207 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:50:59,217 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:50:59,217 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:50:59,872 - __main__ - INFO - Finished TaskGroup for worker on 9409ac69e0698bb53cba0d186d3996d4d9f95a62
- 2025-05-17 23:50:59,873 - __main__ - INFO - Got 1 docs for 9409ac69e0698bb53cba0d186d3996d4d9f95a62
- 2025-05-17 23:50:59,874 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:50:59,875 - __main__ - INFO - Work done
- 2025-05-17 23:50:59,875 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:51:31,061 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:51:31,062 - __main__ - INFO - Loading file at olmocr_workspace/job_1747497084/input.pdf as PDF document
- 2025-05-17 23:51:31,062 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:51:31,066 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:51:31,312 - __main__ - INFO - Starting pipeline with PID 465812
- 2025-05-17 23:51:31,312 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:51:36,943 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:51:37,990 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:51:39,041 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:51:40,106 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:51:41,172 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:51:42,217 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:51:42,690 - sglang - INFO - [2025-05-17 23:51:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=247307952, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:51:42,690 - __main__ - INFO - [2025-05-17 23:51:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=247307952, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:51:43,265 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:51:44,298 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:51:45,345 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:51:46,412 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:51:47,478 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:51:48,544 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:51:49,610 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:51:50,676 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:51:51,742 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:51:52,530 - sglang - INFO - [2025-05-17 23:51:52] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:51:52,530 - __main__ - INFO - [2025-05-17 23:51:52] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:51:52,821 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:51:53,892 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:51:54,957 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:51:56,024 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:51:57,078 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:51:57,861 - sglang - INFO - [2025-05-17 23:51:57 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:51:57,861 - __main__ - INFO - [2025-05-17 23:51:57 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:51:58,157 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:51:58,646 - sglang - INFO - [2025-05-17 23:51:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:51:58,646 - __main__ - INFO - [2025-05-17 23:51:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:51:58,646 - sglang - INFO - [2025-05-17 23:51:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:51:58,646 - __main__ - INFO - [2025-05-17 23:51:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:51:58,647 - sglang - INFO - [2025-05-17 23:51:58 TP0] Init torch distributed begin.
- 2025-05-17 23:51:58,647 - __main__ - INFO - [2025-05-17 23:51:58 TP0] Init torch distributed begin.
- 2025-05-17 23:51:59,236 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:52:00,302 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:52:01,368 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:52:02,433 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:52:03,500 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:52:04,027 - sglang - INFO - [2025-05-17 23:52:04 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:52:04,027 - __main__ - INFO - [2025-05-17 23:52:04 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:52:04,580 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:52:05,517 - sglang - INFO - [2025-05-17 23:52:05 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:52:05,517 - __main__ - INFO - [2025-05-17 23:52:05 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:52:05,658 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:52:06,011 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:52:06,011 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:52:06,339 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.06it/s]
- 2025-05-17 23:52:06,339 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.06it/s]
- 2025-05-17 23:52:06,737 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:52:07,396 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.32it/s]
- 2025-05-17 23:52:07,397 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.32it/s]
- 2025-05-17 23:52:07,817 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:52:08,454 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.12it/s]
- 2025-05-17 23:52:08,454 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.12it/s]
- 2025-05-17 23:52:08,895 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-17 23:52:09,493 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05it/s]
- 2025-05-17 23:52:09,494 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05it/s]
- 2025-05-17 23:52:09,494 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.15it/s]
- 2025-05-17 23:52:09,494 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.15it/s]
- 2025-05-17 23:52:09,494 - sglang - INFO -
- 2025-05-17 23:52:09,494 - __main__ - INFO -
- 2025-05-17 23:52:09,639 - sglang - INFO - [2025-05-17 23:52:09 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:52:09,639 - __main__ - INFO - [2025-05-17 23:52:09 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:52:09,645 - sglang - INFO - [2025-05-17 23:52:09 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:52:09,645 - __main__ - INFO - [2025-05-17 23:52:09 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:52:09,645 - sglang - INFO - [2025-05-17 23:52:09 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:52:09,645 - __main__ - INFO - [2025-05-17 23:52:09 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:52:09,795 - sglang - INFO - [2025-05-17 23:52:09 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:52:09,796 - __main__ - INFO - [2025-05-17 23:52:09 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:52:09,973 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-17 23:52:11,054 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-17 23:52:11,462 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.10it/s]
50%|█████ | 2/4 [00:01<00:01, 1.92it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s]
100%|██████████| 4/4 [00:01<00:00, 2.94it/s]
100%|██████████| 4/4 [00:01<00:00, 2.40it/s]
- 2025-05-17 23:52:11,462 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.10it/s]
50%|█████ | 2/4 [00:01<00:01, 1.92it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s]
100%|██████████| 4/4 [00:01<00:00, 2.94it/s]
100%|██████████| 4/4 [00:01<00:00, 2.40it/s]
- 2025-05-17 23:52:11,462 - sglang - INFO - [2025-05-17 23:52:11 TP0] Capture cuda graph end. Time elapsed: 1.67 s
- 2025-05-17 23:52:11,462 - __main__ - INFO - [2025-05-17 23:52:11 TP0] Capture cuda graph end. Time elapsed: 1.67 s
- 2025-05-17 23:52:12,133 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-17 23:52:13,202 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-17 23:52:14,268 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-17 23:52:14,610 - sglang - INFO - [2025-05-17 23:52:14 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:52:14,610 - __main__ - INFO - [2025-05-17 23:52:14 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:52:15,364 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:52:15,364 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:52:15,364 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:52:15,364 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:52:15,364 - __main__ - INFO - Worker 0 processing work item e583124473577446455a2982cc1a1469d21fc0a1
- 2025-05-17 23:52:15,365 - __main__ - INFO - Created all tasks for e583124473577446455a2982cc1a1469d21fc0a1
- 2025-05-17 23:52:15,371 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747497084/input.pdf in worker 0
- 2025-05-17 23:52:15,684 - sglang - INFO - [2025-05-17 23:52:15 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:52:15,684 - __main__ - INFO - [2025-05-17 23:52:15 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:52:15,684 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:52:16,505 - sglang - INFO - [2025-05-17 23:52:16] The server is fired up and ready to roll!
- 2025-05-17 23:52:16,505 - __main__ - INFO - [2025-05-17 23:52:16] The server is fired up and ready to roll!
- 2025-05-17 23:52:21,875 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-1
- 2025-05-17 23:52:21,904 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-2
- 2025-05-17 23:52:21,919 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-3
- 2025-05-17 23:52:21,939 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-4
- 2025-05-17 23:52:21,958 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-5
- 2025-05-17 23:52:25,379 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:52:25,379 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:52:25,379 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:52:35,381 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:52:35,381 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:52:35,381 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:52:41,907 - sglang - INFO - [2025-05-17 23:52:41 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:52:41,907 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:52:42,739 - sglang - INFO - [2025-05-17 23:52:42 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-17 23:52:42,739 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:52:45,382 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:52:45,383 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:52:45,383 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-17 23:52:46,181 - sglang - INFO - [2025-05-17 23:52:46 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.45, #queue-req: 0
- 2025-05-17 23:52:46,181 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:52:47,040 - sglang - INFO - [2025-05-17 23:52:47 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 232.66, #queue-req: 0
- 2025-05-17 23:52:47,041 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:52:47,196 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:52:47,899 - sglang - INFO - [2025-05-17 23:52:47 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 232.95, #queue-req: 0
- 2025-05-17 23:52:47,899 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:52:48,757 - sglang - INFO - [2025-05-17 23:52:48 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.08, #queue-req: 0
- 2025-05-17 23:52:48,757 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:52:49,616 - sglang - INFO - [2025-05-17 23:52:49 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.85, #queue-req: 0
- 2025-05-17 23:52:49,616 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:52:50,476 - sglang - INFO - [2025-05-17 23:52:50 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.65, #queue-req: 0
- 2025-05-17 23:52:50,476 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:52:51,337 - sglang - INFO - [2025-05-17 23:52:51 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.18, #queue-req: 0
- 2025-05-17 23:52:51,337 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:52:52,198 - sglang - INFO - [2025-05-17 23:52:52 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.21, #queue-req: 0
- 2025-05-17 23:52:52,199 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-17 23:52:53,059 - sglang - INFO - [2025-05-17 23:52:53 TP0] Decode batch. #running-req: 4, #token: 9730, token usage: 0.26, gen throughput (token/s): 217.39, #queue-req: 0
- 2025-05-17 23:52:53,059 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-17 23:52:53,902 - sglang - INFO - [2025-05-17 23:52:53 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 145.75, #queue-req: 0
- 2025-05-17 23:52:53,903 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-17 23:52:54,737 - sglang - INFO - [2025-05-17 23:52:54 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 94.63, #queue-req: 0
- 2025-05-17 23:52:54,737 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:52:55,384 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:52:55,384 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 92.17 92.17
- sglang_output_tokens 18.08 18.08
- 2025-05-17 23:52:55,384 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 5
- 2025-05-17 23:52:55,562 - sglang - INFO - [2025-05-17 23:52:55 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.48, #queue-req: 0
- 2025-05-17 23:52:55,563 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:52:55,961 - __main__ - INFO - Finished TaskGroup for worker on e583124473577446455a2982cc1a1469d21fc0a1
- 2025-05-17 23:52:55,961 - __main__ - INFO - Got 1 docs for e583124473577446455a2982cc1a1469d21fc0a1
- 2025-05-17 23:52:55,963 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:52:55,963 - __main__ - INFO - Work done
- 2025-05-17 23:52:55,963 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:58:19,645 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:58:19,645 - __main__ - INFO - Loading file at olmocr_workspace/job_1747497493/input.pdf as PDF document
- 2025-05-17 23:58:19,645 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:58:19,648 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-17 23:58:19,843 - __main__ - INFO - Starting pipeline with PID 468563
- 2025-05-17 23:58:19,843 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-17 23:58:20,381 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-17 23:58:21,419 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-17 23:58:22,477 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-17 23:58:23,522 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-17 23:58:24,584 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-17 23:58:25,660 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-17 23:58:26,623 - sglang - INFO - [2025-05-17 23:58:26] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=124456914, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:58:26,623 - __main__ - INFO - [2025-05-17 23:58:26] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=124456914, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-17 23:58:26,786 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-17 23:58:27,846 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-17 23:58:28,891 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-17 23:58:29,957 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-17 23:58:31,027 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-17 23:58:32,094 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-17 23:58:33,162 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-17 23:58:34,229 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-17 23:58:35,295 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-17 23:58:36,208 - sglang - INFO - [2025-05-17 23:58:36 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:58:36,209 - __main__ - INFO - [2025-05-17 23:58:36 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-17 23:58:36,370 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-17 23:58:36,716 - sglang - INFO - [2025-05-17 23:58:36 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:58:36,717 - __main__ - INFO - [2025-05-17 23:58:36 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-17 23:58:36,717 - sglang - INFO - [2025-05-17 23:58:36 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:58:36,717 - __main__ - INFO - [2025-05-17 23:58:36 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-17 23:58:36,717 - sglang - INFO - [2025-05-17 23:58:36 TP0] Init torch distributed begin.
- 2025-05-17 23:58:36,717 - __main__ - INFO - [2025-05-17 23:58:36 TP0] Init torch distributed begin.
- 2025-05-17 23:58:37,444 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-17 23:58:38,498 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-17 23:58:39,563 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-17 23:58:40,618 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-17 23:58:41,684 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-17 23:58:42,052 - sglang - INFO - [2025-05-17 23:58:42 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:58:42,052 - __main__ - INFO - [2025-05-17 23:58:42 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-17 23:58:42,764 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-17 23:58:43,224 - sglang - INFO - [2025-05-17 23:58:43 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:58:43,224 - __main__ - INFO - [2025-05-17 23:58:43 TP0] Using model weights format ['*.safetensors']
- 2025-05-17 23:58:43,844 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-17 23:58:44,325 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:58:44,325 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-17 23:58:44,615 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.46it/s]
- 2025-05-17 23:58:44,615 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.46it/s]
- 2025-05-17 23:58:44,923 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-17 23:58:45,329 - sglang - INFO - [2025-05-17 23:58:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:58:45,330 - __main__ - INFO - [2025-05-17 23:58:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-17 23:58:45,559 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.48it/s]
- 2025-05-17 23:58:45,559 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.48it/s]
- 2025-05-17 23:58:46,002 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-17 23:58:46,501 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.25it/s]
- 2025-05-17 23:58:46,501 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.25it/s]
- 2025-05-17 23:58:47,082 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-17 23:58:47,433 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.18it/s]
- 2025-05-17 23:58:47,434 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.18it/s]
- 2025-05-17 23:58:47,434 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.29it/s]
- 2025-05-17 23:58:47,434 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.29it/s]
- 2025-05-17 23:58:47,434 - sglang - INFO -
- 2025-05-17 23:58:47,434 - __main__ - INFO -
- 2025-05-17 23:58:47,582 - sglang - INFO - [2025-05-17 23:58:47 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:58:47,582 - __main__ - INFO - [2025-05-17 23:58:47 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-17 23:58:47,589 - sglang - INFO - [2025-05-17 23:58:47 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:58:47,589 - __main__ - INFO - [2025-05-17 23:58:47 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-17 23:58:47,589 - sglang - INFO - [2025-05-17 23:58:47 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:58:47,589 - __main__ - INFO - [2025-05-17 23:58:47 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-17 23:58:47,769 - sglang - INFO - [2025-05-17 23:58:47 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:58:47,769 - __main__ - INFO - [2025-05-17 23:58:47 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-17 23:58:48,161 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-17 23:58:49,239 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-17 23:58:49,501 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.01it/s]
50%|█████ | 2/4 [00:01<00:01, 1.81it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.43it/s]
100%|██████████| 4/4 [00:01<00:00, 2.88it/s]
100%|██████████| 4/4 [00:01<00:00, 2.31it/s]
- 2025-05-17 23:58:49,501 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.01it/s]
50%|█████ | 2/4 [00:01<00:01, 1.81it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.43it/s]
100%|██████████| 4/4 [00:01<00:00, 2.88it/s]
100%|██████████| 4/4 [00:01<00:00, 2.31it/s]
- 2025-05-17 23:58:49,501 - sglang - INFO - [2025-05-17 23:58:49 TP0] Capture cuda graph end. Time elapsed: 1.73 s
- 2025-05-17 23:58:49,501 - __main__ - INFO - [2025-05-17 23:58:49 TP0] Capture cuda graph end. Time elapsed: 1.73 s
- 2025-05-17 23:58:50,318 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-17 23:58:51,389 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-17 23:58:52,182 - sglang - INFO - [2025-05-17 23:58:52 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:58:52,182 - __main__ - INFO - [2025-05-17 23:58:52 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-17 23:58:52,483 - __main__ - INFO - sglang server is ready.
- 2025-05-17 23:58:52,483 - __main__ - INFO - Queue remaining: 1
- 2025-05-17 23:58:52,483 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:58:52,483 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-17 23:58:52,483 - __main__ - INFO - Worker 0 processing work item a4f0675d63ce13f5a08d86042553d3bccd4ce38c
- 2025-05-17 23:58:52,484 - __main__ - INFO - Created all tasks for a4f0675d63ce13f5a08d86042553d3bccd4ce38c
- 2025-05-17 23:58:52,486 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747497493/input.pdf in worker 0
- 2025-05-17 23:58:53,285 - sglang - INFO - [2025-05-17 23:58:53 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:58:53,285 - __main__ - INFO - [2025-05-17 23:58:53 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:58:53,285 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:58:53,916 - sglang - INFO - [2025-05-17 23:58:53] The server is fired up and ready to roll!
- 2025-05-17 23:58:53,917 - __main__ - INFO - [2025-05-17 23:58:53] The server is fired up and ready to roll!
- 2025-05-17 23:58:58,742 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497493/input.pdf-1
- 2025-05-17 23:59:02,484 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:59:02,485 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:59:02,485 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 23:59:12,486 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:59:12,487 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:59:12,487 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 23:59:20,219 - sglang - INFO - [2025-05-17 23:59:20 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-17 23:59:20,219 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-17 23:59:21,653 - sglang - INFO - [2025-05-17 23:59:21 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.36, #queue-req: 0
- 2025-05-17 23:59:21,654 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:59:22,472 - sglang - INFO - [2025-05-17 23:59:22 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.89, #queue-req: 0
- 2025-05-17 23:59:22,472 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:59:22,487 - __main__ - INFO - Queue remaining: 0
- 2025-05-17 23:59:22,487 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-17 23:59:22,488 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-17 23:59:23,289 - sglang - INFO - [2025-05-17 23:59:23 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.94, #queue-req: 0
- 2025-05-17 23:59:23,289 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:59:24,106 - sglang - INFO - [2025-05-17 23:59:24 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.93, #queue-req: 0
- 2025-05-17 23:59:24,107 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:59:24,590 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-17 23:59:24,592 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-17 23:59:24,592 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-17 23:59:24,924 - sglang - INFO - [2025-05-17 23:59:24 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.90, #queue-req: 0
- 2025-05-17 23:59:24,924 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:59:25,743 - sglang - INFO - [2025-05-17 23:59:25 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.83, #queue-req: 0
- 2025-05-17 23:59:25,744 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:59:26,563 - sglang - INFO - [2025-05-17 23:59:26 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.79, #queue-req: 0
- 2025-05-17 23:59:26,563 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-17 23:59:26,621 - __main__ - INFO - Finished TaskGroup for worker on a4f0675d63ce13f5a08d86042553d3bccd4ce38c
- 2025-05-17 23:59:26,621 - __main__ - INFO - Got 1 docs for a4f0675d63ce13f5a08d86042553d3bccd4ce38c
- 2025-05-17 23:59:26,622 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-17 23:59:26,622 - __main__ - INFO - Work done
- 2025-05-17 23:59:26,623 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-17 23:59:56,702 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-17 23:59:56,702 - __main__ - INFO - Loading file at olmocr_workspace/job_1747497590/input.pdf as PDF document
- 2025-05-17 23:59:56,702 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-17 23:59:56,706 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-17 23:59:56,924 - __main__ - INFO - Starting pipeline with PID 469586
- 2025-05-17 23:59:56,924 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-18 00:00:02,591 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-18 00:00:03,631 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-18 00:00:04,668 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-18 00:00:05,721 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-18 00:00:06,782 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-18 00:00:07,847 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-18 00:00:08,914 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-18 00:00:09,253 - sglang - INFO - [2025-05-18 00:00:09] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=781311356, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-18 00:00:09,253 - __main__ - INFO - [2025-05-18 00:00:09] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=781311356, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-18 00:00:09,993 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-18 00:00:11,053 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-18 00:00:12,129 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-18 00:00:13,193 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-18 00:00:13,682 - sglang - INFO - [2025-05-18 00:00:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-18 00:00:13,682 - __main__ - INFO - [2025-05-18 00:00:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-18 00:00:14,269 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-18 00:00:15,400 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-18 00:00:16,460 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-18 00:00:17,527 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-18 00:00:18,598 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-18 00:00:19,666 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-18 00:00:20,728 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-18 00:00:21,782 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-18 00:00:22,844 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-18 00:00:23,909 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-18 00:00:24,973 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-18 00:00:26,044 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-18 00:00:26,297 - sglang - INFO - [2025-05-18 00:00:26 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-18 00:00:26,297 - __main__ - INFO - [2025-05-18 00:00:26 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-18 00:00:26,931 - sglang - INFO - [2025-05-18 00:00:26 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-18 00:00:26,931 - __main__ - INFO - [2025-05-18 00:00:26 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-18 00:00:26,931 - sglang - INFO - [2025-05-18 00:00:26 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-18 00:00:26,931 - __main__ - INFO - [2025-05-18 00:00:26 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-18 00:00:26,931 - sglang - INFO - [2025-05-18 00:00:26 TP0] Init torch distributed begin.
- 2025-05-18 00:00:26,931 - __main__ - INFO - [2025-05-18 00:00:26 TP0] Init torch distributed begin.
- 2025-05-18 00:00:27,124 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-18 00:00:28,194 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-18 00:00:29,264 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-18 00:00:30,330 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-18 00:00:31,400 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-18 00:00:32,319 - sglang - INFO - [2025-05-18 00:00:32 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-18 00:00:32,319 - __main__ - INFO - [2025-05-18 00:00:32 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-18 00:00:32,479 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-18 00:00:33,364 - sglang - INFO - [2025-05-18 00:00:33 TP0] Using model weights format ['*.safetensors']
- 2025-05-18 00:00:33,364 - __main__ - INFO - [2025-05-18 00:00:33 TP0] Using model weights format ['*.safetensors']
- 2025-05-18 00:00:33,557 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-18 00:00:34,334 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-18 00:00:34,335 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-18 00:00:34,636 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-18 00:00:34,689 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.82it/s]
- 2025-05-18 00:00:34,689 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.82it/s]
- 2025-05-18 00:00:35,716 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-18 00:00:35,867 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.19it/s]
- 2025-05-18 00:00:35,867 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.19it/s]
- 2025-05-18 00:00:36,796 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-18 00:00:37,063 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:01, 1.00s/it]
- 2025-05-18 00:00:37,063 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:01, 1.00s/it]
- 2025-05-18 00:00:37,865 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-18 00:00:38,194 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05s/it]
- 2025-05-18 00:00:38,194 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05s/it]
- 2025-05-18 00:00:38,194 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.04it/s]
- 2025-05-18 00:00:38,194 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.04it/s]
- 2025-05-18 00:00:38,194 - sglang - INFO -
- 2025-05-18 00:00:38,194 - __main__ - INFO -
- 2025-05-18 00:00:38,370 - sglang - INFO - [2025-05-18 00:00:38 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-18 00:00:38,370 - __main__ - INFO - [2025-05-18 00:00:38 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-18 00:00:38,376 - sglang - INFO - [2025-05-18 00:00:38 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-18 00:00:38,376 - __main__ - INFO - [2025-05-18 00:00:38 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-18 00:00:38,376 - sglang - INFO - [2025-05-18 00:00:38 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-18 00:00:38,376 - __main__ - INFO - [2025-05-18 00:00:38 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-18 00:00:38,525 - sglang - INFO - [2025-05-18 00:00:38 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-18 00:00:38,525 - __main__ - INFO - [2025-05-18 00:00:38 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-18 00:00:38,945 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-05-18 00:00:40,023 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-05-18 00:00:40,202 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.88it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s]
100%|██████████| 4/4 [00:01<00:00, 2.96it/s]
100%|██████████| 4/4 [00:01<00:00, 2.39it/s]
- 2025-05-18 00:00:40,203 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.88it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s]
100%|██████████| 4/4 [00:01<00:00, 2.96it/s]
100%|██████████| 4/4 [00:01<00:00, 2.39it/s]
- 2025-05-18 00:00:40,203 - sglang - INFO - [2025-05-18 00:00:40 TP0] Capture cuda graph end. Time elapsed: 1.68 s
- 2025-05-18 00:00:40,203 - __main__ - INFO - [2025-05-18 00:00:40 TP0] Capture cuda graph end. Time elapsed: 1.68 s
- 2025-05-18 00:00:41,102 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-05-18 00:00:42,172 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-05-18 00:00:42,689 - sglang - INFO - [2025-05-18 00:00:42 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-18 00:00:42,689 - __main__ - INFO - [2025-05-18 00:00:42 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-18 00:00:43,271 - __main__ - INFO - sglang server is ready.
- 2025-05-18 00:00:43,271 - __main__ - INFO - Queue remaining: 1
- 2025-05-18 00:00:43,271 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 00:00:43,271 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-18 00:00:43,271 - __main__ - INFO - Worker 0 processing work item 91f602739df6407104cadbe51df97c7f32677f88
- 2025-05-18 00:00:43,272 - __main__ - INFO - Created all tasks for 91f602739df6407104cadbe51df97c7f32677f88
- 2025-05-18 00:00:43,278 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747497590/input.pdf in worker 0
- 2025-05-18 00:00:43,762 - sglang - INFO - [2025-05-18 00:00:43 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-18 00:00:43,763 - __main__ - INFO - [2025-05-18 00:00:43 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-18 00:00:43,763 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-18 00:00:44,650 - sglang - INFO - [2025-05-18 00:00:44] The server is fired up and ready to roll!
- 2025-05-18 00:00:44,650 - __main__ - INFO - [2025-05-18 00:00:44] The server is fired up and ready to roll!
- 2025-05-18 00:00:49,546 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-1
- 2025-05-18 00:00:49,583 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-2
- 2025-05-18 00:00:49,616 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-3
- 2025-05-18 00:00:49,645 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-4
- 2025-05-18 00:00:49,677 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-5
- 2025-05-18 00:00:53,279 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 00:00:53,279 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 00:00:53,279 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-18 00:01:03,200 - sglang - INFO - [2025-05-18 00:01:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-18 00:01:03,200 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-18 00:01:03,279 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 00:01:03,279 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 00:01:03,279 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-18 00:01:05,382 - sglang - INFO - [2025-05-18 00:01:05 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-18 00:01:05,382 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 00:01:09,215 - sglang - INFO - [2025-05-18 00:01:09 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.48, #queue-req: 0
- 2025-05-18 00:01:09,215 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 00:01:10,074 - sglang - INFO - [2025-05-18 00:01:10 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 232.81, #queue-req: 0
- 2025-05-18 00:01:10,074 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 00:01:10,932 - sglang - INFO - [2025-05-18 00:01:10 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.21, #queue-req: 0
- 2025-05-18 00:01:10,932 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 00:01:11,789 - sglang - INFO - [2025-05-18 00:01:11 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.30, #queue-req: 0
- 2025-05-18 00:01:11,789 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 00:01:12,647 - sglang - INFO - [2025-05-18 00:01:12 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.19, #queue-req: 0
- 2025-05-18 00:01:12,647 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 00:01:13,280 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 00:01:13,281 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 00:01:13,281 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-18 00:01:13,506 - sglang - INFO - [2025-05-18 00:01:13 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.77, #queue-req: 0
- 2025-05-18 00:01:13,506 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 00:01:14,368 - sglang - INFO - [2025-05-18 00:01:14 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.07, #queue-req: 0
- 2025-05-18 00:01:14,368 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 00:01:14,902 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-18 00:01:14,902 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-18 00:01:14,902 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-18 00:01:15,229 - sglang - INFO - [2025-05-18 00:01:15 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.09, #queue-req: 0
- 2025-05-18 00:01:15,230 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 00:01:16,088 - sglang - INFO - [2025-05-18 00:01:16 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 217.68, #queue-req: 0
- 2025-05-18 00:01:16,089 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-18 00:01:16,931 - sglang - INFO - [2025-05-18 00:01:16 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.45, #queue-req: 0
- 2025-05-18 00:01:16,931 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-18 00:01:17,764 - sglang - INFO - [2025-05-18 00:01:17 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 84.03, #queue-req: 0
- 2025-05-18 00:01:17,764 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 00:01:18,588 - sglang - INFO - [2025-05-18 00:01:18 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.54, #queue-req: 0
- 2025-05-18 00:01:18,588 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 00:01:19,316 - __main__ - INFO - Finished TaskGroup for worker on 91f602739df6407104cadbe51df97c7f32677f88
- 2025-05-18 00:01:19,317 - __main__ - INFO - Got 1 docs for 91f602739df6407104cadbe51df97c7f32677f88
- 2025-05-18 00:01:19,318 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-18 00:01:19,318 - __main__ - INFO - Work done
- 2025-05-18 00:01:19,319 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-18 10:13:07,994 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-18 10:13:07,994 - __main__ - INFO - Loading file at olmocr_workspace/job_1747534381/input.pdf as PDF document
- 2025-05-18 10:13:07,994 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-18 10:13:07,998 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-18 10:13:08,244 - __main__ - INFO - Starting pipeline with PID 481106
- 2025-05-18 10:13:08,245 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-18 10:13:13,846 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-18 10:13:14,887 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-18 10:13:15,935 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-18 10:13:17,000 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-18 10:13:18,069 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-18 10:13:19,139 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-18 10:13:19,893 - sglang - INFO - [2025-05-18 10:13:19] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=969455633, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-18 10:13:19,893 - __main__ - INFO - [2025-05-18 10:13:19] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=969455633, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-18 10:13:20,216 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-18 10:13:21,285 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-18 10:13:22,347 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-18 10:13:23,412 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-18 10:13:24,480 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-18 10:13:25,549 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-18 10:13:26,620 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-18 10:13:27,689 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-18 10:13:28,755 - sglang - INFO - [2025-05-18 10:13:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-18 10:13:28,755 - __main__ - INFO - [2025-05-18 10:13:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-18 10:13:28,756 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-18 10:13:29,831 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-18 10:13:30,896 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-18 10:13:31,950 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-18 10:13:33,016 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-18 10:13:33,999 - sglang - INFO - [2025-05-18 10:13:33 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-18 10:13:33,999 - __main__ - INFO - [2025-05-18 10:13:33 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-18 10:13:34,093 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-18 10:13:34,497 - sglang - INFO - [2025-05-18 10:13:34 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-18 10:13:34,497 - __main__ - INFO - [2025-05-18 10:13:34 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-18 10:13:34,497 - sglang - INFO - [2025-05-18 10:13:34 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-18 10:13:34,497 - __main__ - INFO - [2025-05-18 10:13:34 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-18 10:13:34,497 - sglang - INFO - [2025-05-18 10:13:34 TP0] Init torch distributed begin.
- 2025-05-18 10:13:34,497 - __main__ - INFO - [2025-05-18 10:13:34 TP0] Init torch distributed begin.
- 2025-05-18 10:13:35,172 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-18 10:13:36,241 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-18 10:13:37,311 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-18 10:13:38,380 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-18 10:13:39,447 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-18 10:13:39,850 - sglang - INFO - [2025-05-18 10:13:39 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-18 10:13:39,851 - __main__ - INFO - [2025-05-18 10:13:39 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-18 10:13:40,517 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-18 10:13:40,953 - sglang - INFO - [2025-05-18 10:13:40 TP0] Using model weights format ['*.safetensors']
- 2025-05-18 10:13:40,954 - __main__ - INFO - [2025-05-18 10:13:40 TP0] Using model weights format ['*.safetensors']
- 2025-05-18 10:13:41,434 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-18 10:13:41,434 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-18 10:13:41,596 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-18 10:13:41,753 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.14it/s]
- 2025-05-18 10:13:41,753 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.14it/s]
- 2025-05-18 10:13:42,677 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-18 10:13:42,796 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.34it/s]
- 2025-05-18 10:13:42,796 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.34it/s]
- 2025-05-18 10:13:43,757 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-18 10:13:43,827 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.14it/s]
- 2025-05-18 10:13:43,827 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.14it/s]
- 2025-05-18 10:13:44,835 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.10it/s]
- 2025-05-18 10:13:44,835 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.10it/s]
- 2025-05-18 10:13:44,836 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.19it/s]
- 2025-05-18 10:13:44,836 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.19it/s]
- 2025-05-18 10:13:44,836 - sglang - INFO -
- 2025-05-18 10:13:44,836 - __main__ - INFO -
- 2025-05-18 10:13:44,837 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-18 10:13:44,914 - sglang - INFO - [2025-05-18 10:13:44 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-18 10:13:44,914 - __main__ - INFO - [2025-05-18 10:13:44 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-18 10:13:44,920 - sglang - INFO - [2025-05-18 10:13:44 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-18 10:13:44,920 - __main__ - INFO - [2025-05-18 10:13:44 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-18 10:13:44,921 - sglang - INFO - [2025-05-18 10:13:44 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-18 10:13:44,921 - __main__ - INFO - [2025-05-18 10:13:44 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-18 10:13:45,072 - sglang - INFO - [2025-05-18 10:13:45 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-18 10:13:45,073 - __main__ - INFO - [2025-05-18 10:13:45 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-18 10:13:45,917 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-18 10:13:46,750 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.88it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s]
100%|██████████| 4/4 [00:01<00:00, 2.95it/s]
100%|██████████| 4/4 [00:01<00:00, 2.39it/s]
- 2025-05-18 10:13:46,750 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.06it/s]
50%|█████ | 2/4 [00:01<00:01, 1.88it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s]
100%|██████████| 4/4 [00:01<00:00, 2.95it/s]
100%|██████████| 4/4 [00:01<00:00, 2.39it/s]
- 2025-05-18 10:13:46,750 - sglang - INFO - [2025-05-18 10:13:46 TP0] Capture cuda graph end. Time elapsed: 1.68 s
- 2025-05-18 10:13:46,750 - __main__ - INFO - [2025-05-18 10:13:46 TP0] Capture cuda graph end. Time elapsed: 1.68 s
- 2025-05-18 10:13:46,996 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-18 10:13:48,078 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-18 10:13:49,135 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-05-18 10:13:49,359 - sglang - INFO - [2025-05-18 10:13:49 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-18 10:13:49,359 - __main__ - INFO - [2025-05-18 10:13:49 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-18 10:13:50,208 - __main__ - INFO - sglang server is ready.
- 2025-05-18 10:13:50,208 - __main__ - INFO - Queue remaining: 1
- 2025-05-18 10:13:50,209 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 10:13:50,209 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-18 10:13:50,209 - __main__ - INFO - Worker 0 processing work item 5573a5a2ff993d9d69d55df0ecdfd1e871e0176e
- 2025-05-18 10:13:50,209 - __main__ - INFO - Created all tasks for 5573a5a2ff993d9d69d55df0ecdfd1e871e0176e
- 2025-05-18 10:13:50,215 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747534381/input.pdf in worker 0
- 2025-05-18 10:13:50,436 - sglang - INFO - [2025-05-18 10:13:50 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-18 10:13:50,436 - __main__ - INFO - [2025-05-18 10:13:50 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-18 10:13:50,436 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-18 10:13:51,083 - sglang - INFO - [2025-05-18 10:13:51] The server is fired up and ready to roll!
- 2025-05-18 10:13:51,083 - __main__ - INFO - [2025-05-18 10:13:51] The server is fired up and ready to roll!
- 2025-05-18 10:13:56,725 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-1
- 2025-05-18 10:13:56,740 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-2
- 2025-05-18 10:13:56,769 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-3
- 2025-05-18 10:13:56,776 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-4
- 2025-05-18 10:13:56,800 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-5
- 2025-05-18 10:14:00,279 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 10:14:00,279 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 10:14:00,280 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-18 10:14:10,281 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 10:14:10,282 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 10:14:10,282 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-18 10:14:18,412 - sglang - INFO - [2025-05-18 10:14:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-18 10:14:18,413 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-18 10:14:19,243 - sglang - INFO - [2025-05-18 10:14:19 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-18 10:14:19,244 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:14:20,283 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 10:14:20,284 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 10:14:20,284 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-18 10:14:21,091 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-18 10:14:21,091 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-18 10:14:21,091 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-18 10:14:22,685 - sglang - INFO - [2025-05-18 10:14:22 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.16, #queue-req: 0
- 2025-05-18 10:14:22,685 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 10:14:23,541 - sglang - INFO - [2025-05-18 10:14:23 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.69, #queue-req: 0
- 2025-05-18 10:14:23,541 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 10:14:24,396 - sglang - INFO - [2025-05-18 10:14:24 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.92, #queue-req: 0
- 2025-05-18 10:14:24,396 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 10:14:25,252 - sglang - INFO - [2025-05-18 10:14:25 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.68, #queue-req: 0
- 2025-05-18 10:14:25,252 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 10:14:26,108 - sglang - INFO - [2025-05-18 10:14:26 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.63, #queue-req: 0
- 2025-05-18 10:14:26,108 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 10:14:26,966 - sglang - INFO - [2025-05-18 10:14:26 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 233.02, #queue-req: 0
- 2025-05-18 10:14:26,966 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 10:14:27,827 - sglang - INFO - [2025-05-18 10:14:27 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.34, #queue-req: 0
- 2025-05-18 10:14:27,827 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 10:14:28,688 - sglang - INFO - [2025-05-18 10:14:28 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.30, #queue-req: 0
- 2025-05-18 10:14:28,688 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-18 10:14:29,546 - sglang - INFO - [2025-05-18 10:14:29 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.65, #queue-req: 0
- 2025-05-18 10:14:29,546 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-18 10:14:30,285 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 10:14:30,285 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 48.89 48.89
- sglang_output_tokens 8.43 8.43
- 2025-05-18 10:14:30,285 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 5
- 2025-05-18 10:14:30,387 - sglang - INFO - [2025-05-18 10:14:30 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.78, #queue-req: 0
- 2025-05-18 10:14:30,387 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-05-18 10:14:31,220 - sglang - INFO - [2025-05-18 10:14:31 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 81.63, #queue-req: 0
- 2025-05-18 10:14:31,220 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:14:32,044 - sglang - INFO - [2025-05-18 10:14:32 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.54, #queue-req: 0
- 2025-05-18 10:14:32,044 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:14:32,708 - __main__ - INFO - Finished TaskGroup for worker on 5573a5a2ff993d9d69d55df0ecdfd1e871e0176e
- 2025-05-18 10:14:32,708 - __main__ - INFO - Got 1 docs for 5573a5a2ff993d9d69d55df0ecdfd1e871e0176e
- 2025-05-18 10:14:32,709 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-18 10:14:32,710 - __main__ - INFO - Work done
- 2025-05-18 10:14:32,710 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-18 10:18:39,289 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-18 10:18:39,290 - __main__ - INFO - Loading file at olmocr_workspace/job_1747534713/input.pdf as PDF document
- 2025-05-18 10:18:39,290 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-18 10:18:39,292 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-18 10:18:39,534 - __main__ - INFO - Starting pipeline with PID 482470
- 2025-05-18 10:18:39,534 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-18 10:18:40,286 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-18 10:18:41,332 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-18 10:18:42,396 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-18 10:18:43,465 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-18 10:18:44,534 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-18 10:18:45,592 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-18 10:18:45,950 - sglang - INFO - [2025-05-18 10:18:45] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=432529288, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-18 10:18:45,950 - __main__ - INFO - [2025-05-18 10:18:45] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=432529288, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-18 10:18:46,671 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-18 10:18:47,718 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-18 10:18:48,764 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-18 10:18:49,809 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-18 10:18:50,930 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-18 10:18:51,972 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-18 10:18:53,033 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-18 10:18:54,100 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-18 10:18:54,357 - sglang - INFO - [2025-05-18 10:18:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-18 10:18:54,357 - __main__ - INFO - [2025-05-18 10:18:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-18 10:18:54,987 - sglang - INFO - [2025-05-18 10:18:54 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-18 10:18:54,987 - __main__ - INFO - [2025-05-18 10:18:54 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-18 10:18:55,178 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-18 10:18:55,471 - sglang - INFO - [2025-05-18 10:18:55 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-18 10:18:55,471 - __main__ - INFO - [2025-05-18 10:18:55 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-18 10:18:55,471 - sglang - INFO - [2025-05-18 10:18:55 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-18 10:18:55,471 - __main__ - INFO - [2025-05-18 10:18:55 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-18 10:18:55,471 - sglang - INFO - [2025-05-18 10:18:55 TP0] Init torch distributed begin.
- 2025-05-18 10:18:55,471 - __main__ - INFO - [2025-05-18 10:18:55 TP0] Init torch distributed begin.
- 2025-05-18 10:18:56,256 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-18 10:18:57,314 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-18 10:18:58,348 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-18 10:18:59,399 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-18 10:19:00,468 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-18 10:19:00,767 - sglang - INFO - [2025-05-18 10:19:00 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-18 10:19:00,767 - __main__ - INFO - [2025-05-18 10:19:00 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-18 10:19:01,547 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-18 10:19:01,758 - sglang - INFO - [2025-05-18 10:19:01 TP0] Using model weights format ['*.safetensors']
- 2025-05-18 10:19:01,758 - __main__ - INFO - [2025-05-18 10:19:01 TP0] Using model weights format ['*.safetensors']
- 2025-05-18 10:19:02,285 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-18 10:19:02,286 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-18 10:19:02,624 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.57it/s]
- 2025-05-18 10:19:02,624 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.57it/s]
- 2025-05-18 10:19:02,626 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-18 10:19:03,506 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.50it/s]
- 2025-05-18 10:19:03,507 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.50it/s]
- 2025-05-18 10:19:03,705 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-18 10:19:04,450 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.26it/s]
- 2025-05-18 10:19:04,450 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.26it/s]
- 2025-05-18 10:19:04,784 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-18 10:19:05,365 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.19it/s]
- 2025-05-18 10:19:05,365 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.19it/s]
- 2025-05-18 10:19:05,365 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.30it/s]
- 2025-05-18 10:19:05,365 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.30it/s]
- 2025-05-18 10:19:05,365 - sglang - INFO -
- 2025-05-18 10:19:05,365 - __main__ - INFO -
- 2025-05-18 10:19:05,512 - sglang - INFO - [2025-05-18 10:19:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-18 10:19:05,512 - __main__ - INFO - [2025-05-18 10:19:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-18 10:19:05,547 - sglang - INFO - [2025-05-18 10:19:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-18 10:19:05,548 - __main__ - INFO - [2025-05-18 10:19:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-18 10:19:05,548 - sglang - INFO - [2025-05-18 10:19:05 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-18 10:19:05,548 - __main__ - INFO - [2025-05-18 10:19:05 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-18 10:19:05,720 - sglang - INFO - [2025-05-18 10:19:05 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-18 10:19:05,721 - __main__ - INFO - [2025-05-18 10:19:05 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-18 10:19:05,862 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-18 10:19:06,941 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-18 10:19:07,448 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.04it/s]
50%|█████ | 2/4 [00:01<00:01, 1.81it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.41it/s]
100%|██████████| 4/4 [00:01<00:00, 2.87it/s]
100%|██████████| 4/4 [00:01<00:00, 2.32it/s]
- 2025-05-18 10:19:07,448 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.04it/s]
50%|█████ | 2/4 [00:01<00:01, 1.81it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.41it/s]
100%|██████████| 4/4 [00:01<00:00, 2.87it/s]
100%|██████████| 4/4 [00:01<00:00, 2.32it/s]
- 2025-05-18 10:19:07,448 - sglang - INFO - [2025-05-18 10:19:07 TP0] Capture cuda graph end. Time elapsed: 1.73 s
- 2025-05-18 10:19:07,448 - __main__ - INFO - [2025-05-18 10:19:07 TP0] Capture cuda graph end. Time elapsed: 1.73 s
- 2025-05-18 10:19:08,019 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-18 10:19:09,089 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-18 10:19:09,790 - sglang - INFO - [2025-05-18 10:19:09 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-18 10:19:09,790 - __main__ - INFO - [2025-05-18 10:19:09 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-18 10:19:10,181 - __main__ - INFO - sglang server is ready.
- 2025-05-18 10:19:10,182 - __main__ - INFO - Queue remaining: 1
- 2025-05-18 10:19:10,182 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 10:19:10,182 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-18 10:19:10,182 - __main__ - INFO - Worker 0 processing work item 9901aa831d8e2be5b7f3bdc190a5653fe9f5b256
- 2025-05-18 10:19:10,182 - __main__ - INFO - Created all tasks for 9901aa831d8e2be5b7f3bdc190a5653fe9f5b256
- 2025-05-18 10:19:10,184 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747534713/input.pdf in worker 0
- 2025-05-18 10:19:10,863 - sglang - INFO - [2025-05-18 10:19:10 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-18 10:19:10,864 - __main__ - INFO - [2025-05-18 10:19:10 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-18 10:19:10,864 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-18 10:19:11,556 - sglang - INFO - [2025-05-18 10:19:11] The server is fired up and ready to roll!
- 2025-05-18 10:19:11,556 - __main__ - INFO - [2025-05-18 10:19:11] The server is fired up and ready to roll!
- 2025-05-18 10:19:16,469 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534713/input.pdf-1
- 2025-05-18 10:19:20,183 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 10:19:20,183 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 10:19:20,183 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-18 10:19:30,196 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 10:19:30,196 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 10:19:30,196 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-18 10:19:37,626 - sglang - INFO - [2025-05-18 10:19:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-18 10:19:37,627 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-18 10:19:39,046 - sglang - INFO - [2025-05-18 10:19:39 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.37, #queue-req: 0
- 2025-05-18 10:19:39,047 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:19:39,865 - sglang - INFO - [2025-05-18 10:19:39 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.87, #queue-req: 0
- 2025-05-18 10:19:39,865 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:19:40,197 - __main__ - INFO - Queue remaining: 0
- 2025-05-18 10:19:40,197 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-18 10:19:40,197 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-18 10:19:40,682 - sglang - INFO - [2025-05-18 10:19:40 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.95, #queue-req: 0
- 2025-05-18 10:19:40,682 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:19:41,498 - sglang - INFO - [2025-05-18 10:19:41 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 49.00, #queue-req: 0
- 2025-05-18 10:19:41,498 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:19:42,316 - sglang - INFO - [2025-05-18 10:19:42 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.92, #queue-req: 0
- 2025-05-18 10:19:42,316 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:19:42,489 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-18 10:19:43,135 - sglang - INFO - [2025-05-18 10:19:43 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.84, #queue-req: 0
- 2025-05-18 10:19:43,135 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:19:43,955 - sglang - INFO - [2025-05-18 10:19:43 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.79, #queue-req: 0
- 2025-05-18 10:19:43,955 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-18 10:19:44,249 - __main__ - INFO - Finished TaskGroup for worker on 9901aa831d8e2be5b7f3bdc190a5653fe9f5b256
- 2025-05-18 10:19:44,250 - __main__ - INFO - Got 1 docs for 9901aa831d8e2be5b7f3bdc190a5653fe9f5b256
- 2025-05-18 10:19:44,251 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-18 10:19:44,251 - __main__ - INFO - Work done
- 2025-05-18 10:19:44,252 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-21 10:48:29,495 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-21 10:48:29,495 - __main__ - INFO - Loading file at olmocr_workspace/job_1747795702/input.pdf as PDF document
- 2025-05-21 10:48:29,495 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-21 10:48:29,498 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
- 2025-05-21 10:48:29,750 - __main__ - INFO - Starting pipeline with PID 564298
- 2025-05-21 10:48:29,750 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-21 10:48:30,314 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-21 10:48:31,353 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-21 10:48:32,411 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-21 10:48:33,469 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-21 10:48:34,533 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-21 10:48:35,679 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-21 10:48:36,724 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-21 10:48:36,776 - sglang - INFO - [2025-05-21 10:48:36] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=968885299, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-21 10:48:36,776 - __main__ - INFO - [2025-05-21 10:48:36] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=968885299, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-21 10:48:37,768 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-21 10:48:38,813 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-21 10:48:39,857 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-21 10:48:40,071 - sglang - INFO - [2025-05-21 10:48:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-21 10:48:40,071 - __main__ - INFO - [2025-05-21 10:48:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-21 10:48:40,900 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-21 10:48:41,946 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-21 10:48:42,991 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-21 10:48:44,035 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-21 10:48:45,079 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-21 10:48:46,112 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-21 10:48:46,231 - sglang - INFO - [2025-05-21 10:48:46 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-21 10:48:46,231 - __main__ - INFO - [2025-05-21 10:48:46 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-21 10:48:46,713 - sglang - INFO - [2025-05-21 10:48:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-21 10:48:46,713 - __main__ - INFO - [2025-05-21 10:48:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-21 10:48:46,713 - sglang - INFO - [2025-05-21 10:48:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-21 10:48:46,713 - __main__ - INFO - [2025-05-21 10:48:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-21 10:48:46,713 - sglang - INFO - [2025-05-21 10:48:46 TP0] Init torch distributed begin.
- 2025-05-21 10:48:46,713 - __main__ - INFO - [2025-05-21 10:48:46 TP0] Init torch distributed begin.
- 2025-05-21 10:48:47,181 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-21 10:48:48,247 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-21 10:48:49,313 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-21 10:48:50,375 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-21 10:48:51,429 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-21 10:48:52,155 - sglang - INFO - [2025-05-21 10:48:52 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-21 10:48:52,156 - __main__ - INFO - [2025-05-21 10:48:52 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-21 10:48:52,507 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-21 10:48:53,239 - sglang - INFO - [2025-05-21 10:48:53 TP0] Using model weights format ['*.safetensors']
- 2025-05-21 10:48:53,240 - __main__ - INFO - [2025-05-21 10:48:53 TP0] Using model weights format ['*.safetensors']
- 2025-05-21 10:48:53,586 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-21 10:48:53,738 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-21 10:48:53,738 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-21 10:48:54,049 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.22it/s]
- 2025-05-21 10:48:54,050 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.22it/s]
- 2025-05-21 10:48:54,661 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-21 10:48:55,040 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.41it/s]
- 2025-05-21 10:48:55,041 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.41it/s]
- 2025-05-21 10:48:55,740 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-21 10:48:55,997 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.21it/s]
- 2025-05-21 10:48:55,998 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.21it/s]
- 2025-05-21 10:48:56,818 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-21 10:48:56,931 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.15it/s]
- 2025-05-21 10:48:56,931 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.15it/s]
- 2025-05-21 10:48:56,931 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.25it/s]
- 2025-05-21 10:48:56,931 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.25it/s]
- 2025-05-21 10:48:56,931 - sglang - INFO -
- 2025-05-21 10:48:56,931 - __main__ - INFO -
- 2025-05-21 10:48:57,064 - sglang - INFO - [2025-05-21 10:48:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-21 10:48:57,064 - __main__ - INFO - [2025-05-21 10:48:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-21 10:48:57,070 - sglang - INFO - [2025-05-21 10:48:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-21 10:48:57,070 - __main__ - INFO - [2025-05-21 10:48:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-21 10:48:57,070 - sglang - INFO - [2025-05-21 10:48:57 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-21 10:48:57,070 - __main__ - INFO - [2025-05-21 10:48:57 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-21 10:48:57,221 - sglang - INFO - [2025-05-21 10:48:57 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-21 10:48:57,222 - __main__ - INFO - [2025-05-21 10:48:57 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-21 10:48:57,897 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-21 10:48:58,974 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.05it/s]
50%|█████ | 2/4 [00:01<00:01, 1.84it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.43it/s]
100%|██████████| 4/4 [00:01<00:00, 2.84it/s]
100%|██████████| 4/4 [00:01<00:00, 2.32it/s]
- 2025-05-21 10:48:58,975 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.05it/s]
50%|█████ | 2/4 [00:01<00:01, 1.84it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.43it/s]
100%|██████████| 4/4 [00:01<00:00, 2.84it/s]
100%|██████████| 4/4 [00:01<00:00, 2.32it/s]
- 2025-05-21 10:48:58,975 - sglang - INFO - [2025-05-21 10:48:58 TP0] Capture cuda graph end. Time elapsed: 1.73 s
- 2025-05-21 10:48:58,975 - __main__ - INFO - [2025-05-21 10:48:58 TP0] Capture cuda graph end. Time elapsed: 1.73 s
- 2025-05-21 10:48:58,976 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-21 10:49:00,050 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-21 10:49:01,116 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-21 10:49:01,339 - sglang - INFO - [2025-05-21 10:49:01 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-21 10:49:01,339 - __main__ - INFO - [2025-05-21 10:49:01 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-21 10:49:02,215 - __main__ - INFO - sglang server is ready.
- 2025-05-21 10:49:02,215 - __main__ - INFO - Queue remaining: 1
- 2025-05-21 10:49:02,215 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-21 10:49:02,216 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-21 10:49:02,216 - __main__ - INFO - Worker 0 processing work item 81f62eccf96bd22b741354b451ad2460310111e8
- 2025-05-21 10:49:02,216 - __main__ - INFO - Created all tasks for 81f62eccf96bd22b741354b451ad2460310111e8
- 2025-05-21 10:49:02,222 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747795702/input.pdf in worker 0
- 2025-05-21 10:49:02,416 - sglang - INFO - [2025-05-21 10:49:02 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-21 10:49:02,416 - __main__ - INFO - [2025-05-21 10:49:02 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-21 10:49:02,416 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-21 10:49:03,182 - sglang - INFO - [2025-05-21 10:49:03] The server is fired up and ready to roll!
- 2025-05-21 10:49:03,182 - __main__ - INFO - [2025-05-21 10:49:03] The server is fired up and ready to roll!
- 2025-05-21 10:49:08,501 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-1
- 2025-05-21 10:49:08,536 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-2
- 2025-05-21 10:49:08,574 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-3
- 2025-05-21 10:49:08,602 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-4
- 2025-05-21 10:49:08,646 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-5
- 2025-05-21 10:49:12,218 - __main__ - INFO - Queue remaining: 0
- 2025-05-21 10:49:12,218 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-21 10:49:12,218 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-21 10:49:22,205 - sglang - INFO - [2025-05-21 10:49:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-21 10:49:22,205 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-21 10:49:22,219 - __main__ - INFO - Queue remaining: 0
- 2025-05-21 10:49:22,219 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-21 10:49:22,220 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-21 10:49:24,183 - sglang - INFO - [2025-05-21 10:49:24 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-05-21 10:49:24,183 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:49:28,600 - sglang - INFO - [2025-05-21 10:49:28 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.31, #queue-req: 0
- 2025-05-21 10:49:28,600 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-21 10:49:29,457 - sglang - INFO - [2025-05-21 10:49:29 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.26, #queue-req: 0
- 2025-05-21 10:49:29,457 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-21 10:49:30,315 - sglang - INFO - [2025-05-21 10:49:30 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.17, #queue-req: 0
- 2025-05-21 10:49:30,315 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-21 10:49:31,173 - sglang - INFO - [2025-05-21 10:49:31 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.00, #queue-req: 0
- 2025-05-21 10:49:31,174 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-21 10:49:32,032 - sglang - INFO - [2025-05-21 10:49:32 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.02, #queue-req: 0
- 2025-05-21 10:49:32,032 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-21 10:49:32,220 - __main__ - INFO - Queue remaining: 0
- 2025-05-21 10:49:32,221 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-21 10:49:32,221 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-05-21 10:49:32,891 - sglang - INFO - [2025-05-21 10:49:32 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.69, #queue-req: 0
- 2025-05-21 10:49:32,892 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-21 10:49:33,496 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-21 10:49:33,497 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-21 10:49:33,497 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-21 10:49:33,754 - sglang - INFO - [2025-05-21 10:49:33 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.74, #queue-req: 0
- 2025-05-21 10:49:33,755 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-21 10:49:34,617 - sglang - INFO - [2025-05-21 10:49:34 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 231.73, #queue-req: 0
- 2025-05-21 10:49:34,618 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-05-21 10:49:35,477 - sglang - INFO - [2025-05-21 10:49:35 TP0] Decode batch. #running-req: 4, #token: 9730, token usage: 0.26, gen throughput (token/s): 217.58, #queue-req: 0
- 2025-05-21 10:49:35,477 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-05-21 10:49:36,311 - sglang - INFO - [2025-05-21 10:49:36 TP0] Decode batch. #running-req: 2, #token: 5146, token usage: 0.14, gen throughput (token/s): 111.43, #queue-req: 0
- 2025-05-21 10:49:36,312 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-05-21 10:49:37,139 - sglang - INFO - [2025-05-21 10:49:37 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 61.65, #queue-req: 0
- 2025-05-21 10:49:37,139 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:49:37,965 - sglang - INFO - [2025-05-21 10:49:37 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.41, #queue-req: 0
- 2025-05-21 10:49:37,965 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:49:38,631 - __main__ - INFO - Finished TaskGroup for worker on 81f62eccf96bd22b741354b451ad2460310111e8
- 2025-05-21 10:49:38,631 - __main__ - INFO - Got 1 docs for 81f62eccf96bd22b741354b451ad2460310111e8
- 2025-05-21 10:49:38,633 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-21 10:49:38,633 - __main__ - INFO - Work done
- 2025-05-21 10:49:38,634 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-05-21 10:51:53,346 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-05-21 10:51:53,346 - __main__ - INFO - Loading file at olmocr_workspace/job_1747795907/input.pdf as PDF document
- 2025-05-21 10:51:53,347 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-05-21 10:51:53,349 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-05-21 10:51:53,562 - __main__ - INFO - Starting pipeline with PID 565624
- 2025-05-21 10:51:53,562 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-05-21 10:51:59,266 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-05-21 10:52:00,312 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-05-21 10:52:01,373 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-05-21 10:52:02,437 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-05-21 10:52:03,506 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-05-21 10:52:04,575 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-05-21 10:52:04,951 - sglang - INFO - [2025-05-21 10:52:04] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=425685376, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-21 10:52:04,951 - __main__ - INFO - [2025-05-21 10:52:04] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=425685376, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-05-21 10:52:05,654 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-05-21 10:52:06,718 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-05-21 10:52:07,785 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-05-21 10:52:08,232 - sglang - INFO - [2025-05-21 10:52:08] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-21 10:52:08,233 - __main__ - INFO - [2025-05-21 10:52:08] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-05-21 10:52:08,865 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-05-21 10:52:09,941 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-05-21 10:52:11,008 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-05-21 10:52:12,069 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-05-21 10:52:13,134 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-05-21 10:52:14,200 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-05-21 10:52:15,266 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-05-21 10:52:16,328 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-05-21 10:52:17,382 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-05-21 10:52:18,448 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-05-21 10:52:19,029 - sglang - INFO - [2025-05-21 10:52:19 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-21 10:52:19,029 - __main__ - INFO - [2025-05-21 10:52:19 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-05-21 10:52:19,526 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-05-21 10:52:19,536 - sglang - INFO - [2025-05-21 10:52:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-21 10:52:19,536 - __main__ - INFO - [2025-05-21 10:52:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-05-21 10:52:19,536 - sglang - INFO - [2025-05-21 10:52:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-21 10:52:19,536 - __main__ - INFO - [2025-05-21 10:52:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-05-21 10:52:19,536 - sglang - INFO - [2025-05-21 10:52:19 TP0] Init torch distributed begin.
- 2025-05-21 10:52:19,536 - __main__ - INFO - [2025-05-21 10:52:19 TP0] Init torch distributed begin.
- 2025-05-21 10:52:20,605 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-05-21 10:52:21,676 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-05-21 10:52:22,746 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-05-21 10:52:23,817 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-05-21 10:52:24,888 - sglang - INFO - [2025-05-21 10:52:24 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-21 10:52:24,888 - __main__ - INFO - [2025-05-21 10:52:24 TP0] Load weight begin. avail mem=23.33 GB
- 2025-05-21 10:52:24,889 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-05-21 10:52:25,963 - sglang - INFO - [2025-05-21 10:52:25 TP0] Using model weights format ['*.safetensors']
- 2025-05-21 10:52:25,963 - __main__ - INFO - [2025-05-21 10:52:25 TP0] Using model weights format ['*.safetensors']
- 2025-05-21 10:52:25,964 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-05-21 10:52:26,469 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-21 10:52:26,469 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-05-21 10:52:26,761 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
- 2025-05-21 10:52:26,761 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
- 2025-05-21 10:52:27,064 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-05-21 10:52:27,680 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.51it/s]
- 2025-05-21 10:52:27,680 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.51it/s]
- 2025-05-21 10:52:28,144 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-05-21 10:52:28,602 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
- 2025-05-21 10:52:28,602 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
- 2025-05-21 10:52:29,225 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-05-21 10:52:29,494 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
- 2025-05-21 10:52:29,494 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
- 2025-05-21 10:52:29,494 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
- 2025-05-21 10:52:29,494 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
- 2025-05-21 10:52:29,495 - sglang - INFO -
- 2025-05-21 10:52:29,495 - __main__ - INFO -
- 2025-05-21 10:52:29,640 - sglang - INFO - [2025-05-21 10:52:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-21 10:52:29,640 - __main__ - INFO - [2025-05-21 10:52:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-05-21 10:52:29,646 - sglang - INFO - [2025-05-21 10:52:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-21 10:52:29,647 - __main__ - INFO - [2025-05-21 10:52:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-05-21 10:52:29,647 - sglang - INFO - [2025-05-21 10:52:29 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-21 10:52:29,647 - __main__ - INFO - [2025-05-21 10:52:29 TP0] Memory pool end. avail mem=5.30 GB
- 2025-05-21 10:52:29,821 - sglang - INFO - [2025-05-21 10:52:29 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-21 10:52:29,822 - __main__ - INFO - [2025-05-21 10:52:29 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-05-21 10:52:30,304 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-05-21 10:52:31,384 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-05-21 10:52:31,698 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.14s/it]
50%|█████ | 2/4 [00:01<00:01, 1.63it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s]
100%|██████████| 4/4 [00:01<00:00, 2.73it/s]
100%|██████████| 4/4 [00:01<00:00, 2.13it/s]
- 2025-05-21 10:52:31,699 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.14s/it]
50%|█████ | 2/4 [00:01<00:01, 1.63it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s]
100%|██████████| 4/4 [00:01<00:00, 2.73it/s]
100%|██████████| 4/4 [00:01<00:00, 2.13it/s]
- 2025-05-21 10:52:31,699 - sglang - INFO - [2025-05-21 10:52:31 TP0] Capture cuda graph end. Time elapsed: 1.88 s
- 2025-05-21 10:52:31,699 - __main__ - INFO - [2025-05-21 10:52:31 TP0] Capture cuda graph end. Time elapsed: 1.88 s
- 2025-05-21 10:52:32,463 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-05-21 10:52:33,532 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-05-21 10:52:34,223 - sglang - INFO - [2025-05-21 10:52:34 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-21 10:52:34,223 - __main__ - INFO - [2025-05-21 10:52:34 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-05-21 10:52:34,626 - __main__ - INFO - sglang server is ready.
- 2025-05-21 10:52:34,626 - __main__ - INFO - Queue remaining: 1
- 2025-05-21 10:52:34,626 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-21 10:52:34,626 - __main__ - INFO -
- Worker ID
- ---------
- 2025-05-21 10:52:34,626 - __main__ - INFO - Worker 0 processing work item 92a0d2c3d6bc2676d1a017a5af100cdd331b9231
- 2025-05-21 10:52:34,626 - __main__ - INFO - Created all tasks for 92a0d2c3d6bc2676d1a017a5af100cdd331b9231
- 2025-05-21 10:52:34,629 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747795907/input.pdf in worker 0
- 2025-05-21 10:52:35,294 - sglang - INFO - [2025-05-21 10:52:35 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-21 10:52:35,294 - __main__ - INFO - [2025-05-21 10:52:35 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-21 10:52:35,294 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-21 10:52:35,916 - sglang - INFO - [2025-05-21 10:52:35] The server is fired up and ready to roll!
- 2025-05-21 10:52:35,916 - __main__ - INFO - [2025-05-21 10:52:35] The server is fired up and ready to roll!
- 2025-05-21 10:52:41,022 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795907/input.pdf-1
- 2025-05-21 10:52:44,679 - __main__ - INFO - Queue remaining: 0
- 2025-05-21 10:52:44,679 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-21 10:52:44,679 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-21 10:52:54,680 - __main__ - INFO - Queue remaining: 0
- 2025-05-21 10:52:54,680 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-05-21 10:52:54,680 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-05-21 10:52:55,180 - sglang - INFO - [2025-05-21 10:52:55 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-05-21 10:52:55,180 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-05-21 10:52:57,568 - sglang - INFO - [2025-05-21 10:52:57 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.71, #queue-req: 0
- 2025-05-21 10:52:57,568 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:52:58,388 - sglang - INFO - [2025-05-21 10:52:58 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.78, #queue-req: 0
- 2025-05-21 10:52:58,388 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:52:59,208 - sglang - INFO - [2025-05-21 10:52:59 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.76, #queue-req: 0
- 2025-05-21 10:52:59,208 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:53:00,028 - sglang - INFO - [2025-05-21 10:53:00 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.79, #queue-req: 0
- 2025-05-21 10:53:00,028 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:53:00,849 - sglang - INFO - [2025-05-21 10:53:00 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.74, #queue-req: 0
- 2025-05-21 10:53:00,849 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:53:01,671 - sglang - INFO - [2025-05-21 10:53:01 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.64, #queue-req: 0
- 2025-05-21 10:53:01,671 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:53:02,494 - sglang - INFO - [2025-05-21 10:53:02 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.61, #queue-req: 0
- 2025-05-21 10:53:02,494 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-05-21 10:53:02,543 - __main__ - INFO - Finished TaskGroup for worker on 92a0d2c3d6bc2676d1a017a5af100cdd331b9231
- 2025-05-21 10:53:02,543 - __main__ - INFO - Got 1 docs for 92a0d2c3d6bc2676d1a017a5af100cdd331b9231
- 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-05-21 10:53:02,546 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-05-21 10:53:02,546 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-05-21 10:53:02,546 - __main__ - INFO - Work done
- 2025-05-21 10:53:02,546 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-19 23:00:28,292 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:00:28,293 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-19 23:00:28,293 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-19 23:00:28,299 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-19 23:00:28,492 - __main__ - INFO - Starting pipeline with PID 551007
- 2025-07-19 23:00:28,492 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:04:05,046 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:04:05,046 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-19 23:04:05,046 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-19 23:04:05,050 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-19 23:04:05,271 - __main__ - INFO - Starting pipeline with PID 551127
- 2025-07-19 23:04:05,271 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:04:05,740 - __main__ - INFO - No work to do, exiting
- 2025-07-19 23:04:30,925 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:04:30,925 - __main__ - INFO - Loading file at tests/gnarly_pdfs/ambiguous.pdf as PDF document
- 2025-07-19 23:04:30,925 - __main__ - INFO - Loading file at tests/gnarly_pdfs/badlines.pdf as PDF document
- 2025-07-19 23:04:30,926 - __main__ - INFO - Loading file at tests/gnarly_pdfs/bws_book_ch2.pdf as PDF document
- 2025-07-19 23:04:30,926 - __main__ - INFO - Loading file at tests/gnarly_pdfs/delivery.pdf as PDF document
- 2025-07-19 23:04:30,927 - __main__ - INFO - Loading file at tests/gnarly_pdfs/discoverworld_crazy_tables.pdf as PDF document
- 2025-07-19 23:04:30,927 - __main__ - INFO - Loading file at tests/gnarly_pdfs/dolma-page-1.pdf as PDF document
- 2025-07-19 23:04:30,927 - __main__ - INFO - Loading file at tests/gnarly_pdfs/edgar.pdf as PDF document
- 2025-07-19 23:04:30,928 - __main__ - INFO - Loading file at tests/gnarly_pdfs/failing_anchor_pg4.pdf as PDF document
- 2025-07-19 23:04:30,928 - __main__ - INFO - Loading file at tests/gnarly_pdfs/failing_pdf_pg9.pdf as PDF document
- 2025-07-19 23:04:30,928 - __main__ - INFO - Loading file at tests/gnarly_pdfs/form_on_later_pages.pdf as PDF document
- 2025-07-19 23:04:30,928 - __main__ - INFO - Loading file at tests/gnarly_pdfs/guidebook_failed_pages.pdf as PDF document
- 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/handwriting_bad_ocr.pdf as PDF document
- 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/instructions_and_schematics.pdf as PDF document
- 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/large_prompt_hint1.pdf as PDF document
- 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/large_prompt_hint2.pdf as PDF document
- 2025-07-19 23:04:30,930 - __main__ - INFO - Loading file at tests/gnarly_pdfs/large_prompt_hint3.pdf as PDF document
- 2025-07-19 23:04:30,930 - __main__ - INFO - Loading file at tests/gnarly_pdfs/load_v_error.pdf as PDF document
- 2025-07-19 23:04:30,930 - __main__ - INFO - Loading file at tests/gnarly_pdfs/lots_of_chem_tables.pdf as PDF document
- 2025-07-19 23:04:30,931 - __main__ - INFO - Loading file at tests/gnarly_pdfs/lots_of_sci_tables.pdf as PDF document
- 2025-07-19 23:04:30,931 - __main__ - INFO - Loading file at tests/gnarly_pdfs/map1.pdf as PDF document
- 2025-07-19 23:04:30,931 - __main__ - INFO - Loading file at tests/gnarly_pdfs/most_content_in_image_form.pdf as PDF document
- 2025-07-19 23:04:30,931 - __main__ - INFO - Loading file at tests/gnarly_pdfs/newspaper.pdf as PDF document
- 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/not_parsing.pdf as PDF document
- 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/not_parsing2.pdf as PDF document
- 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/olmo-page-1.pdf as PDF document
- 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/overrun_on_pg8.pdf as PDF document
- 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/pdftotext_two_column_issue.pdf as PDF document
- 2025-07-19 23:04:30,933 - __main__ - INFO - Loading file at tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf as PDF document
- 2025-07-19 23:04:30,933 - __main__ - INFO - Loading file at tests/gnarly_pdfs/skinnypage.pdf as PDF document
- 2025-07-19 23:04:30,933 - __main__ - INFO - Loading file at tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf as PDF document
- 2025-07-19 23:04:30,934 - __main__ - INFO - Loading file at tests/gnarly_pdfs/slideshow_mostly_images.pdf as PDF document
- 2025-07-19 23:04:30,934 - __main__ - INFO - Loading file at tests/gnarly_pdfs/small_page_size.pdf as PDF document
- 2025-07-19 23:04:30,934 - __main__ - INFO - Loading file at tests/gnarly_pdfs/some_ocr1.pdf as PDF document
- 2025-07-19 23:04:30,934 - __main__ - INFO - Loading file at tests/gnarly_pdfs/ti89_guidebook_programming.pdf as PDF document
- 2025-07-19 23:04:30,935 - __main__ - INFO - Loading file at tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf as PDF document
- 2025-07-19 23:04:30,935 - __main__ - INFO - Found 36 total pdf paths to add
- 2025-07-19 23:04:31,500 - __main__ - INFO - Calculated items_per_group: 32 based on average pages per PDF: 15.42
- 2025-07-19 23:04:31,714 - __main__ - INFO - Starting pipeline with PID 551214
- 2025-07-19 23:04:31,714 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:04:37,334 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-19 23:04:38,367 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-19 23:04:39,407 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-19 23:04:40,465 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-19 23:04:41,519 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-19 23:04:42,584 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-19 23:04:43,649 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-19 23:04:44,718 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-19 23:04:45,779 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-19 23:04:46,844 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-19 23:04:47,273 - sglang - INFO - [2025-07-19 23:04:47] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=83915160, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:04:47,273 - __main__ - INFO - [2025-07-19 23:04:47] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=83915160, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:04:47,912 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-19 23:04:48,984 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-19 23:04:50,054 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-19 23:04:51,122 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-19 23:04:51,816 - sglang - INFO - [2025-07-19 23:04:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:04:51,816 - __main__ - INFO - [2025-07-19 23:04:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:04:52,199 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-19 23:04:53,256 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-19 23:04:54,312 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-19 23:04:55,353 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-19 23:04:56,400 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-19 23:04:57,453 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-19 23:04:57,680 - sglang - INFO - [2025-07-19 23:04:57 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:04:57,681 - __main__ - INFO - [2025-07-19 23:04:57 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:04:58,369 - sglang - INFO - [2025-07-19 23:04:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:04:58,369 - __main__ - INFO - [2025-07-19 23:04:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:04:58,369 - sglang - INFO - [2025-07-19 23:04:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:04:58,369 - __main__ - INFO - [2025-07-19 23:04:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:04:58,369 - sglang - INFO - [2025-07-19 23:04:58 TP0] Init torch distributed begin.
- 2025-07-19 23:04:58,369 - __main__ - INFO - [2025-07-19 23:04:58 TP0] Init torch distributed begin.
- 2025-07-19 23:04:58,530 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-19 23:04:59,597 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-19 23:05:00,668 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-19 23:05:01,736 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-19 23:05:02,803 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-19 23:05:03,788 - sglang - INFO - [2025-07-19 23:05:03 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:05:03,788 - __main__ - INFO - [2025-07-19 23:05:03 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:05:03,879 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-19 23:05:04,948 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-19 23:05:05,134 - sglang - INFO - [2025-07-19 23:05:05 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:05:05,134 - __main__ - INFO - [2025-07-19 23:05:05 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:05:05,873 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:05:05,874 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:05:06,027 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-19 23:05:07,097 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-19 23:05:08,166 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-19 23:05:09,236 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-19 23:05:10,303 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-19 23:05:10,368 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.49s/it]
- 2025-07-19 23:05:10,369 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.49s/it]
- 2025-07-19 23:05:11,382 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-19 23:05:12,447 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-19 23:05:13,501 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-19 23:05:14,571 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-19 23:05:15,639 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-19 23:05:16,707 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-19 23:05:17,777 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-19 23:05:18,847 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-19 23:05:19,917 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-19 23:05:20,987 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-19 23:05:22,053 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-19 23:05:23,123 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-19 23:05:23,658 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:17<00:19, 9.67s/it]
- 2025-07-19 23:05:23,658 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:17<00:19, 9.67s/it]
- 2025-07-19 23:05:24,202 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-19 23:05:25,271 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-19 23:05:26,341 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-19 23:05:27,407 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-19 23:05:28,473 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-19 23:05:29,527 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-19 23:05:30,594 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-19 23:05:31,664 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-19 23:05:32,733 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-19 23:05:33,804 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-19 23:05:34,869 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-19 23:05:35,940 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-19 23:05:36,659 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:30<00:11, 11.19s/it]
- 2025-07-19 23:05:36,659 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:30<00:11, 11.19s/it]
- 2025-07-19 23:05:37,020 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-19 23:05:38,087 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-19 23:05:39,157 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-19 23:05:40,223 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-19 23:05:41,299 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-19 23:05:42,363 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-19 23:05:43,430 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-19 23:05:44,496 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-19 23:05:45,550 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-19 23:05:46,615 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-07-19 23:05:47,684 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
- 2025-07-19 23:05:48,752 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
- 2025-07-19 23:05:49,817 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
- 2025-07-19 23:05:50,037 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 12.05s/it]
- 2025-07-19 23:05:50,037 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 12.05s/it]
- 2025-07-19 23:05:50,038 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 11.04s/it]
- 2025-07-19 23:05:50,038 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 11.04s/it]
- 2025-07-19 23:05:50,038 - sglang - INFO -
- 2025-07-19 23:05:50,038 - __main__ - INFO -
- 2025-07-19 23:05:50,305 - sglang - INFO - [2025-07-19 23:05:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:05:50,305 - __main__ - INFO - [2025-07-19 23:05:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:05:50,319 - sglang - INFO - [2025-07-19 23:05:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:05:50,319 - __main__ - INFO - [2025-07-19 23:05:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:05:50,319 - sglang - INFO - [2025-07-19 23:05:50 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:05:50,319 - __main__ - INFO - [2025-07-19 23:05:50 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:05:50,611 - sglang - INFO - [2025-07-19 23:05:50 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:05:50,612 - __main__ - INFO - [2025-07-19 23:05:50 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:05:50,895 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
- 2025-07-19 23:05:51,970 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
- 2025-07-19 23:05:53,047 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
- 2025-07-19 23:05:53,309 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.59s/it]
50%|█████ | 2/4 [00:01<00:01, 1.14it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.57it/s]
100%|██████████| 4/4 [00:02<00:00, 1.88it/s]
100%|██████████| 4/4 [00:02<00:00, 1.49it/s]
- 2025-07-19 23:05:53,310 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.59s/it]
50%|█████ | 2/4 [00:01<00:01, 1.14it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.57it/s]
100%|██████████| 4/4 [00:02<00:00, 1.88it/s]
100%|██████████| 4/4 [00:02<00:00, 1.49it/s]
- 2025-07-19 23:05:53,310 - sglang - INFO - [2025-07-19 23:05:53 TP0] Capture cuda graph end. Time elapsed: 2.70 s
- 2025-07-19 23:05:53,310 - __main__ - INFO - [2025-07-19 23:05:53 TP0] Capture cuda graph end. Time elapsed: 2.70 s
- 2025-07-19 23:05:54,125 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
- 2025-07-19 23:05:55,193 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
- 2025-07-19 23:05:56,262 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
- 2025-07-19 23:05:56,889 - sglang - INFO - [2025-07-19 23:05:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:05:56,889 - __main__ - INFO - [2025-07-19 23:05:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:05:57,362 - __main__ - INFO - sglang server is ready.
- 2025-07-19 23:05:57,363 - __main__ - INFO - Queue remaining: 2
- 2025-07-19 23:05:57,363 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-19 23:05:57,363 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-19 23:05:57,363 - __main__ - INFO - Worker 0 processing work item b903c79fc04852a9f203dfa04143731928e937aa
- 2025-07-19 23:05:57,365 - __main__ - INFO - Created all tasks for b903c79fc04852a9f203dfa04143731928e937aa
- 2025-07-19 23:05:57,385 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/ambiguous.pdf in worker 0
- 2025-07-19 23:05:57,395 - __main__ - INFO - Got 48 pages to do for tests/gnarly_pdfs/bws_book_ch2.pdf in worker 0
- 2025-07-19 23:05:57,397 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/dolma-page-1.pdf in worker 0
- 2025-07-19 23:05:57,403 - __main__ - INFO - Got 8 pages to do for tests/gnarly_pdfs/failing_anchor_pg4.pdf in worker 0
- 2025-07-19 23:05:57,407 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/edgar.pdf in worker 0
- 2025-07-19 23:05:57,412 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/failing_pdf_pg9.pdf in worker 0
- 2025-07-19 23:05:57,427 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/form_on_later_pages.pdf in worker 0
- 2025-07-19 23:05:57,434 - __main__ - INFO - Got 3 pages to do for tests/gnarly_pdfs/guidebook_failed_pages.pdf in worker 0
- 2025-07-19 23:05:57,446 - __main__ - INFO - Got 29 pages to do for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf in worker 0
- 2025-07-19 23:05:57,449 - __main__ - INFO - Got 6 pages to do for tests/gnarly_pdfs/large_prompt_hint2.pdf in worker 0
- 2025-07-19 23:05:57,454 - __main__ - INFO - Got 4 pages to do for tests/gnarly_pdfs/large_prompt_hint3.pdf in worker 0
- 2025-07-19 23:05:57,456 - __main__ - INFO - Got 2 pages to do for tests/gnarly_pdfs/handwriting_bad_ocr.pdf in worker 0
- 2025-07-19 23:05:57,568 - __main__ - INFO - Got 27 pages to do for tests/gnarly_pdfs/large_prompt_hint1.pdf in worker 0
- 2025-07-19 23:05:57,570 - __main__ - INFO - Got 6 pages to do for tests/gnarly_pdfs/lots_of_sci_tables.pdf in worker 0
- 2025-07-19 23:05:57,652 - __main__ - INFO - Got 106 pages to do for tests/gnarly_pdfs/instructions_and_schematics.pdf in worker 0
- 2025-07-19 23:05:57,656 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/lots_of_chem_tables.pdf in worker 0
- 2025-07-19 23:05:57,660 - __main__ - INFO - Got 7 pages to do for tests/gnarly_pdfs/most_content_in_image_form.pdf in worker 0
- 2025-07-19 23:05:57,666 - __main__ - INFO - Got 8 pages to do for tests/gnarly_pdfs/not_parsing.pdf in worker 0
- 2025-07-19 23:05:57,668 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/newspaper.pdf in worker 0
- 2025-07-19 23:05:57,672 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/map1.pdf in worker 0
- 2025-07-19 23:05:57,682 - __main__ - INFO - Got 16 pages to do for tests/gnarly_pdfs/load_v_error.pdf in worker 0
- 2025-07-19 23:05:57,698 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/not_parsing2.pdf in worker 0
- 2025-07-19 23:05:57,710 - __main__ - INFO - Got 54 pages to do for tests/gnarly_pdfs/overrun_on_pg8.pdf in worker 0
- 2025-07-19 23:05:57,714 - __main__ - INFO - Got 14 pages to do for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf in worker 0
- 2025-07-19 23:05:57,715 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/olmo-page-1.pdf in worker 0
- 2025-07-19 23:05:57,718 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf in worker 0
- 2025-07-19 23:05:57,719 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/small_page_size.pdf in worker 0
- 2025-07-19 23:05:58,342 - __main__ - INFO - Got 2 pages to do for tests/gnarly_pdfs/skinnypage.pdf in worker 0
- 2025-07-19 23:05:58,344 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/some_ocr1.pdf in worker 0
- 2025-07-19 23:05:58,349 - __main__ - INFO - Got 26 pages to do for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf in worker 0
- 2025-07-19 23:05:58,356 - __main__ - INFO - Got 40 pages to do for tests/gnarly_pdfs/ti89_guidebook_programming.pdf in worker 0
- 2025-07-19 23:05:58,434 - __main__ - INFO - Got 68 pages to do for tests/gnarly_pdfs/slideshow_mostly_images.pdf in worker 0
- 2025-07-19 23:05:58,646 - sglang - INFO - [2025-07-19 23:05:58 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-19 23:05:58,646 - __main__ - INFO - [2025-07-19 23:05:58 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-19 23:05:58,647 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-19 23:06:04,437 - sglang - INFO - [2025-07-19 23:06:04] The server is fired up and ready to roll!
- 2025-07-19 23:06:04,437 - __main__ - INFO - [2025-07-19 23:06:04] The server is fired up and ready to roll!
- 2025-07-19 23:06:07,432 - __main__ - INFO - Queue remaining: 1
- 2025-07-19 23:06:07,433 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-19 23:06:07,433 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 529
- 2025-07-19 23:06:13,162 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ambiguous.pdf-1
- 2025-07-19 23:06:13,259 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-1
- 2025-07-19 23:06:13,264 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-2
- 2025-07-19 23:06:13,355 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-4
- 2025-07-19 23:06:13,374 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-3
- 2025-07-19 23:06:13,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-5
- 2025-07-19 23:06:13,463 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-6
- 2025-07-19 23:06:13,465 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-7
- 2025-07-19 23:06:13,546 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-8
- 2025-07-19 23:06:13,558 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-10
- 2025-07-19 23:06:13,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-9
- 2025-07-19 23:06:13,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-11
- 2025-07-19 23:06:13,639 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-13
- 2025-07-19 23:06:13,641 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-12
- 2025-07-19 23:06:13,647 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-14
- 2025-07-19 23:06:13,669 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-15
- 2025-07-19 23:06:13,674 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-17
- 2025-07-19 23:06:13,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-18
- 2025-07-19 23:06:13,739 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-16
- 2025-07-19 23:06:13,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-19
- 2025-07-19 23:06:13,750 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-21
- 2025-07-19 23:06:13,761 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-20
- 2025-07-19 23:06:13,766 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-23
- 2025-07-19 23:06:13,768 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-22
- 2025-07-19 23:06:13,769 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-25
- 2025-07-19 23:06:13,836 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-24
- 2025-07-19 23:06:13,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-26
- 2025-07-19 23:06:13,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-27
- 2025-07-19 23:06:13,843 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-28
- 2025-07-19 23:06:13,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-30
- 2025-07-19 23:06:13,857 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-33
- 2025-07-19 23:06:13,861 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-31
- 2025-07-19 23:06:13,862 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-29
- 2025-07-19 23:06:13,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-34
- 2025-07-19 23:06:13,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-32
- 2025-07-19 23:06:13,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-35
- 2025-07-19 23:06:13,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-36
- 2025-07-19 23:06:13,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-37
- 2025-07-19 23:06:13,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-41
- 2025-07-19 23:06:13,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-38
- 2025-07-19 23:06:13,951 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-39
- 2025-07-19 23:06:13,952 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-40
- 2025-07-19 23:06:13,967 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-44
- 2025-07-19 23:06:14,039 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-42
- 2025-07-19 23:06:14,041 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-45
- 2025-07-19 23:06:14,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-43
- 2025-07-19 23:06:14,045 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-48
- 2025-07-19 23:06:14,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-46
- 2025-07-19 23:06:14,047 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-47
- 2025-07-19 23:06:14,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-1
- 2025-07-19 23:06:14,143 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-2
- 2025-07-19 23:06:14,153 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-3
- 2025-07-19 23:06:14,155 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-7
- 2025-07-19 23:06:14,156 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-8
- 2025-07-19 23:06:14,158 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-4
- 2025-07-19 23:06:14,235 - __main__ - INFO - Built page query for tests/gnarly_pdfs/edgar.pdf-1
- 2025-07-19 23:06:14,236 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-1
- 2025-07-19 23:06:14,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-6
- 2025-07-19 23:06:14,243 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-2
- 2025-07-19 23:06:14,249 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-9
- 2025-07-19 23:06:14,251 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-4
- 2025-07-19 23:06:14,253 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-5
- 2025-07-19 23:06:14,259 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-8
- 2025-07-19 23:06:14,262 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-6
- 2025-07-19 23:06:14,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-3
- 2025-07-19 23:06:14,360 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-7
- 2025-07-19 23:06:14,361 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-4
- 2025-07-19 23:06:14,363 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-3
- 2025-07-19 23:06:14,435 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-5
- 2025-07-19 23:06:14,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-7
- 2025-07-19 23:06:14,440 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-2
- 2025-07-19 23:06:14,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-6
- 2025-07-19 23:06:14,453 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-8
- 2025-07-19 23:06:14,461 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-10
- 2025-07-19 23:06:14,537 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-1
- 2025-07-19 23:06:14,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-9
- 2025-07-19 23:06:14,543 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-2
- 2025-07-19 23:06:14,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-2
- 2025-07-19 23:06:14,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-6
- 2025-07-19 23:06:14,638 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-5
- 2025-07-19 23:06:14,651 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-10
- 2025-07-19 23:06:14,653 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-4
- 2025-07-19 23:06:14,654 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-12
- 2025-07-19 23:06:14,654 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-9
- 2025-07-19 23:06:14,656 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-5
- 2025-07-19 23:06:14,662 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-14
- 2025-07-19 23:06:14,735 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-13
- 2025-07-19 23:06:14,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-18
- 2025-07-19 23:06:14,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-15
- 2025-07-19 23:06:14,755 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-1
- 2025-07-19 23:06:14,758 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-21
- 2025-07-19 23:06:14,834 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-20
- 2025-07-19 23:06:14,835 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-1
- 2025-07-19 23:06:14,842 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-24
- 2025-07-19 23:06:14,855 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-25
- 2025-07-19 23:06:14,856 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-23
- 2025-07-19 23:06:14,857 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-11
- 2025-07-19 23:06:14,858 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-19
- 2025-07-19 23:06:14,858 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-17
- 2025-07-19 23:06:14,860 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-27
- 2025-07-19 23:06:14,860 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-16
- 2025-07-19 23:06:14,934 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-26
- 2025-07-19 23:06:14,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-29
- 2025-07-19 23:06:14,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-28
- 2025-07-19 23:06:14,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-6
- 2025-07-19 23:06:14,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-4
- 2025-07-19 23:06:14,955 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-3
- 2025-07-19 23:06:15,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-8
- 2025-07-19 23:06:15,039 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-3
- 2025-07-19 23:06:15,041 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-2
- 2025-07-19 23:06:15,042 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-22
- 2025-07-19 23:06:15,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-3
- 2025-07-19 23:06:15,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-1
- 2025-07-19 23:06:15,047 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-1
- 2025-07-19 23:06:15,054 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-4
- 2025-07-19 23:06:15,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-3
- 2025-07-19 23:06:15,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-5
- 2025-07-19 23:06:15,160 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-7
- 2025-07-19 23:06:15,644 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-2
- 2025-07-19 23:06:16,250 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-2
- 2025-07-19 23:06:16,252 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-4
- 2025-07-19 23:06:16,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-12
- 2025-07-19 23:06:16,538 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-16
- 2025-07-19 23:06:16,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-1
- 2025-07-19 23:06:16,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-19
- 2025-07-19 23:06:16,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-9
- 2025-07-19 23:06:16,640 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-5
- 2025-07-19 23:06:16,733 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-1
- 2025-07-19 23:06:16,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-7
- 2025-07-19 23:06:16,745 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-15
- 2025-07-19 23:06:16,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-6
- 2025-07-19 23:06:16,752 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-14
- 2025-07-19 23:06:16,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-1
- 2025-07-19 23:06:16,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-2
- 2025-07-19 23:06:16,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-3
- 2025-07-19 23:06:16,948 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-18
- 2025-07-19 23:06:17,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-3
- 2025-07-19 23:06:17,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-6
- 2025-07-19 23:06:17,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-13
- 2025-07-19 23:06:17,049 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-4
- 2025-07-19 23:06:17,133 - __main__ - INFO - Built page query for tests/gnarly_pdfs/dolma-page-1.pdf-1
- 2025-07-19 23:06:17,134 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-5
- 2025-07-19 23:06:17,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-10
- 2025-07-19 23:06:17,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-11
- 2025-07-19 23:06:17,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-9
- 2025-07-19 23:06:17,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-20
- 2025-07-19 23:06:17,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-23
- 2025-07-19 23:06:17,234 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-12
- 2025-07-19 23:06:17,236 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-13
- 2025-07-19 23:06:17,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-8
- 2025-07-19 23:06:17,245 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-7
- 2025-07-19 23:06:17,253 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-22
- 2025-07-19 23:06:17,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-19
- 2025-07-19 23:06:17,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-15
- 2025-07-19 23:06:17,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-10
- 2025-07-19 23:06:17,359 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-25
- 2025-07-19 23:06:17,433 - __main__ - INFO - Queue remaining: 1
- 2025-07-19 23:06:17,434 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-19 23:06:17,434 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 529
- 2025-07-19 23:06:17,434 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-18
- 2025-07-19 23:06:17,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-17
- 2025-07-19 23:06:17,442 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-2
- 2025-07-19 23:06:17,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-23
- 2025-07-19 23:06:17,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-22
- 2025-07-19 23:06:17,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-24
- 2025-07-19 23:06:17,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-6
- 2025-07-19 23:06:17,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-21
- 2025-07-19 23:06:17,450 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-21
- 2025-07-19 23:06:17,533 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-26
- 2025-07-19 23:06:17,540 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-27
- 2025-07-19 23:06:17,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-25
- 2025-07-19 23:06:17,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-28
- 2025-07-19 23:06:17,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-14
- 2025-07-19 23:06:17,658 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-3
- 2025-07-19 23:06:17,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-4
- 2025-07-19 23:06:17,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-35
- 2025-07-19 23:06:17,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-29
- 2025-07-19 23:06:17,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-31
- 2025-07-19 23:06:17,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-34
- 2025-07-19 23:06:17,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-32
- 2025-07-19 23:06:17,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-20
- 2025-07-19 23:06:17,748 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-33
- 2025-07-19 23:06:17,752 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-37
- 2025-07-19 23:06:17,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-41
- 2025-07-19 23:06:17,834 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-16
- 2025-07-19 23:06:17,835 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-26
- 2025-07-19 23:06:17,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-38
- 2025-07-19 23:06:17,846 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-30
- 2025-07-19 23:06:17,847 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-39
- 2025-07-19 23:06:17,848 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-36
- 2025-07-19 23:06:17,850 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-45
- 2025-07-19 23:06:17,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-42
- 2025-07-19 23:06:17,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-8
- 2025-07-19 23:06:17,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-43
- 2025-07-19 23:06:17,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-40
- 2025-07-19 23:06:17,947 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-47
- 2025-07-19 23:06:18,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-27
- 2025-07-19 23:06:18,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-5
- 2025-07-19 23:06:18,049 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-56
- 2025-07-19 23:06:18,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-50
- 2025-07-19 23:06:18,052 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-51
- 2025-07-19 23:06:18,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-11
- 2025-07-19 23:06:18,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-58
- 2025-07-19 23:06:18,234 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-52
- 2025-07-19 23:06:18,240 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-17
- 2025-07-19 23:06:18,336 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-49
- 2025-07-19 23:06:18,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-65
- 2025-07-19 23:06:18,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-62
- 2025-07-19 23:06:18,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-44
- 2025-07-19 23:06:18,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-59
- 2025-07-19 23:06:18,344 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-67
- 2025-07-19 23:06:18,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-48
- 2025-07-19 23:06:18,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-70
- 2025-07-19 23:06:18,349 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-24
- 2025-07-19 23:06:18,351 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-61
- 2025-07-19 23:06:18,353 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-54
- 2025-07-19 23:06:18,433 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-68
- 2025-07-19 23:06:18,440 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-64
- 2025-07-19 23:06:18,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-71
- 2025-07-19 23:06:18,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-63
- 2025-07-19 23:06:18,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-57
- 2025-07-19 23:06:18,450 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-73
- 2025-07-19 23:06:18,454 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-72
- 2025-07-19 23:06:18,535 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-74
- 2025-07-19 23:06:18,536 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-46
- 2025-07-19 23:06:18,544 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-77
- 2025-07-19 23:06:18,733 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-83
- 2025-07-19 23:06:18,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-66
- 2025-07-19 23:06:18,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-60
- 2025-07-19 23:06:18,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-79
- 2025-07-19 23:06:18,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-80
- 2025-07-19 23:06:18,838 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-55
- 2025-07-19 23:06:18,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-1
- 2025-07-19 23:06:18,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-94
- 2025-07-19 23:06:18,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-89
- 2025-07-19 23:06:18,846 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-53
- 2025-07-19 23:06:18,848 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-87
- 2025-07-19 23:06:18,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-69
- 2025-07-19 23:06:18,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-2
- 2025-07-19 23:06:18,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-88
- 2025-07-19 23:06:18,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-3
- 2025-07-19 23:06:18,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-5
- 2025-07-19 23:06:18,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-98
- 2025-07-19 23:06:18,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-2
- 2025-07-19 23:06:18,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-1
- 2025-07-19 23:06:18,949 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-5
- 2025-07-19 23:06:18,951 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-85
- 2025-07-19 23:06:18,951 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-4
- 2025-07-19 23:06:19,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-6
- 2025-07-19 23:06:19,036 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-92
- 2025-07-19 23:06:19,039 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-3
- 2025-07-19 23:06:19,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-7
- 2025-07-19 23:06:19,044 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-4
- 2025-07-19 23:06:19,045 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-9
- 2025-07-19 23:06:19,045 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-101
- 2025-07-19 23:06:19,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-91
- 2025-07-19 23:06:19,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-95
- 2025-07-19 23:06:19,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-6
- 2025-07-19 23:06:19,047 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-76
- 2025-07-19 23:06:19,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-99
- 2025-07-19 23:06:19,152 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-2
- 2025-07-19 23:06:19,234 - __main__ - INFO - Built page query for tests/gnarly_pdfs/handwriting_bad_ocr.pdf-1
- 2025-07-19 23:06:19,239 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-7
- 2025-07-19 23:06:19,241 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-8
- 2025-07-19 23:06:19,242 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-6
- 2025-07-19 23:06:19,244 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-84
- 2025-07-19 23:06:19,247 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-3
- 2025-07-19 23:06:19,250 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-78
- 2025-07-19 23:06:19,250 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-4
- 2025-07-19 23:06:19,251 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-5
- 2025-07-19 23:06:19,333 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-4
- 2025-07-19 23:06:19,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-3
- 2025-07-19 23:06:19,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-1
- 2025-07-19 23:06:19,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-8
- 2025-07-19 23:06:19,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-106
- 2025-07-19 23:06:19,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-86
- 2025-07-19 23:06:19,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-93
- 2025-07-19 23:06:19,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-81
- 2025-07-19 23:06:19,347 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-1
- 2025-07-19 23:06:19,349 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-14
- 2025-07-19 23:06:19,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-16
- 2025-07-19 23:06:19,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-2
- 2025-07-19 23:06:19,433 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-15
- 2025-07-19 23:06:19,434 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-6
- 2025-07-19 23:06:19,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-13
- 2025-07-19 23:06:19,450 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-75
- 2025-07-19 23:06:19,451 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-7
- 2025-07-19 23:06:19,452 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-5
- 2025-07-19 23:06:19,453 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-96
- 2025-07-19 23:06:19,533 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-102
- 2025-07-19 23:06:19,535 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-100
- 2025-07-19 23:06:19,537 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-1
- 2025-07-19 23:06:19,545 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-4
- 2025-07-19 23:06:19,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-10
- 2025-07-19 23:06:19,635 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-5
- 2025-07-19 23:06:19,645 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-3
- 2025-07-19 23:06:19,647 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-7
- 2025-07-19 23:06:19,649 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-9
- 2025-07-19 23:06:19,652 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-8
- 2025-07-19 23:06:19,733 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-3
- 2025-07-19 23:06:19,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-6
- 2025-07-19 23:06:19,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-103
- 2025-07-19 23:06:19,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-7
- 2025-07-19 23:06:19,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-12
- 2025-07-19 23:06:19,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-105
- 2025-07-19 23:06:19,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-10
- 2025-07-19 23:06:19,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-82
- 2025-07-19 23:06:19,838 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-11
- 2025-07-19 23:06:19,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-21
- 2025-07-19 23:06:19,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-8
- 2025-07-19 23:06:19,846 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-5
- 2025-07-19 23:06:19,848 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-13
- 2025-07-19 23:06:19,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-16
- 2025-07-19 23:06:19,934 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-15
- 2025-07-19 23:06:19,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-14
- 2025-07-19 23:06:19,936 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-97
- 2025-07-19 23:06:19,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-104
- 2025-07-19 23:06:19,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-9
- 2025-07-19 23:06:19,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-22
- 2025-07-19 23:06:19,942 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-24
- 2025-07-19 23:06:19,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-20
- 2025-07-19 23:06:19,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-17
- 2025-07-19 23:06:19,948 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-18
- 2025-07-19 23:06:19,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-90
- 2025-07-19 23:06:19,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-2
- 2025-07-19 23:06:19,953 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-11
- 2025-07-19 23:06:20,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-23
- 2025-07-19 23:06:20,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-19
- 2025-07-19 23:06:20,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-28
- 2025-07-19 23:06:20,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-26
- 2025-07-19 23:06:20,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-25
- 2025-07-19 23:06:20,049 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-4
- 2025-07-19 23:06:20,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-30
- 2025-07-19 23:06:20,055 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-27
- 2025-07-19 23:06:20,138 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-37
- 2025-07-19 23:06:20,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-31
- 2025-07-19 23:06:20,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-36
- 2025-07-19 23:06:20,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-32
- 2025-07-19 23:06:20,145 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-41
- 2025-07-19 23:06:20,150 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-6
- 2025-07-19 23:06:20,152 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-39
- 2025-07-19 23:06:20,159 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-29
- 2025-07-19 23:06:20,235 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-3
- 2025-07-19 23:06:20,236 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-40
- 2025-07-19 23:06:20,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-35
- 2025-07-19 23:06:20,240 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-9
- 2025-07-19 23:06:20,240 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-45
- 2025-07-19 23:06:20,242 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-48
- 2025-07-19 23:06:20,245 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-46
- 2025-07-19 23:06:20,246 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-4
- 2025-07-19 23:06:20,335 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-47
- 2025-07-19 23:06:20,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-43
- 2025-07-19 23:06:20,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-50
- 2025-07-19 23:06:20,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-52
- 2025-07-19 23:06:20,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-7
- 2025-07-19 23:06:20,344 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-51
- 2025-07-19 23:06:20,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-42
- 2025-07-19 23:06:20,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-54
- 2025-07-19 23:06:20,348 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-34
- 2025-07-19 23:06:20,351 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-7
- 2025-07-19 23:06:20,353 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-2
- 2025-07-19 23:06:20,438 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-8
- 2025-07-19 23:06:20,440 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-44
- 2025-07-19 23:06:20,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-12
- 2025-07-19 23:06:20,546 - __main__ - INFO - Built page query for tests/gnarly_pdfs/handwriting_bad_ocr.pdf-2
- 2025-07-19 23:06:20,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-49
- 2025-07-19 23:06:20,550 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-38
- 2025-07-19 23:06:20,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-33
- 2025-07-19 23:06:20,651 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-1
- 2025-07-19 23:06:20,655 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-2
- 2025-07-19 23:06:20,758 - __main__ - INFO - Built page query for tests/gnarly_pdfs/newspaper.pdf-1
- 2025-07-19 23:06:20,835 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-53
- 2025-07-19 23:06:21,138 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-1
- 2025-07-19 23:06:21,175 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-3
- 2025-07-19 23:06:21,248 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-1
- 2025-07-19 23:06:21,268 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-10
- 2025-07-19 23:06:21,269 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-4
- 2025-07-19 23:06:21,335 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
- 2025-07-19 23:06:21,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-2
- 2025-07-19 23:06:21,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-6
- 2025-07-19 23:06:21,362 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-8
- 2025-07-19 23:06:21,437 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-6
- 2025-07-19 23:06:21,484 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-3
- 2025-07-19 23:06:21,536 - __main__ - INFO - Built page query for tests/gnarly_pdfs/some_ocr1.pdf-1
- 2025-07-19 23:06:21,557 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-1
- 2025-07-19 23:06:21,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-14
- 2025-07-19 23:06:21,635 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-14
- 2025-07-19 23:06:21,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-8
- 2025-07-19 23:06:21,648 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-5
- 2025-07-19 23:06:21,685 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-10
- 2025-07-19 23:06:21,836 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-11
- 2025-07-19 23:06:21,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-7
- 2025-07-19 23:06:21,880 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-12
- 2025-07-19 23:06:21,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-13
- 2025-07-19 23:06:21,955 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-15
- 2025-07-19 23:06:21,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-21
- 2025-07-19 23:06:21,960 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-4
- 2025-07-19 23:06:21,961 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-5
- 2025-07-19 23:06:21,972 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-22
- 2025-07-19 23:06:22,065 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-16
- 2025-07-19 23:06:22,069 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-13
- 2025-07-19 23:06:22,072 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-17
- 2025-07-19 23:06:22,074 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-12
- 2025-07-19 23:06:22,086 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-26
- 2025-07-19 23:06:22,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-9
- 2025-07-19 23:06:22,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-5
- 2025-07-19 23:06:22,149 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-1
- 2025-07-19 23:06:22,167 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-8
- 2025-07-19 23:06:22,176 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-11
- 2025-07-19 23:06:22,237 - __main__ - INFO - Built page query for tests/gnarly_pdfs/olmo-page-1.pdf-1
- 2025-07-19 23:06:22,242 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-10
- 2025-07-19 23:06:22,250 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-3
- 2025-07-19 23:06:22,333 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-1
- 2025-07-19 23:06:22,336 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-9
- 2025-07-19 23:06:22,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-7
- 2025-07-19 23:06:22,342 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-18
- 2025-07-19 23:06:22,349 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-6
- 2025-07-19 23:06:22,359 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-2
- 2025-07-19 23:06:22,383 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-6
- 2025-07-19 23:06:22,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-7
- 2025-07-19 23:06:22,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-20
- 2025-07-19 23:06:22,451 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-8
- 2025-07-19 23:06:22,475 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-24
- 2025-07-19 23:06:22,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-4
- 2025-07-19 23:06:22,544 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-9
- 2025-07-19 23:06:22,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-16
- 2025-07-19 23:06:22,553 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-12
- 2025-07-19 23:06:22,556 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-5
- 2025-07-19 23:06:22,559 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-2
- 2025-07-19 23:06:22,572 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-17
- 2025-07-19 23:06:22,634 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-19
- 2025-07-19 23:06:22,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-25
- 2025-07-19 23:06:22,640 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-10
- 2025-07-19 23:06:22,645 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-24
- 2025-07-19 23:06:22,656 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-18
- 2025-07-19 23:06:22,664 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-19
- 2025-07-19 23:06:22,666 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-13
- 2025-07-19 23:06:22,670 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-25
- 2025-07-19 23:06:22,735 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-22
- 2025-07-19 23:06:22,737 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-23
- 2025-07-19 23:06:22,778 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-29
- 2025-07-19 23:06:22,873 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-26
- 2025-07-19 23:06:22,934 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-9
- 2025-07-19 23:06:22,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-32
- 2025-07-19 23:06:22,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-14
- 2025-07-19 23:06:22,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-20
- 2025-07-19 23:06:22,955 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-15
- 2025-07-19 23:06:22,957 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-37
- 2025-07-19 23:06:22,965 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-36
- 2025-07-19 23:06:23,037 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-1
- 2025-07-19 23:06:23,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-21
- 2025-07-19 23:06:23,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-3
- 2025-07-19 23:06:23,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-11
- 2025-07-19 23:06:23,061 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-6
- 2025-07-19 23:06:23,077 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-5
- 2025-07-19 23:06:23,133 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-40
- 2025-07-19 23:06:23,143 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-34
- 2025-07-19 23:06:23,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-39
- 2025-07-19 23:06:23,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-33
- 2025-07-19 23:06:23,152 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-23
- 2025-07-19 23:06:23,153 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-38
- 2025-07-19 23:06:23,156 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-28
- 2025-07-19 23:06:23,162 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-31
- 2025-07-19 23:06:23,174 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-27
- 2025-07-19 23:06:23,264 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-8
- 2025-07-19 23:06:23,269 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-35
- 2025-07-19 23:06:23,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-30
- 2025-07-19 23:06:23,373 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-4
- 2025-07-19 23:06:23,535 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-35
- 2025-07-19 23:06:23,539 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-18
- 2025-07-19 23:06:23,545 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-7
- 2025-07-19 23:06:23,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-36
- 2025-07-19 23:06:23,563 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-19
- 2025-07-19 23:06:23,574 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-10
- 2025-07-19 23:06:23,575 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-2
- 2025-07-19 23:06:23,642 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-12
- 2025-07-19 23:06:23,657 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-17
- 2025-07-19 23:06:23,664 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-16
- 2025-07-19 23:06:23,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-31
- 2025-07-19 23:06:23,739 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-27
- 2025-07-19 23:06:23,777 - __main__ - INFO - Built page query for tests/gnarly_pdfs/small_page_size.pdf-1
- 2025-07-19 23:06:23,946 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-9
- 2025-07-19 23:06:23,948 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-22
- 2025-07-19 23:06:23,962 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-48
- 2025-07-19 23:06:23,974 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-42
- 2025-07-19 23:06:24,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-41
- 2025-07-19 23:06:24,057 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-14
- 2025-07-19 23:06:24,076 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-39
- 2025-07-19 23:06:24,077 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-38
- 2025-07-19 23:06:24,138 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-13
- 2025-07-19 23:06:24,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-43
- 2025-07-19 23:06:24,161 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-29
- 2025-07-19 23:06:24,167 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-40
- 2025-07-19 23:06:24,170 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-24
- 2025-07-19 23:06:24,176 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-46
- 2025-07-19 23:06:24,246 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-37
- 2025-07-19 23:06:24,269 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-23
- 2025-07-19 23:06:24,276 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-20
- 2025-07-19 23:06:24,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-30
- 2025-07-19 23:06:24,363 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-55
- 2025-07-19 23:06:24,665 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-11
- 2025-07-19 23:06:24,895 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-15
- 2025-07-19 23:06:24,895 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-50
- 2025-07-19 23:06:24,895 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-44
- 2025-07-19 23:06:24,923 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-28
- 2025-07-19 23:06:24,923 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-45
- 2025-07-19 23:06:24,924 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-32
- 2025-07-19 23:06:24,958 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-21
- 2025-07-19 23:06:24,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-49
- 2025-07-19 23:06:24,960 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-26
- 2025-07-19 23:06:24,960 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-47
- 2025-07-19 23:06:24,961 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-68
- 2025-07-19 23:06:24,961 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-33
- 2025-07-19 23:06:25,005 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-25
- 2025-07-19 23:06:25,005 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-53
- 2025-07-19 23:06:25,005 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-54
- 2025-07-19 23:06:25,006 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-61
- 2025-07-19 23:06:25,006 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-67
- 2025-07-19 23:06:25,006 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-57
- 2025-07-19 23:06:25,006 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-34
- 2025-07-19 23:06:25,007 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-56
- 2025-07-19 23:06:25,007 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-62
- 2025-07-19 23:06:25,007 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-51
- 2025-07-19 23:06:25,008 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-52
- 2025-07-19 23:06:25,021 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-60
- 2025-07-19 23:06:25,022 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-59
- 2025-07-19 23:06:25,022 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-64
- 2025-07-19 23:06:25,022 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-66
- 2025-07-19 23:06:25,022 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-58
- 2025-07-19 23:06:25,023 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-65
- 2025-07-19 23:06:25,023 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-63
- 2025-07-19 23:06:27,435 - __main__ - INFO - Queue remaining: 1
- 2025-07-19 23:06:27,435 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-19 23:06:27,436 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 529
- 2025-07-19 23:06:27,649 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
- 2025-07-19 23:06:31,151 - sglang - INFO - Token indices sequence length is longer than the specified maximum sequence length for this model (78749 > 32768). Running this sequence through the model will result in indexing errors
- 2025-07-19 23:06:34,451 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-26 cancelled
- 2025-07-19 23:06:34,451 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-29 cancelled
- 2025-07-19 23:06:34,451 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-8 cancelled
- 2025-07-19 23:06:34,451 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-18 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-9 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-19 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-10 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-20 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-11 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-21 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-1 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-12 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-22 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-2 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-13 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-23 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-3 cancelled
- 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-14 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-24 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-4 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-15 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-25 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-5 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-16 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-27 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-6 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-17 cancelled
- 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-28 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-7 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-40 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-18 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-9 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-30 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-19 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-10 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-31 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-20 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-11 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-32 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-1 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-21 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-12 cancelled
- 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-33 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-2 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-22 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-13 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-34 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-3 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-23 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-14 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-35 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-4 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-24 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-15 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-36 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-5 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-25 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-16 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-37 cancelled
- 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-6 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-27 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-17 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-38 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-7 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-28 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-29 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-26 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-39 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-8 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-1 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-4 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-7 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-2 cancelled
- 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-5 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-8 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-3 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-6 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-5 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-3 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-6 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-1 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-4 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-2 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-1 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-4 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-7 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-2 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-5 cancelled
- 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-8 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-3 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-6 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-8 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-3 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-6 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-1 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-9 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-4 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-7 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-2 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-5 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-5 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-8 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-3 cancelled
- 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-6 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-1 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-9 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-4 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-7 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-2 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/ambiguous.pdf-1 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-26 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-29 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-8 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-40 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-18 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-30 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-9 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-41 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-19 cancelled
- 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-31 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-10 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-42 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-20 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-32 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-11 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-43 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-21 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-1 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-33 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-12 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-44 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-22 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-2 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-34 cancelled
- 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-13 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-45 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-23 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-3 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-35 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-14 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-46 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-24 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-4 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-36 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-15 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-39 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-47 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-25 cancelled
- 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-5 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-37 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-16 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-48 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-27 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-6 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-38 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-17 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-28 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-7 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/edgar.pdf-1 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/olmo-page-1.pdf-1 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/dolma-page-1.pdf-1 cancelled
- 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-31 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-10 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-42 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-20 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-53 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-32 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-11 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-43 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-1 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-21 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-54 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-33 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-12 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-44 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-2 cancelled
- 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-22 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-34 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-13 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-45 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-3 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-23 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-35 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-14 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-46 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-4 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-24 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-36 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-52 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-15 cancelled
- 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-47 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-5 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-25 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-37 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-16 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-48 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-6 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-27 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-38 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-17 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-49 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-7 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-28 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-39 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-26 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-50 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-8 cancelled
- 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-29 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-40 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-18 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-51 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-9 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-30 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-41 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-19 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-4 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-7 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-2 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-5 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-8 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-3 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-9 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-6 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-1 cancelled
- 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-6 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-1 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-4 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-2 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-5 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-3 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-1 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-9 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-14 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-4 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-12 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-7 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-2 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-10 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-11 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-5 cancelled
- 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-13 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-8 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-3 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-6 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/guidebook_failed_pages.pdf-2 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/guidebook_failed_pages.pdf-3 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/guidebook_failed_pages.pdf-1 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-1 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-3 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-2 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-4 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-9 cancelled
- 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-19 cancelled
- 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-10 cancelled
- 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-20 cancelled
- 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-11 cancelled
- 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-21 cancelled
- 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-1 cancelled
- 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-12 cancelled
- 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-22 cancelled
- 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-2 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-13 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-23 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-3 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-14 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-24 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-4 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-15 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-25 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-5 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-16 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-27 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-6 cancelled
- 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-17 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-7 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-26 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-8 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-18 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/skinnypage.pdf-2 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/skinnypage.pdf-1 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-7 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-15 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-12 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-2 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-10 cancelled
- 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-5 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-13 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-8 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-9 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-16 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-3 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-11 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-6 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-14 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-1 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-4 cancelled
- 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-13 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-23 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-3 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-14 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-24 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-4 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-15 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-25 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-5 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-16 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-6 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-17 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-7 cancelled
- 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-26 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-8 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-18 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-9 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-19 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-10 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-20 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-11 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-21 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-1 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-12 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-22 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-2 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-4 cancelled
- 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-89 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-47 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-5 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-90 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-48 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-6 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-91 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-49 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-7 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-92 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-50 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-8 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-93 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-51 cancelled
- 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-9 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-94 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-52 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-10 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-95 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-53 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-11 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-96 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-54 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-12 cancelled
- 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-97 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-55 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-13 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-98 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-56 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-14 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-99 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-57 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-15 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-100 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-58 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-16 cancelled
- 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-101 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-59 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-17 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-102 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-60 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-26 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-103 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-61 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-18 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-104 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-62 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-19 cancelled
- 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-105 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-63 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-20 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-106 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-64 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-21 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-65 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-22 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-66 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-23 cancelled
- 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-67 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-24 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-68 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-25 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-69 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-27 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-70 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-28 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-71 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-29 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-72 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-30 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-73 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-31 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-74 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-32 cancelled
- 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-75 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-33 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-76 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-34 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-77 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-35 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-78 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-36 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-79 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-37 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-80 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-38 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-81 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-39 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-82 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-40 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-83 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-41 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-84 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-42 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-85 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-43 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-1 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-86 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-44 cancelled
- 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-2 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-87 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-45 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-3 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-88 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-46 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-2 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-10 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-5 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-8 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-3 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-6 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-1 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-9 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-4 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-7 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-32 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-11 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-43 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-64 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-21 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-53 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-54 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-1 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-33 cancelled
- 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-12 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-44 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-65 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-22 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-55 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-2 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-34 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-13 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-45 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-66 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-23 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-56 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-3 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-35 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-14 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-46 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-67 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-24 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-57 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-4 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-36 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-15 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-47 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-68 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-25 cancelled
- 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-58 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-5 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-37 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-16 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-48 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-27 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-59 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-6 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-38 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-17 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-49 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-28 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-60 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-7 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-39 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-26 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-50 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-29 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-61 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-8 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-40 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-18 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-51 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-30 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-62 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-9 cancelled
- 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-41 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-19 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-42 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-52 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-31 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-63 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-10 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-20 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-7 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-2 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-10 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-5 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-8 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-3 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-6 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-9 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-1 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-4 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/handwriting_bad_ocr.pdf-1 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/handwriting_bad_ocr.pdf-2 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/map1.pdf-1 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/small_page_size.pdf-1 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-3 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-6 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-1 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-4 cancelled
- 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-7 cancelled
- 2025-07-19 23:06:34,650 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-2 cancelled
- 2025-07-19 23:06:34,650 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-5 cancelled
- 2025-07-19 23:06:34,650 - __main__ - INFO - Process page tests/gnarly_pdfs/some_ocr1.pdf-1 cancelled
- 2025-07-19 23:06:34,650 - __main__ - INFO - Process page tests/gnarly_pdfs/newspaper.pdf-1 cancelled
- 2025-07-19 23:06:34,650 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-19 23:07:14,182 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:07:14,182 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017009.pdf as PDF document
- 2025-07-19 23:07:14,182 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-19 23:07:14,182 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017020.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017028.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017041.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017049.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2C47380T4440125017008 (1).pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2C47380T4440125017008.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2C47380T4440125017023.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125011001.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017003.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017006.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017007.pdf as PDF document
- 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017011.pdf as PDF document
- 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017023.pdf as PDF document
- 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017041.pdf as PDF document
- 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017048.pdf as PDF document
- 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D42580L4442014010000.pdf as PDF document
- 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D6222364440125017008.pdf as PDF document
- 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D6222364440125017049.pdf as PDF document
- 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445202592174409C4442111641000.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202592174409C4442111667001.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202592174409C4442111820005.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202MB2D1177604440125017023.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202MB2D1177604440125017027.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202MB2D1177604440125017041.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202MB2D117760444212503R001.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U4440711000000.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U44421110A0005.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U4442111640000.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U4442111641000.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U4442111667001.pdf as PDF document
- 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203707759010G4442014010000.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445203MB2C21084N4440125017008.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445203MB2C21084N444212503R001.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007029500K4440711000000.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007029500K44421110A0001.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007029500K44421110A0005.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007029527B4442106100010.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007030157E4440149001001.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445224007035644H4440711000000.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445224007035644H44421110A0001.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445224007035644H44421110A0005.pdf as PDF document
- 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445224007035652C4440114020001.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445224007035652C4442014010000.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A4440711000000.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A44421110A0001.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A44421110A0005.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A4442111641000.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A4442111667001.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A4442111820005.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/12445200456019383L3442111667001.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/12445200726503846U344201405500301.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/12445200726503846U3442014055009.pdf as PDF document
- 2025-07-19 23:07:14,187 - __main__ - INFO - Found 54 total pdf paths to add
- 2025-07-19 23:07:14,306 - __main__ - INFO - Calculated items_per_group: 53 based on average pages per PDF: 9.35
- 2025-07-19 23:07:14,512 - __main__ - INFO - Starting pipeline with PID 555339
- 2025-07-19 23:07:14,512 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:07:25,124 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-19 23:07:27,569 - sglang - INFO - [2025-07-19 23:07:27] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=495738545, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:07:27,569 - __main__ - INFO - [2025-07-19 23:07:27] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=495738545, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:07:31,183 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-19 23:07:33,574 - sglang - INFO - [2025-07-19 23:07:33] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:07:33,574 - __main__ - INFO - [2025-07-19 23:07:33] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:07:37,266 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-19 23:07:43,350 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-19 23:07:43,353 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-19 23:08:05,369 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:08:05,369 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-19 23:08:05,369 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-19 23:08:05,374 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-19 23:08:05,594 - __main__ - INFO - Starting pipeline with PID 556062
- 2025-07-19 23:08:05,594 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:08:11,182 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-19 23:08:13,754 - sglang - INFO - [2025-07-19 23:08:13] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=266199639, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:08:13,754 - __main__ - INFO - [2025-07-19 23:08:13] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=266199639, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:08:17,243 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-19 23:08:23,178 - sglang - INFO - [2025-07-19 23:08:23] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:08:23,178 - __main__ - INFO - [2025-07-19 23:08:23] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:08:23,301 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-19 23:08:29,383 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-19 23:08:30,095 - sglang - INFO - [2025-07-19 23:08:30 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:08:30,095 - __main__ - INFO - [2025-07-19 23:08:30 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:08:30,759 - sglang - INFO - [2025-07-19 23:08:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:08:30,759 - __main__ - INFO - [2025-07-19 23:08:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:08:30,759 - sglang - INFO - [2025-07-19 23:08:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:08:30,759 - __main__ - INFO - [2025-07-19 23:08:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:08:30,759 - sglang - INFO - [2025-07-19 23:08:30 TP0] Init torch distributed begin.
- 2025-07-19 23:08:30,759 - __main__ - INFO - [2025-07-19 23:08:30 TP0] Init torch distributed begin.
- 2025-07-19 23:08:35,464 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-19 23:08:36,190 - sglang - INFO - [2025-07-19 23:08:36 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:08:36,190 - __main__ - INFO - [2025-07-19 23:08:36 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:08:37,388 - sglang - INFO - [2025-07-19 23:08:37 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:08:37,388 - __main__ - INFO - [2025-07-19 23:08:37 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:08:37,969 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:08:37,969 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:08:38,880 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.10it/s]
- 2025-07-19 23:08:38,880 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.10it/s]
- 2025-07-19 23:08:39,724 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.15it/s]
- 2025-07-19 23:08:39,725 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.15it/s]
- 2025-07-19 23:08:40,691 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.09it/s]
- 2025-07-19 23:08:40,691 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.09it/s]
- 2025-07-19 23:08:41,545 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-19 23:08:41,748 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.03it/s]
- 2025-07-19 23:08:41,748 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.03it/s]
- 2025-07-19 23:08:41,748 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.06it/s]
- 2025-07-19 23:08:41,748 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.06it/s]
- 2025-07-19 23:08:41,748 - sglang - INFO -
- 2025-07-19 23:08:41,748 - __main__ - INFO -
- 2025-07-19 23:08:42,117 - sglang - INFO - [2025-07-19 23:08:42 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:08:42,117 - __main__ - INFO - [2025-07-19 23:08:42 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:08:42,126 - sglang - INFO - [2025-07-19 23:08:42 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:08:42,126 - __main__ - INFO - [2025-07-19 23:08:42 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:08:42,126 - sglang - INFO - [2025-07-19 23:08:42 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:08:42,126 - __main__ - INFO - [2025-07-19 23:08:42 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:08:42,354 - sglang - INFO - [2025-07-19 23:08:42 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:08:42,354 - __main__ - INFO - [2025-07-19 23:08:42 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:08:44,496 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.41s/it]
50%|█████ | 2/4 [00:01<00:01, 1.38it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.99it/s]
100%|██████████| 4/4 [00:02<00:00, 2.50it/s]
100%|██████████| 4/4 [00:02<00:00, 1.87it/s]
- 2025-07-19 23:08:44,496 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.41s/it]
50%|█████ | 2/4 [00:01<00:01, 1.38it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.99it/s]
100%|██████████| 4/4 [00:02<00:00, 2.50it/s]
100%|██████████| 4/4 [00:02<00:00, 1.87it/s]
- 2025-07-19 23:08:44,496 - sglang - INFO - [2025-07-19 23:08:44 TP0] Capture cuda graph end. Time elapsed: 2.14 s
- 2025-07-19 23:08:44,496 - __main__ - INFO - [2025-07-19 23:08:44 TP0] Capture cuda graph end. Time elapsed: 2.14 s
- 2025-07-19 23:08:47,411 - sglang - INFO - [2025-07-19 23:08:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:08:47,411 - __main__ - INFO - [2025-07-19 23:08:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:08:47,494 - sglang - INFO - [2025-07-19 23:08:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:08:47,494 - __main__ - INFO - [2025-07-19 23:08:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:08:47,625 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-19 23:08:53,705 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-19 23:08:59,786 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-19 23:09:05,866 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-19 23:09:11,947 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-19 23:09:18,027 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-19 23:09:24,108 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-19 23:09:30,188 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-19 23:09:36,268 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-19 23:09:42,348 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-19 23:09:42,545 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-19 23:10:41,614 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:10:41,614 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-19 23:10:41,614 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-19 23:10:41,619 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-19 23:10:41,861 - __main__ - INFO - Starting pipeline with PID 557098
- 2025-07-19 23:10:41,861 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:11:22,329 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:11:22,329 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-19 23:11:22,329 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-19 23:11:22,334 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-19 23:11:22,524 - __main__ - INFO - Starting pipeline with PID 557180
- 2025-07-19 23:11:22,524 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:11:28,359 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-19 23:11:30,721 - sglang - INFO - [2025-07-19 23:11:30] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=247677095, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:11:30,722 - __main__ - INFO - [2025-07-19 23:11:30] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=247677095, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:11:34,466 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-19 23:11:40,546 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-19 23:11:41,791 - sglang - INFO - [2025-07-19 23:11:41] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:11:41,792 - __main__ - INFO - [2025-07-19 23:11:41] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:11:46,615 - sglang - INFO - [2025-07-19 23:11:46 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:11:46,615 - __main__ - INFO - [2025-07-19 23:11:46 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:11:46,627 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-19 23:11:47,281 - sglang - INFO - [2025-07-19 23:11:47 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:11:47,281 - __main__ - INFO - [2025-07-19 23:11:47 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:11:47,281 - sglang - INFO - [2025-07-19 23:11:47 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:11:47,281 - __main__ - INFO - [2025-07-19 23:11:47 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:11:47,281 - sglang - INFO - [2025-07-19 23:11:47 TP0] Init torch distributed begin.
- 2025-07-19 23:11:47,282 - __main__ - INFO - [2025-07-19 23:11:47 TP0] Init torch distributed begin.
- 2025-07-19 23:11:52,667 - sglang - INFO - [2025-07-19 23:11:52 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:11:52,667 - __main__ - INFO - [2025-07-19 23:11:52 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:11:52,707 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-19 23:11:54,501 - sglang - INFO - [2025-07-19 23:11:54 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:11:54,501 - __main__ - INFO - [2025-07-19 23:11:54 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:11:55,094 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:11:55,094 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:11:55,375 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.56it/s]
- 2025-07-19 23:11:55,375 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.56it/s]
- 2025-07-19 23:11:56,196 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
- 2025-07-19 23:11:56,197 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
- 2025-07-19 23:11:56,989 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.45it/s]
- 2025-07-19 23:11:56,989 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.45it/s]
- 2025-07-19 23:11:57,877 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.30it/s]
- 2025-07-19 23:11:57,877 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.30it/s]
- 2025-07-19 23:11:57,877 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.44it/s]
- 2025-07-19 23:11:57,877 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.44it/s]
- 2025-07-19 23:11:57,877 - sglang - INFO -
- 2025-07-19 23:11:57,878 - __main__ - INFO -
- 2025-07-19 23:11:58,085 - sglang - INFO - [2025-07-19 23:11:58 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:11:58,086 - __main__ - INFO - [2025-07-19 23:11:58 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:11:58,095 - sglang - INFO - [2025-07-19 23:11:58 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:11:58,095 - __main__ - INFO - [2025-07-19 23:11:58 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:11:58,095 - sglang - INFO - [2025-07-19 23:11:58 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:11:58,095 - __main__ - INFO - [2025-07-19 23:11:58 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:11:58,326 - sglang - INFO - [2025-07-19 23:11:58 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:11:58,327 - __main__ - INFO - [2025-07-19 23:11:58 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:11:58,788 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-19 23:12:00,152 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.09s/it]
50%|█████ | 2/4 [00:01<00:01, 1.69it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.31it/s]
100%|██████████| 4/4 [00:01<00:00, 2.79it/s]
100%|██████████| 4/4 [00:01<00:00, 2.20it/s]
- 2025-07-19 23:12:00,152 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.09s/it]
50%|█████ | 2/4 [00:01<00:01, 1.69it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.31it/s]
100%|██████████| 4/4 [00:01<00:00, 2.79it/s]
100%|██████████| 4/4 [00:01<00:00, 2.20it/s]
- 2025-07-19 23:12:00,153 - sglang - INFO - [2025-07-19 23:12:00 TP0] Capture cuda graph end. Time elapsed: 1.83 s
- 2025-07-19 23:12:00,153 - __main__ - INFO - [2025-07-19 23:12:00 TP0] Capture cuda graph end. Time elapsed: 1.83 s
- 2025-07-19 23:12:03,340 - sglang - INFO - [2025-07-19 23:12:03 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:12:03,341 - __main__ - INFO - [2025-07-19 23:12:03 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:12:03,421 - sglang - INFO - [2025-07-19 23:12:03] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:12:03,422 - __main__ - INFO - [2025-07-19 23:12:03] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:12:04,868 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-19 23:12:10,948 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-19 23:12:17,028 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-19 23:12:23,107 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-19 23:12:29,186 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-19 23:12:35,266 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-19 23:12:41,345 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-19 23:12:47,425 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-19 23:12:53,503 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-19 23:12:55,861 - sglang - INFO - Process Process-2:
- 2025-07-19 23:12:55,861 - __main__ - INFO - Process Process-2:
- 2025-07-19 23:12:55,861 - sglang - INFO - Process Process-1:
- 2025-07-19 23:12:55,861 - __main__ - INFO - Process Process-1:
- 2025-07-19 23:12:55,862 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-19 23:44:52,028 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:44:52,029 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-19 23:44:52,029 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-19 23:44:52,034 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-19 23:44:52,207 - __main__ - INFO - Starting pipeline with PID 560498
- 2025-07-19 23:44:52,208 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:45:02,904 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-19 23:45:05,219 - sglang - INFO - [2025-07-19 23:45:05] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=279913238, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:45:05,219 - __main__ - INFO - [2025-07-19 23:45:05] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=279913238, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:45:08,985 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-19 23:45:14,633 - sglang - INFO - [2025-07-19 23:45:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:45:14,634 - __main__ - INFO - [2025-07-19 23:45:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:45:15,066 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-19 23:45:21,148 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-19 23:45:21,289 - sglang - INFO - [2025-07-19 23:45:21 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:45:21,289 - __main__ - INFO - [2025-07-19 23:45:21 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:45:22,002 - sglang - INFO - [2025-07-19 23:45:22 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:45:22,003 - __main__ - INFO - [2025-07-19 23:45:22 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:45:22,003 - sglang - INFO - [2025-07-19 23:45:22 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:45:22,003 - __main__ - INFO - [2025-07-19 23:45:22 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:45:22,003 - sglang - INFO - [2025-07-19 23:45:22 TP0] Init torch distributed begin.
- 2025-07-19 23:45:22,003 - __main__ - INFO - [2025-07-19 23:45:22 TP0] Init torch distributed begin.
- 2025-07-19 23:45:27,230 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-19 23:45:27,404 - sglang - INFO - [2025-07-19 23:45:27 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:45:27,404 - __main__ - INFO - [2025-07-19 23:45:27 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:45:28,877 - sglang - INFO - [2025-07-19 23:45:28 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:45:28,877 - __main__ - INFO - [2025-07-19 23:45:28 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:45:29,998 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:45:29,998 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:45:30,295 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.37it/s]
- 2025-07-19 23:45:30,295 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.37it/s]
- 2025-07-19 23:45:31,105 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
- 2025-07-19 23:45:31,105 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
- 2025-07-19 23:45:31,891 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.46it/s]
- 2025-07-19 23:45:31,891 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.46it/s]
- 2025-07-19 23:45:32,771 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.31it/s]
- 2025-07-19 23:45:32,771 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.31it/s]
- 2025-07-19 23:45:32,771 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.44it/s]
- 2025-07-19 23:45:32,771 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.44it/s]
- 2025-07-19 23:45:32,771 - sglang - INFO -
- 2025-07-19 23:45:32,771 - __main__ - INFO -
- 2025-07-19 23:45:32,930 - sglang - INFO - [2025-07-19 23:45:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:45:32,931 - __main__ - INFO - [2025-07-19 23:45:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:45:32,938 - sglang - INFO - [2025-07-19 23:45:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:45:32,938 - __main__ - INFO - [2025-07-19 23:45:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:45:32,938 - sglang - INFO - [2025-07-19 23:45:32 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:45:32,938 - __main__ - INFO - [2025-07-19 23:45:32 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:45:33,126 - sglang - INFO - [2025-07-19 23:45:33 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:45:33,126 - __main__ - INFO - [2025-07-19 23:45:33 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:45:33,312 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-19 23:45:35,023 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.16s/it]
50%|█████ | 2/4 [00:01<00:01, 1.61it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.22it/s]
100%|██████████| 4/4 [00:01<00:00, 2.72it/s]
100%|██████████| 4/4 [00:01<00:00, 2.11it/s]
- 2025-07-19 23:45:35,023 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.16s/it]
50%|█████ | 2/4 [00:01<00:01, 1.61it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.22it/s]
100%|██████████| 4/4 [00:01<00:00, 2.72it/s]
100%|██████████| 4/4 [00:01<00:00, 2.11it/s]
- 2025-07-19 23:45:35,024 - sglang - INFO - [2025-07-19 23:45:35 TP0] Capture cuda graph end. Time elapsed: 1.90 s
- 2025-07-19 23:45:35,024 - __main__ - INFO - [2025-07-19 23:45:35 TP0] Capture cuda graph end. Time elapsed: 1.90 s
- 2025-07-19 23:45:38,605 - sglang - INFO - [2025-07-19 23:45:38 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:45:38,605 - __main__ - INFO - [2025-07-19 23:45:38 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:45:38,692 - sglang - INFO - [2025-07-19 23:45:38] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:45:38,693 - __main__ - INFO - [2025-07-19 23:45:38] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:45:39,393 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-19 23:45:45,471 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-19 23:45:51,550 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-19 23:45:57,627 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-19 23:46:03,706 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-19 23:46:09,785 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-19 23:46:15,863 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-19 23:46:21,942 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-19 23:46:28,021 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-19 23:46:34,102 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-19 23:46:40,182 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-19 23:46:46,261 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-19 23:46:52,341 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-19 23:46:58,289 - sglang - INFO - Process Process-1:
- 2025-07-19 23:46:58,289 - __main__ - INFO - Process Process-1:
- 2025-07-19 23:46:58,289 - sglang - INFO - Process Process-2:
- 2025-07-19 23:46:58,289 - __main__ - INFO - Process Process-2:
- 2025-07-19 23:46:58,290 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-19 23:47:07,596 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:47:07,597 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-19 23:47:07,597 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-19 23:47:07,602 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-19 23:47:07,823 - __main__ - INFO - Starting pipeline with PID 561326
- 2025-07-19 23:47:07,824 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:47:13,807 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-19 23:47:16,268 - sglang - INFO - [2025-07-19 23:47:16] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=716861363, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:47:16,268 - __main__ - INFO - [2025-07-19 23:47:16] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=716861363, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:47:19,909 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-19 23:47:21,758 - sglang - INFO - [2025-07-19 23:47:21] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:47:21,758 - __main__ - INFO - [2025-07-19 23:47:21] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:47:25,990 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-19 23:47:32,052 - sglang - INFO - [2025-07-19 23:47:32 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:47:32,052 - __main__ - INFO - [2025-07-19 23:47:32 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:47:32,070 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-19 23:47:32,712 - sglang - INFO - [2025-07-19 23:47:32 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:47:32,712 - __main__ - INFO - [2025-07-19 23:47:32 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:47:32,712 - sglang - INFO - [2025-07-19 23:47:32 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:47:32,712 - __main__ - INFO - [2025-07-19 23:47:32 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:47:32,712 - sglang - INFO - [2025-07-19 23:47:32 TP0] Init torch distributed begin.
- 2025-07-19 23:47:32,712 - __main__ - INFO - [2025-07-19 23:47:32 TP0] Init torch distributed begin.
- 2025-07-19 23:47:38,110 - sglang - INFO - [2025-07-19 23:47:38 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:47:38,110 - __main__ - INFO - [2025-07-19 23:47:38 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:47:38,151 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-19 23:47:39,227 - sglang - INFO - [2025-07-19 23:47:39 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:47:39,227 - __main__ - INFO - [2025-07-19 23:47:39 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:47:39,795 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:47:39,795 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:47:40,080 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.51it/s]
- 2025-07-19 23:47:40,080 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.51it/s]
- 2025-07-19 23:47:40,878 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.70it/s]
- 2025-07-19 23:47:40,878 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.70it/s]
- 2025-07-19 23:47:41,647 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.49it/s]
- 2025-07-19 23:47:41,648 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.49it/s]
- 2025-07-19 23:47:42,527 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.33it/s]
- 2025-07-19 23:47:42,527 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.33it/s]
- 2025-07-19 23:47:42,527 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.46it/s]
- 2025-07-19 23:47:42,527 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.46it/s]
- 2025-07-19 23:47:42,527 - sglang - INFO -
- 2025-07-19 23:47:42,527 - __main__ - INFO -
- 2025-07-19 23:47:42,684 - sglang - INFO - [2025-07-19 23:47:42 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:47:42,684 - __main__ - INFO - [2025-07-19 23:47:42 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:47:42,693 - sglang - INFO - [2025-07-19 23:47:42 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:47:42,693 - __main__ - INFO - [2025-07-19 23:47:42 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:47:42,693 - sglang - INFO - [2025-07-19 23:47:42 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:47:42,693 - __main__ - INFO - [2025-07-19 23:47:42 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:47:42,894 - sglang - INFO - [2025-07-19 23:47:42 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:47:42,895 - __main__ - INFO - [2025-07-19 23:47:42 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:47:44,231 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-19 23:47:44,720 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.11s/it]
50%|█████ | 2/4 [00:01<00:01, 1.67it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.31it/s]
100%|██████████| 4/4 [00:01<00:00, 2.82it/s]
100%|██████████| 4/4 [00:01<00:00, 2.20it/s]
- 2025-07-19 23:47:44,720 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.11s/it]
50%|█████ | 2/4 [00:01<00:01, 1.67it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.31it/s]
100%|██████████| 4/4 [00:01<00:00, 2.82it/s]
100%|██████████| 4/4 [00:01<00:00, 2.20it/s]
- 2025-07-19 23:47:44,721 - sglang - INFO - [2025-07-19 23:47:44 TP0] Capture cuda graph end. Time elapsed: 1.83 s
- 2025-07-19 23:47:44,721 - __main__ - INFO - [2025-07-19 23:47:44 TP0] Capture cuda graph end. Time elapsed: 1.83 s
- 2025-07-19 23:47:47,910 - sglang - INFO - [2025-07-19 23:47:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:47:47,910 - __main__ - INFO - [2025-07-19 23:47:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:47:47,999 - sglang - INFO - [2025-07-19 23:47:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:47:47,999 - __main__ - INFO - [2025-07-19 23:47:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:47:50,313 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-19 23:47:56,392 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-19 23:48:02,471 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-19 23:48:08,550 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-19 23:48:14,629 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-19 23:48:20,709 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-19 23:48:26,788 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-19 23:48:30,539 - sglang - INFO - Process Process-2:
- 2025-07-19 23:48:30,539 - __main__ - INFO - Process Process-2:
- 2025-07-19 23:48:30,540 - sglang - INFO - Process Process-1:
- 2025-07-19 23:48:30,540 - __main__ - INFO - Process Process-1:
- 2025-07-19 23:48:30,540 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-19 23:49:00,325 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:49:00,325 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-19 23:49:00,325 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-19 23:49:00,330 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-19 23:49:00,552 - __main__ - INFO - Starting pipeline with PID 562231
- 2025-07-19 23:49:00,552 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:49:11,172 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-19 23:49:12,801 - sglang - INFO - [2025-07-19 23:49:12] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=109481094, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:49:12,801 - __main__ - INFO - [2025-07-19 23:49:12] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=109481094, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:49:17,223 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-19 23:49:22,212 - sglang - INFO - [2025-07-19 23:49:22] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:49:22,212 - __main__ - INFO - [2025-07-19 23:49:22] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:49:23,129 - sglang - INFO - [2025-07-19 23:49:23 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:49:23,129 - __main__ - INFO - [2025-07-19 23:49:23 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:49:23,300 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-19 23:49:23,885 - sglang - INFO - [2025-07-19 23:49:23 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:49:23,885 - __main__ - INFO - [2025-07-19 23:49:23 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:49:23,886 - sglang - INFO - [2025-07-19 23:49:23 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:49:23,886 - __main__ - INFO - [2025-07-19 23:49:23 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:49:23,886 - sglang - INFO - [2025-07-19 23:49:23 TP0] Init torch distributed begin.
- 2025-07-19 23:49:23,886 - __main__ - INFO - [2025-07-19 23:49:23 TP0] Init torch distributed begin.
- 2025-07-19 23:49:29,268 - sglang - INFO - [2025-07-19 23:49:29 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:49:29,268 - __main__ - INFO - [2025-07-19 23:49:29 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:49:29,378 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-19 23:49:31,052 - sglang - INFO - [2025-07-19 23:49:31 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:49:31,052 - __main__ - INFO - [2025-07-19 23:49:31 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:49:31,692 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:49:31,693 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:49:31,977 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.51it/s]
- 2025-07-19 23:49:31,978 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.51it/s]
- 2025-07-19 23:49:32,770 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.71it/s]
- 2025-07-19 23:49:32,771 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.71it/s]
- 2025-07-19 23:49:33,535 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.50it/s]
- 2025-07-19 23:49:33,535 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.50it/s]
- 2025-07-19 23:49:34,409 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.34it/s]
- 2025-07-19 23:49:34,409 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.34it/s]
- 2025-07-19 23:49:34,409 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.47it/s]
- 2025-07-19 23:49:34,409 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.47it/s]
- 2025-07-19 23:49:34,410 - sglang - INFO -
- 2025-07-19 23:49:34,410 - __main__ - INFO -
- 2025-07-19 23:49:34,567 - sglang - INFO - [2025-07-19 23:49:34 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:49:34,567 - __main__ - INFO - [2025-07-19 23:49:34 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:49:34,574 - sglang - INFO - [2025-07-19 23:49:34 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:49:34,574 - __main__ - INFO - [2025-07-19 23:49:34 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:49:34,575 - sglang - INFO - [2025-07-19 23:49:34 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:49:34,575 - __main__ - INFO - [2025-07-19 23:49:34 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:49:34,768 - sglang - INFO - [2025-07-19 23:49:34 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:49:34,768 - __main__ - INFO - [2025-07-19 23:49:34 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:49:35,456 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-19 23:49:36,569 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.08s/it]
50%|█████ | 2/4 [00:01<00:01, 1.70it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.34it/s]
100%|██████████| 4/4 [00:01<00:00, 2.84it/s]
100%|██████████| 4/4 [00:01<00:00, 2.23it/s]
- 2025-07-19 23:49:36,569 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.08s/it]
50%|█████ | 2/4 [00:01<00:01, 1.70it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.34it/s]
100%|██████████| 4/4 [00:01<00:00, 2.84it/s]
100%|██████████| 4/4 [00:01<00:00, 2.23it/s]
- 2025-07-19 23:49:36,570 - sglang - INFO - [2025-07-19 23:49:36 TP0] Capture cuda graph end. Time elapsed: 1.80 s
- 2025-07-19 23:49:36,570 - __main__ - INFO - [2025-07-19 23:49:36 TP0] Capture cuda graph end. Time elapsed: 1.80 s
- 2025-07-19 23:49:41,048 - sglang - INFO - [2025-07-19 23:49:41 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:49:41,049 - __main__ - INFO - [2025-07-19 23:49:41 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:49:41,131 - sglang - INFO - [2025-07-19 23:49:41] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:49:41,131 - __main__ - INFO - [2025-07-19 23:49:41] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:49:41,536 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-19 23:49:47,615 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-19 23:49:53,695 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-19 23:49:59,774 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-19 23:50:05,853 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-19 23:50:11,933 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-19 23:50:18,013 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-19 23:50:24,091 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-19 23:50:30,170 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-19 23:50:36,249 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-19 23:50:42,329 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-19 23:50:48,408 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-19 23:50:54,488 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-19 23:51:00,567 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-19 23:51:06,647 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-19 23:51:12,727 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-19 23:51:18,807 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-19 23:51:24,887 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-19 23:51:30,967 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-19 23:51:37,046 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-19 23:51:43,126 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-19 23:51:49,206 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-19 23:51:55,286 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-19 23:52:01,366 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-19 23:52:07,446 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-19 23:52:13,524 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-19 23:52:19,604 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-19 23:52:25,683 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-19 23:52:31,764 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-19 23:52:37,842 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-19 23:52:43,923 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-19 23:52:50,002 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-19 23:52:56,083 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-19 23:53:02,162 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-19 23:53:08,241 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-19 23:53:14,321 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-19 23:53:20,400 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-19 23:53:26,478 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-19 23:53:32,558 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-19 23:53:38,637 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-19 23:53:44,718 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-19 23:53:50,798 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-19 23:53:56,878 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-19 23:54:02,958 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-19 23:54:09,037 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-19 23:54:15,115 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-19 23:54:21,195 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-19 23:54:27,275 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-19 23:54:33,355 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-19 23:54:39,435 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-19 23:54:45,515 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-19 23:54:50,335 - sglang - INFO - Process Process-2:
- 2025-07-19 23:54:50,335 - __main__ - INFO - Process Process-2:
- 2025-07-19 23:54:50,336 - sglang - INFO - Process Process-1:
- 2025-07-19 23:54:50,336 - __main__ - INFO - Process Process-1:
- 2025-07-19 23:54:50,336 - sglang - INFO - Traceback (most recent call last):
- 2025-07-19 23:54:50,336 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-19 23:54:50,336 - sglang - INFO - Traceback (most recent call last):
- 2025-07-19 23:54:50,336 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-19 23:54:50,336 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
- 2025-07-19 23:54:50,336 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
- 2025-07-19 23:54:50,336 - sglang - INFO - self.run()
- 2025-07-19 23:54:50,337 - __main__ - INFO - self.run()
- 2025-07-19 23:54:50,337 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 108, in run
- 2025-07-19 23:54:50,337 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 108, in run
- 2025-07-19 23:54:50,337 - sglang - INFO - self._target(*self._args, **self._kwargs)
- 2025-07-19 23:54:50,337 - __main__ - INFO - self._target(*self._args, **self._kwargs)
- 2025-07-19 23:54:50,337 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1784, in run_scheduler_process
- 2025-07-19 23:54:50,337 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1784, in run_scheduler_process
- 2025-07-19 23:54:50,337 - sglang - INFO - scheduler.event_loop_normal()
- 2025-07-19 23:54:50,337 - __main__ - INFO - scheduler.event_loop_normal()
- 2025-07-19 23:54:50,337 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
- 2025-07-19 23:54:50,337 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
- 2025-07-19 23:54:50,337 - sglang - INFO - return func(*args, **kwargs)
- 2025-07-19 23:54:50,337 - __main__ - INFO - return func(*args, **kwargs)
- 2025-07-19 23:54:50,338 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^
- 2025-07-19 23:54:50,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^
- 2025-07-19 23:54:50,338 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 473, in event_loop_normal
- 2025-07-19 23:54:50,338 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 473, in event_loop_normal
- 2025-07-19 23:54:50,338 - sglang - INFO - batch = self.get_next_batch_to_run()
- 2025-07-19 23:54:50,338 - __main__ - INFO - batch = self.get_next_batch_to_run()
- 2025-07-19 23:54:50,338 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-19 23:54:50,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-19 23:54:50,338 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 870, in get_next_batch_to_run
- 2025-07-19 23:54:50,338 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 870, in get_next_batch_to_run
- 2025-07-19 23:54:50,338 - sglang - INFO - new_batch = self.get_new_batch_prefill()
- 2025-07-19 23:54:50,338 - __main__ - INFO - new_batch = self.get_new_batch_prefill()
- 2025-07-19 23:54:50,338 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-19 23:54:50,339 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-19 23:54:50,339 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 888, in get_new_batch_prefill
- 2025-07-19 23:54:50,339 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 888, in get_new_batch_prefill
- 2025-07-19 23:54:50,339 - sglang - INFO - def get_new_batch_prefill(self) -> Optional[ScheduleBatch]:
- 2025-07-19 23:54:50,339 - __main__ - INFO - def get_new_batch_prefill(self) -> Optional[ScheduleBatch]:
- 2025-07-19 23:54:50,339 - sglang - INFO -
- 2025-07-19 23:54:50,339 - __main__ - INFO -
- 2025-07-19 23:54:50,339 - sglang - INFO - KeyboardInterrupt
- 2025-07-19 23:54:50,339 - __main__ - INFO - KeyboardInterrupt
- 2025-07-19 23:54:50,340 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-19 23:57:11,442 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-19 23:57:11,442 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-19 23:57:11,442 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-19 23:57:11,447 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-19 23:57:11,683 - __main__ - INFO - Starting pipeline with PID 563187
- 2025-07-19 23:57:11,683 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-19 23:57:17,476 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-19 23:57:19,571 - sglang - INFO - [2025-07-19 23:57:19] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=729530513, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:57:19,572 - __main__ - INFO - [2025-07-19 23:57:19] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=729530513, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-19 23:57:23,609 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-19 23:57:28,966 - sglang - INFO - [2025-07-19 23:57:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:57:28,966 - __main__ - INFO - [2025-07-19 23:57:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-19 23:57:29,709 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-19 23:57:30,229 - sglang - INFO - [2025-07-19 23:57:30 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:57:30,229 - __main__ - INFO - [2025-07-19 23:57:30 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-19 23:57:30,904 - sglang - INFO - [2025-07-19 23:57:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:57:30,904 - __main__ - INFO - [2025-07-19 23:57:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-19 23:57:30,904 - sglang - INFO - [2025-07-19 23:57:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:57:30,904 - __main__ - INFO - [2025-07-19 23:57:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-19 23:57:30,904 - sglang - INFO - [2025-07-19 23:57:30 TP0] Init torch distributed begin.
- 2025-07-19 23:57:30,905 - __main__ - INFO - [2025-07-19 23:57:30 TP0] Init torch distributed begin.
- 2025-07-19 23:57:35,789 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-19 23:57:36,344 - sglang - INFO - [2025-07-19 23:57:36 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:57:36,344 - __main__ - INFO - [2025-07-19 23:57:36 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-19 23:57:37,496 - sglang - INFO - [2025-07-19 23:57:37 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:57:37,496 - __main__ - INFO - [2025-07-19 23:57:37 TP0] Using model weights format ['*.safetensors']
- 2025-07-19 23:57:39,017 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:57:39,018 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-19 23:57:39,310 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.42it/s]
- 2025-07-19 23:57:39,311 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.42it/s]
- 2025-07-19 23:57:40,121 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
- 2025-07-19 23:57:40,121 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
- 2025-07-19 23:57:40,903 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
- 2025-07-19 23:57:40,903 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
- 2025-07-19 23:57:41,781 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.32it/s]
- 2025-07-19 23:57:41,781 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.32it/s]
- 2025-07-19 23:57:41,781 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.45it/s]
- 2025-07-19 23:57:41,781 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.45it/s]
- 2025-07-19 23:57:41,781 - sglang - INFO -
- 2025-07-19 23:57:41,781 - __main__ - INFO -
- 2025-07-19 23:57:41,869 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-19 23:57:41,937 - sglang - INFO - [2025-07-19 23:57:41 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:57:41,937 - __main__ - INFO - [2025-07-19 23:57:41 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-19 23:57:41,944 - sglang - INFO - [2025-07-19 23:57:41 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:57:41,944 - __main__ - INFO - [2025-07-19 23:57:41 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-19 23:57:41,944 - sglang - INFO - [2025-07-19 23:57:41 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:57:41,944 - __main__ - INFO - [2025-07-19 23:57:41 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-19 23:57:42,127 - sglang - INFO - [2025-07-19 23:57:42 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:57:42,127 - __main__ - INFO - [2025-07-19 23:57:42 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-19 23:57:44,022 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.17s/it]
50%|█████ | 2/4 [00:01<00:01, 1.61it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.23it/s]
100%|██████████| 4/4 [00:01<00:00, 2.73it/s]
100%|██████████| 4/4 [00:01<00:00, 2.11it/s]
- 2025-07-19 23:57:44,022 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.17s/it]
50%|█████ | 2/4 [00:01<00:01, 1.61it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.23it/s]
100%|██████████| 4/4 [00:01<00:00, 2.73it/s]
100%|██████████| 4/4 [00:01<00:00, 2.11it/s]
- 2025-07-19 23:57:44,023 - sglang - INFO - [2025-07-19 23:57:44 TP0] Capture cuda graph end. Time elapsed: 1.90 s
- 2025-07-19 23:57:44,023 - __main__ - INFO - [2025-07-19 23:57:44 TP0] Capture cuda graph end. Time elapsed: 1.90 s
- 2025-07-19 23:57:47,287 - sglang - INFO - [2025-07-19 23:57:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:57:47,288 - __main__ - INFO - [2025-07-19 23:57:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-19 23:57:47,371 - sglang - INFO - [2025-07-19 23:57:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:57:47,371 - __main__ - INFO - [2025-07-19 23:57:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-19 23:57:47,958 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-19 23:57:54,039 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-19 23:58:00,119 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-19 23:58:06,199 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-19 23:58:12,278 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-19 23:58:18,359 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-19 23:58:24,437 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-19 23:58:30,517 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-19 23:58:36,596 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-19 23:58:42,675 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-19 23:58:48,756 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-19 23:58:54,835 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-19 23:59:00,915 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-19 23:59:06,995 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-19 23:59:13,075 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-19 23:59:19,155 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-19 23:59:25,235 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-19 23:59:31,315 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-19 23:59:37,395 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-19 23:59:43,475 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-19 23:59:49,555 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-19 23:59:55,634 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 00:00:01,715 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 00:00:07,785 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 00:00:13,864 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 00:00:19,944 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 00:00:26,023 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 00:00:32,104 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 00:00:38,183 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 00:00:44,263 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 00:00:48,050 - sglang - INFO - Process Process-2:
- 2025-07-20 00:00:48,050 - __main__ - INFO - Process Process-2:
- 2025-07-20 00:00:48,050 - sglang - INFO - Process Process-1:
- 2025-07-20 00:00:48,050 - __main__ - INFO - Process Process-1:
- 2025-07-20 00:00:48,050 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 00:02:33,950 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 00:02:33,950 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-20 00:02:33,950 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 00:02:33,955 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-20 00:02:34,199 - __main__ - INFO - Starting pipeline with PID 564088
- 2025-07-20 00:02:34,199 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-20 00:02:40,299 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 00:02:42,770 - sglang - INFO - [2025-07-20 00:02:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=169654265, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 00:02:42,771 - __main__ - INFO - [2025-07-20 00:02:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=169654265, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 00:02:46,501 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 00:02:51,927 - sglang - INFO - [2025-07-20 00:02:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 00:02:51,927 - __main__ - INFO - [2025-07-20 00:02:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 00:02:52,559 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 00:02:58,161 - sglang - INFO - [2025-07-20 00:02:58 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 00:02:58,161 - __main__ - INFO - [2025-07-20 00:02:58 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 00:02:58,643 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 00:02:58,824 - sglang - INFO - [2025-07-20 00:02:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 00:02:58,824 - __main__ - INFO - [2025-07-20 00:02:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 00:02:58,824 - sglang - INFO - [2025-07-20 00:02:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 00:02:58,825 - __main__ - INFO - [2025-07-20 00:02:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 00:02:58,825 - sglang - INFO - [2025-07-20 00:02:58 TP0] Init torch distributed begin.
- 2025-07-20 00:02:58,825 - __main__ - INFO - [2025-07-20 00:02:58 TP0] Init torch distributed begin.
- 2025-07-20 00:03:04,227 - sglang - INFO - [2025-07-20 00:03:04 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 00:03:04,228 - __main__ - INFO - [2025-07-20 00:03:04 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 00:03:04,726 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 00:03:05,805 - sglang - INFO - [2025-07-20 00:03:05 TP0] Using model weights format ['*.safetensors']
- 2025-07-20 00:03:05,805 - __main__ - INFO - [2025-07-20 00:03:05 TP0] Using model weights format ['*.safetensors']
- 2025-07-20 00:03:06,331 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 00:03:06,331 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 00:03:06,620 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.47it/s]
- 2025-07-20 00:03:06,620 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.47it/s]
- 2025-07-20 00:03:07,428 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.68it/s]
- 2025-07-20 00:03:07,428 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.68it/s]
- 2025-07-20 00:03:08,211 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
- 2025-07-20 00:03:08,211 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
- 2025-07-20 00:03:09,089 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.32it/s]
- 2025-07-20 00:03:09,089 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.32it/s]
- 2025-07-20 00:03:09,089 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.45it/s]
- 2025-07-20 00:03:09,089 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.45it/s]
- 2025-07-20 00:03:09,089 - sglang - INFO -
- 2025-07-20 00:03:09,090 - __main__ - INFO -
- 2025-07-20 00:03:09,248 - sglang - INFO - [2025-07-20 00:03:09 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 00:03:09,248 - __main__ - INFO - [2025-07-20 00:03:09 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 00:03:09,255 - sglang - INFO - [2025-07-20 00:03:09 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 00:03:09,255 - __main__ - INFO - [2025-07-20 00:03:09 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 00:03:09,255 - sglang - INFO - [2025-07-20 00:03:09 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 00:03:09,256 - __main__ - INFO - [2025-07-20 00:03:09 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 00:03:09,439 - sglang - INFO - [2025-07-20 00:03:09 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 00:03:09,439 - __main__ - INFO - [2025-07-20 00:03:09 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 00:03:10,809 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 00:03:11,352 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.18s/it]
50%|█████ | 2/4 [00:01<00:01, 1.59it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.21it/s]
100%|██████████| 4/4 [00:01<00:00, 2.70it/s]
100%|██████████| 4/4 [00:01<00:00, 2.09it/s]
- 2025-07-20 00:03:11,352 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.18s/it]
50%|█████ | 2/4 [00:01<00:01, 1.59it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.21it/s]
100%|██████████| 4/4 [00:01<00:00, 2.70it/s]
100%|██████████| 4/4 [00:01<00:00, 2.09it/s]
- 2025-07-20 00:03:11,353 - sglang - INFO - [2025-07-20 00:03:11 TP0] Capture cuda graph end. Time elapsed: 1.91 s
- 2025-07-20 00:03:11,353 - __main__ - INFO - [2025-07-20 00:03:11 TP0] Capture cuda graph end. Time elapsed: 1.91 s
- 2025-07-20 00:03:14,481 - sglang - INFO - [2025-07-20 00:03:14 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 00:03:14,481 - __main__ - INFO - [2025-07-20 00:03:14 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 00:03:14,563 - sglang - INFO - [2025-07-20 00:03:14] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 00:03:14,563 - __main__ - INFO - [2025-07-20 00:03:14] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 00:03:16,892 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 00:03:22,972 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 00:03:29,043 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 00:03:35,124 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 00:03:41,169 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 00:03:47,215 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 00:03:53,260 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 00:03:59,305 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 00:04:05,350 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 00:04:11,394 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 00:04:17,440 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 00:04:23,486 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 00:04:29,567 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 00:04:35,649 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 00:04:41,729 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 00:04:47,812 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 00:04:53,893 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 00:04:59,974 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 00:05:06,055 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 00:05:07,000 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 00:07:13,942 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 00:07:13,942 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-20 00:07:13,942 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 00:07:13,947 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-20 00:07:14,179 - __main__ - INFO - Starting pipeline with PID 565190
- 2025-07-20 00:07:14,179 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-20 00:07:26,746 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 00:07:29,417 - sglang - INFO - [2025-07-20 00:07:29] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=578772715, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 00:07:29,417 - __main__ - INFO - [2025-07-20 00:07:29] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=578772715, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 00:07:32,827 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 00:07:38,849 - sglang - INFO - [2025-07-20 00:07:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 00:07:38,850 - __main__ - INFO - [2025-07-20 00:07:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 00:07:38,906 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 00:07:39,117 - sglang - INFO - [2025-07-20 00:07:39 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 00:07:39,117 - __main__ - INFO - [2025-07-20 00:07:39 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 00:07:40,082 - sglang - INFO - [2025-07-20 00:07:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 00:07:40,082 - __main__ - INFO - [2025-07-20 00:07:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 00:07:40,083 - sglang - INFO - [2025-07-20 00:07:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 00:07:40,083 - __main__ - INFO - [2025-07-20 00:07:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 00:07:40,083 - sglang - INFO - [2025-07-20 00:07:40 TP0] Init torch distributed begin.
- 2025-07-20 00:07:40,083 - __main__ - INFO - [2025-07-20 00:07:40 TP0] Init torch distributed begin.
- 2025-07-20 00:07:44,988 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 00:07:45,468 - sglang - INFO - [2025-07-20 00:07:45 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 00:07:45,469 - __main__ - INFO - [2025-07-20 00:07:45 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 00:07:46,609 - sglang - INFO - [2025-07-20 00:07:46 TP0] Using model weights format ['*.safetensors']
- 2025-07-20 00:07:46,609 - __main__ - INFO - [2025-07-20 00:07:46 TP0] Using model weights format ['*.safetensors']
- 2025-07-20 00:07:47,619 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 00:07:47,619 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 00:07:47,947 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.05it/s]
- 2025-07-20 00:07:47,947 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.05it/s]
- 2025-07-20 00:07:48,860 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
- 2025-07-20 00:07:48,860 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
- 2025-07-20 00:07:49,735 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.31it/s]
- 2025-07-20 00:07:49,735 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.31it/s]
- 2025-07-20 00:07:50,729 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.17it/s]
- 2025-07-20 00:07:50,729 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.17it/s]
- 2025-07-20 00:07:50,729 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.29it/s]
- 2025-07-20 00:07:50,729 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.29it/s]
- 2025-07-20 00:07:50,730 - sglang - INFO -
- 2025-07-20 00:07:50,730 - __main__ - INFO -
- 2025-07-20 00:07:50,902 - sglang - INFO - [2025-07-20 00:07:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 00:07:50,902 - __main__ - INFO - [2025-07-20 00:07:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 00:07:50,910 - sglang - INFO - [2025-07-20 00:07:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 00:07:50,910 - __main__ - INFO - [2025-07-20 00:07:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 00:07:50,910 - sglang - INFO - [2025-07-20 00:07:50 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 00:07:50,910 - __main__ - INFO - [2025-07-20 00:07:50 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 00:07:51,069 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 00:07:51,129 - sglang - INFO - [2025-07-20 00:07:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 00:07:51,129 - __main__ - INFO - [2025-07-20 00:07:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 00:07:52,868 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.02s/it]
50%|█████ | 2/4 [00:01<00:01, 1.78it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.42it/s]
100%|██████████| 4/4 [00:01<00:00, 2.91it/s]
100%|██████████| 4/4 [00:01<00:00, 2.31it/s]
- 2025-07-20 00:07:52,868 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.02s/it]
50%|█████ | 2/4 [00:01<00:01, 1.78it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.42it/s]
100%|██████████| 4/4 [00:01<00:00, 2.91it/s]
100%|██████████| 4/4 [00:01<00:00, 2.31it/s]
- 2025-07-20 00:07:52,868 - sglang - INFO - [2025-07-20 00:07:52 TP0] Capture cuda graph end. Time elapsed: 1.74 s
- 2025-07-20 00:07:52,869 - __main__ - INFO - [2025-07-20 00:07:52 TP0] Capture cuda graph end. Time elapsed: 1.74 s
- 2025-07-20 00:07:56,046 - sglang - INFO - [2025-07-20 00:07:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 00:07:56,046 - __main__ - INFO - [2025-07-20 00:07:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 00:07:56,132 - sglang - INFO - [2025-07-20 00:07:56] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 00:07:56,132 - __main__ - INFO - [2025-07-20 00:07:56] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 00:07:57,149 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 00:08:03,230 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 00:08:08,259 - sglang - INFO - Process Process-2:
- 2025-07-20 00:08:08,259 - __main__ - INFO - Process Process-2:
- 2025-07-20 00:08:08,259 - sglang - INFO - Process Process-1:
- 2025-07-20 00:08:08,260 - __main__ - INFO - Process Process-1:
- 2025-07-20 00:08:08,260 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 00:08:47,553 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 00:08:47,553 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-20 00:08:47,553 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 00:08:47,558 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-20 00:08:47,756 - __main__ - INFO - Starting pipeline with PID 566220
- 2025-07-20 00:08:47,756 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-20 00:10:15,905 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 00:10:15,905 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
- 2025-07-20 00:10:15,906 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 00:10:15,911 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
- 2025-07-20 00:10:16,145 - __main__ - INFO - Starting pipeline with PID 566313
- 2025-07-20 00:10:16,145 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-20 00:10:21,832 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 00:10:23,926 - sglang - INFO - [2025-07-20 00:10:23] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=825752942, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 00:10:23,926 - __main__ - INFO - [2025-07-20 00:10:23] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=825752942, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 00:10:27,966 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 00:10:33,565 - sglang - INFO - [2025-07-20 00:10:33] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 00:10:33,565 - __main__ - INFO - [2025-07-20 00:10:33] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 00:10:34,041 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 00:10:39,849 - sglang - INFO - [2025-07-20 00:10:39 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 00:10:39,850 - __main__ - INFO - [2025-07-20 00:10:39 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 00:10:40,120 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 00:10:40,562 - sglang - INFO - [2025-07-20 00:10:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 00:10:40,563 - __main__ - INFO - [2025-07-20 00:10:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 00:10:40,563 - sglang - INFO - [2025-07-20 00:10:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 00:10:40,563 - __main__ - INFO - [2025-07-20 00:10:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 00:10:40,563 - sglang - INFO - [2025-07-20 00:10:40 TP0] Init torch distributed begin.
- 2025-07-20 00:10:40,563 - __main__ - INFO - [2025-07-20 00:10:40 TP0] Init torch distributed begin.
- 2025-07-20 00:10:45,965 - sglang - INFO - [2025-07-20 00:10:45 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 00:10:45,966 - __main__ - INFO - [2025-07-20 00:10:45 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 00:10:46,201 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 00:10:47,090 - sglang - INFO - [2025-07-20 00:10:47 TP0] Using model weights format ['*.safetensors']
- 2025-07-20 00:10:47,090 - __main__ - INFO - [2025-07-20 00:10:47 TP0] Using model weights format ['*.safetensors']
- 2025-07-20 00:10:47,688 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 00:10:47,688 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 00:10:47,975 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.48it/s]
- 2025-07-20 00:10:47,976 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.48it/s]
- 2025-07-20 00:10:48,780 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.69it/s]
- 2025-07-20 00:10:48,780 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.69it/s]
- 2025-07-20 00:10:49,562 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
- 2025-07-20 00:10:49,562 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
- 2025-07-20 00:10:50,428 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.33it/s]
- 2025-07-20 00:10:50,428 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.33it/s]
- 2025-07-20 00:10:50,428 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.46it/s]
- 2025-07-20 00:10:50,428 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.46it/s]
- 2025-07-20 00:10:50,429 - sglang - INFO -
- 2025-07-20 00:10:50,429 - __main__ - INFO -
- 2025-07-20 00:10:50,585 - sglang - INFO - [2025-07-20 00:10:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 00:10:50,585 - __main__ - INFO - [2025-07-20 00:10:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 00:10:50,592 - sglang - INFO - [2025-07-20 00:10:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 00:10:50,592 - __main__ - INFO - [2025-07-20 00:10:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 00:10:50,592 - sglang - INFO - [2025-07-20 00:10:50 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 00:10:50,592 - __main__ - INFO - [2025-07-20 00:10:50 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 00:10:50,776 - sglang - INFO - [2025-07-20 00:10:50 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 00:10:50,776 - __main__ - INFO - [2025-07-20 00:10:50 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 00:10:52,282 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 00:10:52,700 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.21s/it]
50%|█████ | 2/4 [00:01<00:01, 1.57it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.20it/s]
100%|██████████| 4/4 [00:01<00:00, 2.71it/s]
100%|██████████| 4/4 [00:01<00:00, 2.08it/s]
- 2025-07-20 00:10:52,701 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.21s/it]
50%|█████ | 2/4 [00:01<00:01, 1.57it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.20it/s]
100%|██████████| 4/4 [00:01<00:00, 2.71it/s]
100%|██████████| 4/4 [00:01<00:00, 2.08it/s]
- 2025-07-20 00:10:52,701 - sglang - INFO - [2025-07-20 00:10:52 TP0] Capture cuda graph end. Time elapsed: 1.92 s
- 2025-07-20 00:10:52,701 - __main__ - INFO - [2025-07-20 00:10:52 TP0] Capture cuda graph end. Time elapsed: 1.92 s
- 2025-07-20 00:10:55,599 - sglang - INFO - [2025-07-20 00:10:55 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 00:10:55,599 - __main__ - INFO - [2025-07-20 00:10:55 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 00:10:55,680 - sglang - INFO - [2025-07-20 00:10:55] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 00:10:55,680 - __main__ - INFO - [2025-07-20 00:10:55] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 00:10:58,361 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 00:11:04,440 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 00:11:10,519 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 00:11:12,074 - sglang - INFO - Process Process-1:
- 2025-07-20 00:11:12,074 - __main__ - INFO - Process Process-1:
- 2025-07-20 00:11:12,074 - sglang - INFO - Process Process-2:
- 2025-07-20 00:11:12,074 - __main__ - INFO - Process Process-2:
- 2025-07-20 00:11:12,075 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 11:07:46,350 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 11:07:46,350 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 11:07:46,350 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 11:07:46,354 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 11:07:46,542 - __main__ - INFO - Starting pipeline with PID 578329
- 2025-07-20 11:07:46,542 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-20 11:10:01,984 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 11:10:04,397 - sglang - INFO - [2025-07-20 11:10:04] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=270783148, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:10:04,398 - __main__ - INFO - [2025-07-20 11:10:04] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=270783148, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:10:08,112 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 11:10:14,191 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 11:10:20,271 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 11:10:26,353 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 11:10:32,433 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 11:10:38,514 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 11:10:44,595 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 11:10:50,676 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 11:10:54,998 - sglang - INFO - Process Process-2:
- 2025-07-20 11:10:54,999 - __main__ - INFO - Process Process-2:
- 2025-07-20 11:10:54,999 - sglang - INFO - Process Process-1:
- 2025-07-20 11:10:54,999 - __main__ - INFO - Process Process-1:
- 2025-07-20 11:10:54,999 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 11:11:06,277 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 11:11:06,278 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 11:11:06,278 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 11:11:06,281 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 11:11:06,492 - __main__ - INFO - Starting pipeline with PID 579071
- 2025-07-20 11:11:06,492 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
- 2025-07-20 11:13:22,698 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 11:13:23,990 - sglang - INFO - [2025-07-20 11:13:23] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=381064224, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:13:23,990 - __main__ - INFO - [2025-07-20 11:13:23] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=381064224, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:13:28,777 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 11:13:34,835 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 11:13:40,915 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 11:13:46,995 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 11:13:53,076 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 11:13:59,157 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 11:14:05,239 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 11:14:11,320 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 11:14:17,401 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 11:14:23,482 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 11:14:29,564 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 11:14:35,645 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 11:14:41,725 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 11:14:47,807 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 11:14:53,888 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 11:14:59,964 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 11:15:00,760 - sglang - INFO - [2025-07-20 11:15:00] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:15:00,760 - __main__ - INFO - [2025-07-20 11:15:00] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:15:05,976 - sglang - INFO - [2025-07-20 11:15:05 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:15:05,976 - __main__ - INFO - [2025-07-20 11:15:05 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:15:06,043 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 11:15:12,125 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 11:15:18,205 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 11:15:24,285 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 11:15:26,126 - sglang - INFO - [2025-07-20 11:15:26 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:15:26,126 - __main__ - INFO - [2025-07-20 11:15:26 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:15:26,127 - sglang - INFO - [2025-07-20 11:15:26 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:15:26,127 - __main__ - INFO - [2025-07-20 11:15:26 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:15:26,127 - sglang - INFO - [2025-07-20 11:15:26 TP0] Init torch distributed begin.
- 2025-07-20 11:15:26,127 - __main__ - INFO - [2025-07-20 11:15:26 TP0] Init torch distributed begin.
- 2025-07-20 11:15:30,366 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 11:15:31,530 - sglang - INFO - [2025-07-20 11:15:31 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:15:31,530 - __main__ - INFO - [2025-07-20 11:15:31 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:15:36,448 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 11:15:42,229 - sglang - INFO - [2025-07-20 11:15:42 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:15:42,230 - __main__ - INFO - [2025-07-20 11:15:42 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:15:42,230 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:15:42,230 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:15:42,230 - sglang - INFO - sock = connection.create_connection(
- 2025-07-20 11:15:42,230 - __main__ - INFO - sock = connection.create_connection(
- 2025-07-20 11:15:42,230 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,230 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,230 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:15:42,230 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:15:42,230 - sglang - INFO - raise err
- 2025-07-20 11:15:42,230 - __main__ - INFO - raise err
- 2025-07-20 11:15:42,231 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:15:42,231 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:15:42,231 - sglang - INFO - sock.connect(sa)
- 2025-07-20 11:15:42,231 - __main__ - INFO - sock.connect(sa)
- 2025-07-20 11:15:42,231 - sglang - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:15:42,231 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:15:42,231 - sglang - INFO -
- 2025-07-20 11:15:42,231 - __main__ - INFO -
- 2025-07-20 11:15:42,231 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:15:42,231 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:15:42,231 - sglang - INFO -
- 2025-07-20 11:15:42,231 - __main__ - INFO -
- 2025-07-20 11:15:42,231 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:15:42,232 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:15:42,232 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:15:42,232 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:15:42,232 - sglang - INFO - response = self._make_request(
- 2025-07-20 11:15:42,232 - __main__ - INFO - response = self._make_request(
- 2025-07-20 11:15:42,232 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,232 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,232 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:15:42,232 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:15:42,232 - sglang - INFO - raise new_e
- 2025-07-20 11:15:42,232 - __main__ - INFO - raise new_e
- 2025-07-20 11:15:42,232 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:15:42,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:15:42,233 - sglang - INFO - self._validate_conn(conn)
- 2025-07-20 11:15:42,233 - __main__ - INFO - self._validate_conn(conn)
- 2025-07-20 11:15:42,233 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:15:42,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:15:42,233 - sglang - INFO - conn.connect()
- 2025-07-20 11:15:42,233 - __main__ - INFO - conn.connect()
- 2025-07-20 11:15:42,233 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:15:42,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:15:42,233 - sglang - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:15:42,233 - __main__ - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:15:42,233 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,233 - __main__ - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:15:42,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:15:42,234 - sglang - INFO - raise NewConnectionError(
- 2025-07-20 11:15:42,234 - __main__ - INFO - raise NewConnectionError(
- 2025-07-20 11:15:42,234 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:15:42,234 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:15:42,234 - sglang - INFO -
- 2025-07-20 11:15:42,234 - __main__ - INFO -
- 2025-07-20 11:15:42,234 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:15:42,234 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:15:42,234 - sglang - INFO -
- 2025-07-20 11:15:42,234 - __main__ - INFO -
- 2025-07-20 11:15:42,234 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:15:42,235 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:15:42,235 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:15:42,235 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:15:42,235 - sglang - INFO - resp = conn.urlopen(
- 2025-07-20 11:15:42,235 - __main__ - INFO - resp = conn.urlopen(
- 2025-07-20 11:15:42,235 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:15:42,235 - __main__ - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:15:42,235 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:15:42,235 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:15:42,235 - sglang - INFO - retries = retries.increment(
- 2025-07-20 11:15:42,235 - __main__ - INFO - retries = retries.increment(
- 2025-07-20 11:15:42,235 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,235 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,235 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:15:42,236 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:15:42,236 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:15:42,236 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:15:42,236 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,236 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,236 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:15:42,236 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:15:42,236 - sglang - INFO -
- 2025-07-20 11:15:42,236 - __main__ - INFO -
- 2025-07-20 11:15:42,236 - sglang - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:15:42,236 - __main__ - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:15:42,236 - sglang - INFO -
- 2025-07-20 11:15:42,236 - __main__ - INFO -
- 2025-07-20 11:15:42,237 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:15:42,237 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:15:42,237 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:15:42,237 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:15:42,237 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:15:42,237 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:15:42,237 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,237 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,237 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:15:42,237 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:15:42,237 - sglang - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:15:42,237 - __main__ - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:15:42,237 - sglang - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,238 - __main__ - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,238 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:15:42,238 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:15:42,238 - sglang - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:15:42,238 - __main__ - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:15:42,238 - sglang - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:15:42,238 - __main__ - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:15:42,238 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:15:42,238 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:15:42,238 - sglang - INFO - self.load_model()
- 2025-07-20 11:15:42,238 - __main__ - INFO - self.load_model()
- 2025-07-20 11:15:42,238 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:15:42,238 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:15:42,239 - sglang - INFO - self.model = get_model(
- 2025-07-20 11:15:42,239 - __main__ - INFO - self.model = get_model(
- 2025-07-20 11:15:42,239 - sglang - INFO - ^^^^^^^^^^
- 2025-07-20 11:15:42,239 - __main__ - INFO - ^^^^^^^^^^
- 2025-07-20 11:15:42,239 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:15:42,239 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:15:42,239 - sglang - INFO - return loader.load_model(
- 2025-07-20 11:15:42,239 - __main__ - INFO - return loader.load_model(
- 2025-07-20 11:15:42,239 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,239 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,239 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:15:42,239 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:15:42,239 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:15:42,239 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:15:42,240 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:15:42,240 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:15:42,240 - sglang - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:15:42,240 - __main__ - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:15:42,240 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:15:42,240 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:15:42,240 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:15:42,240 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:15:42,240 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,240 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,240 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:15:42,240 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:15:42,241 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:15:42,241 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:15:42,241 - sglang - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:15:42,241 - __main__ - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:15:42,241 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:15:42,241 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:15:42,241 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:15:42,241 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:15:42,241 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:15:42,241 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:15:42,241 - sglang - INFO - self._api.repo_info(
- 2025-07-20 11:15:42,241 - __main__ - INFO - self._api.repo_info(
- 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:15:42,242 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:15:42,242 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:15:42,242 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:15:42,242 - sglang - INFO - return method(
- 2025-07-20 11:15:42,242 - __main__ - INFO - return method(
- 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^
- 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^
- 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:15:42,242 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:15:42,242 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:15:42,242 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:15:42,242 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:15:42,242 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:15:42,242 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:15:42,242 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:15:42,242 - sglang - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:15:42,242 - __main__ - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:15:42,243 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:15:42,243 - sglang - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:15:42,243 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:15:42,243 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,243 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,243 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:15:42,243 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:15:42,243 - sglang - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:15:42,243 - __main__ - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:15:42,243 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,243 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,243 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:15:42,243 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:15:42,243 - sglang - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:15:42,243 - __main__ - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:15:42,243 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,243 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:15:42,243 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:15:42,243 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:15:42,243 - sglang - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:15:42,243 - __main__ - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:15:42,243 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: d7db9d3b-7988-48c6-ac50-b06366e3a9c9)')
- 2025-07-20 11:15:42,243 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: d7db9d3b-7988-48c6-ac50-b06366e3a9c9)')
- 2025-07-20 11:15:42,243 - sglang - INFO -
- 2025-07-20 11:15:42,243 - __main__ - INFO -
- 2025-07-20 11:15:42,244 - sglang - INFO - [2025-07-20 11:15:42] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:15:42,244 - __main__ - INFO - [2025-07-20 11:15:42] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:15:42,475 - __main__ - WARNING - SGLang server task ended
- 2025-07-20 11:15:42,528 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 11:15:48,512 - sglang - INFO - [2025-07-20 11:15:48] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=442733111, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:15:48,512 - __main__ - INFO - [2025-07-20 11:15:48] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=442733111, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:15:48,633 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 11:15:54,713 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 11:16:00,794 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 11:16:06,873 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 11:16:12,956 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 11:16:19,037 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 11:16:25,120 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 11:16:31,201 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 11:16:37,283 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 11:16:43,365 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 11:16:49,446 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 11:16:55,529 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 11:17:01,610 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 11:17:07,695 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 11:17:13,776 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 11:17:19,857 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 11:17:25,201 - sglang - INFO - [2025-07-20 11:17:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:17:25,201 - __main__ - INFO - [2025-07-20 11:17:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:17:25,937 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 11:17:30,921 - sglang - INFO - [2025-07-20 11:17:30 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:17:30,921 - __main__ - INFO - [2025-07-20 11:17:30 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:17:32,018 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 11:17:38,101 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 11:17:44,181 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 11:17:50,263 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 11:17:51,072 - sglang - INFO - [2025-07-20 11:17:51 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:17:51,072 - __main__ - INFO - [2025-07-20 11:17:51 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:17:51,073 - sglang - INFO - [2025-07-20 11:17:51 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:17:51,073 - __main__ - INFO - [2025-07-20 11:17:51 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:17:51,073 - sglang - INFO - [2025-07-20 11:17:51 TP0] Init torch distributed begin.
- 2025-07-20 11:17:51,073 - __main__ - INFO - [2025-07-20 11:17:51 TP0] Init torch distributed begin.
- 2025-07-20 11:17:56,343 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 11:17:56,482 - sglang - INFO - [2025-07-20 11:17:56 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:17:56,482 - __main__ - INFO - [2025-07-20 11:17:56 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:18:02,424 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 11:18:07,166 - sglang - INFO - [2025-07-20 11:18:07 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:18:07,166 - __main__ - INFO - [2025-07-20 11:18:07 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:18:07,166 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:18:07,166 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:18:07,166 - sglang - INFO - sock = connection.create_connection(
- 2025-07-20 11:18:07,166 - __main__ - INFO - sock = connection.create_connection(
- 2025-07-20 11:18:07,167 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,167 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,167 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:18:07,167 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:18:07,167 - sglang - INFO - raise err
- 2025-07-20 11:18:07,167 - __main__ - INFO - raise err
- 2025-07-20 11:18:07,167 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:18:07,167 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:18:07,167 - sglang - INFO - sock.connect(sa)
- 2025-07-20 11:18:07,167 - __main__ - INFO - sock.connect(sa)
- 2025-07-20 11:18:07,167 - sglang - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:18:07,168 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:18:07,168 - sglang - INFO -
- 2025-07-20 11:18:07,168 - __main__ - INFO -
- 2025-07-20 11:18:07,168 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:18:07,168 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:18:07,168 - sglang - INFO -
- 2025-07-20 11:18:07,168 - __main__ - INFO -
- 2025-07-20 11:18:07,168 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:18:07,168 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:18:07,168 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:18:07,168 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:18:07,168 - sglang - INFO - response = self._make_request(
- 2025-07-20 11:18:07,168 - __main__ - INFO - response = self._make_request(
- 2025-07-20 11:18:07,169 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,169 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,169 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:18:07,169 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:18:07,169 - sglang - INFO - raise new_e
- 2025-07-20 11:18:07,169 - __main__ - INFO - raise new_e
- 2025-07-20 11:18:07,169 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:18:07,169 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:18:07,169 - sglang - INFO - self._validate_conn(conn)
- 2025-07-20 11:18:07,169 - __main__ - INFO - self._validate_conn(conn)
- 2025-07-20 11:18:07,169 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:18:07,169 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:18:07,169 - sglang - INFO - conn.connect()
- 2025-07-20 11:18:07,170 - __main__ - INFO - conn.connect()
- 2025-07-20 11:18:07,170 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:18:07,170 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:18:07,170 - sglang - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:18:07,170 - __main__ - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:18:07,170 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,170 - __main__ - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,170 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:18:07,170 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:18:07,170 - sglang - INFO - raise NewConnectionError(
- 2025-07-20 11:18:07,170 - __main__ - INFO - raise NewConnectionError(
- 2025-07-20 11:18:07,170 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:18:07,170 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:18:07,171 - sglang - INFO -
- 2025-07-20 11:18:07,171 - __main__ - INFO -
- 2025-07-20 11:18:07,171 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:18:07,171 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:18:07,171 - sglang - INFO -
- 2025-07-20 11:18:07,171 - __main__ - INFO -
- 2025-07-20 11:18:07,171 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:18:07,171 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:18:07,171 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:18:07,171 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:18:07,171 - sglang - INFO - resp = conn.urlopen(
- 2025-07-20 11:18:07,171 - __main__ - INFO - resp = conn.urlopen(
- 2025-07-20 11:18:07,171 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:18:07,171 - __main__ - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:18:07,172 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:18:07,172 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:18:07,172 - sglang - INFO - retries = retries.increment(
- 2025-07-20 11:18:07,172 - __main__ - INFO - retries = retries.increment(
- 2025-07-20 11:18:07,172 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,172 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,172 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:18:07,172 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:18:07,172 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:18:07,172 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:18:07,172 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,172 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,173 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:18:07,173 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:18:07,173 - sglang - INFO -
- 2025-07-20 11:18:07,173 - __main__ - INFO -
- 2025-07-20 11:18:07,173 - sglang - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:18:07,173 - __main__ - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:18:07,173 - sglang - INFO -
- 2025-07-20 11:18:07,173 - __main__ - INFO -
- 2025-07-20 11:18:07,173 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:18:07,173 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:18:07,173 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:18:07,173 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:18:07,173 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:18:07,174 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:18:07,174 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,174 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,174 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:18:07,174 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:18:07,174 - sglang - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:18:07,174 - __main__ - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:18:07,174 - sglang - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,174 - __main__ - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,174 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:18:07,174 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:18:07,174 - sglang - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:18:07,174 - __main__ - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:18:07,175 - sglang - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:18:07,175 - __main__ - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:18:07,175 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:18:07,175 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:18:07,175 - sglang - INFO - self.load_model()
- 2025-07-20 11:18:07,175 - __main__ - INFO - self.load_model()
- 2025-07-20 11:18:07,175 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:18:07,175 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:18:07,175 - sglang - INFO - self.model = get_model(
- 2025-07-20 11:18:07,175 - __main__ - INFO - self.model = get_model(
- 2025-07-20 11:18:07,175 - sglang - INFO - ^^^^^^^^^^
- 2025-07-20 11:18:07,175 - __main__ - INFO - ^^^^^^^^^^
- 2025-07-20 11:18:07,175 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:18:07,175 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:18:07,176 - sglang - INFO - return loader.load_model(
- 2025-07-20 11:18:07,176 - __main__ - INFO - return loader.load_model(
- 2025-07-20 11:18:07,176 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,176 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,176 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:18:07,176 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:18:07,176 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:18:07,176 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:18:07,176 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:18:07,176 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:18:07,176 - sglang - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:18:07,176 - __main__ - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:18:07,177 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:18:07,177 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:18:07,177 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:18:07,177 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:18:07,177 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,177 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,177 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:18:07,177 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:18:07,177 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:18:07,177 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:18:07,177 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,177 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,177 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:18:07,178 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:18:07,178 - sglang - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:18:07,178 - __main__ - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:18:07,178 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,178 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,178 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:18:07,178 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:18:07,178 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:18:07,178 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:18:07,178 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,178 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:18:07,180 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:18:07,180 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:18:07,180 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,180 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:18:07,180 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:18:07,180 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:18:07,180 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,180 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:18:07,180 - sglang - INFO - self._api.repo_info(
- 2025-07-20 11:18:07,180 - __main__ - INFO - self._api.repo_info(
- 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:18:07,180 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:18:07,180 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:18:07,180 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,180 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:18:07,180 - sglang - INFO - return method(
- 2025-07-20 11:18:07,180 - __main__ - INFO - return method(
- 2025-07-20 11:18:07,180 - sglang - INFO - ^^^^^^^
- 2025-07-20 11:18:07,180 - __main__ - INFO - ^^^^^^^
- 2025-07-20 11:18:07,181 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:18:07,181 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:18:07,181 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:18:07,233 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:18:07,233 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,233 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,233 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:18:07,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:18:07,233 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:18:07,233 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:18:07,233 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,233 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,233 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:18:07,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:18:07,234 - sglang - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:18:07,234 - __main__ - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:18:07,234 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,234 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:18:07,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:18:07,234 - sglang - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:18:07,234 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:18:07,234 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,234 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:18:07,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:18:07,234 - sglang - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:18:07,234 - __main__ - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:18:07,234 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,234 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:18:07,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:18:07,234 - sglang - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:18:07,234 - __main__ - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:18:07,234 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,234 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:18:07,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:18:07,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:18:07,234 - sglang - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:18:07,234 - __main__ - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:18:07,234 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 0e0286ef-566f-4e0f-8c78-0db3717091a5)')
- 2025-07-20 11:18:07,234 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 0e0286ef-566f-4e0f-8c78-0db3717091a5)')
- 2025-07-20 11:18:07,234 - sglang - INFO -
- 2025-07-20 11:18:07,234 - __main__ - INFO -
- 2025-07-20 11:18:07,235 - sglang - INFO - [2025-07-20 11:18:07] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:18:07,235 - __main__ - INFO - [2025-07-20 11:18:07] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:18:07,541 - __main__ - WARNING - SGLang server task ended
- 2025-07-20 11:18:08,506 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 11:18:14,589 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 11:18:14,891 - sglang - INFO - [2025-07-20 11:18:14] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=543652995, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:18:14,891 - __main__ - INFO - [2025-07-20 11:18:14] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=543652995, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:18:20,733 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 11:18:26,815 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 11:18:32,896 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-20 11:18:38,976 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-20 11:18:45,057 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-20 11:18:51,137 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-20 11:18:57,220 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-20 11:19:03,309 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-20 11:19:09,392 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-20 11:19:15,472 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-20 11:19:21,553 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-20 11:19:27,636 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-20 11:19:33,716 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-20 11:19:39,797 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-20 11:19:45,873 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-20 11:19:51,665 - sglang - INFO - [2025-07-20 11:19:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:19:51,665 - __main__ - INFO - [2025-07-20 11:19:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:19:51,960 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-20 11:19:58,041 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-07-20 11:19:58,086 - sglang - INFO - [2025-07-20 11:19:58 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:19:58,087 - __main__ - INFO - [2025-07-20 11:19:58 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:20:04,124 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
- 2025-07-20 11:20:10,205 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
- 2025-07-20 11:20:16,285 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
- 2025-07-20 11:20:18,243 - sglang - INFO - [2025-07-20 11:20:18 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:20:18,243 - __main__ - INFO - [2025-07-20 11:20:18 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:20:18,243 - sglang - INFO - [2025-07-20 11:20:18 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:20:18,243 - __main__ - INFO - [2025-07-20 11:20:18 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:20:18,243 - sglang - INFO - [2025-07-20 11:20:18 TP0] Init torch distributed begin.
- 2025-07-20 11:20:18,243 - __main__ - INFO - [2025-07-20 11:20:18 TP0] Init torch distributed begin.
- 2025-07-20 11:20:22,368 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
- 2025-07-20 11:20:23,643 - sglang - INFO - [2025-07-20 11:20:23 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:20:23,643 - __main__ - INFO - [2025-07-20 11:20:23 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:20:28,449 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
- 2025-07-20 11:20:34,348 - sglang - INFO - [2025-07-20 11:20:34 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:20:34,349 - __main__ - INFO - [2025-07-20 11:20:34 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:20:34,349 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:20:34,349 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:20:34,349 - sglang - INFO - sock = connection.create_connection(
- 2025-07-20 11:20:34,349 - __main__ - INFO - sock = connection.create_connection(
- 2025-07-20 11:20:34,349 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,349 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,349 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:20:34,349 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:20:34,349 - sglang - INFO - raise err
- 2025-07-20 11:20:34,350 - __main__ - INFO - raise err
- 2025-07-20 11:20:34,350 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:20:34,350 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:20:34,350 - sglang - INFO - sock.connect(sa)
- 2025-07-20 11:20:34,350 - __main__ - INFO - sock.connect(sa)
- 2025-07-20 11:20:34,350 - sglang - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:20:34,350 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:20:34,350 - sglang - INFO -
- 2025-07-20 11:20:34,350 - __main__ - INFO -
- 2025-07-20 11:20:34,350 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:20:34,350 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:20:34,350 - sglang - INFO -
- 2025-07-20 11:20:34,350 - __main__ - INFO -
- 2025-07-20 11:20:34,351 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:20:34,351 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:20:34,351 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:20:34,351 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:20:34,351 - sglang - INFO - response = self._make_request(
- 2025-07-20 11:20:34,351 - __main__ - INFO - response = self._make_request(
- 2025-07-20 11:20:34,351 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,351 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,351 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:20:34,351 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:20:34,351 - sglang - INFO - raise new_e
- 2025-07-20 11:20:34,351 - __main__ - INFO - raise new_e
- 2025-07-20 11:20:34,351 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:20:34,352 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:20:34,352 - sglang - INFO - self._validate_conn(conn)
- 2025-07-20 11:20:34,352 - __main__ - INFO - self._validate_conn(conn)
- 2025-07-20 11:20:34,352 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:20:34,352 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:20:34,352 - sglang - INFO - conn.connect()
- 2025-07-20 11:20:34,352 - __main__ - INFO - conn.connect()
- 2025-07-20 11:20:34,352 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:20:34,352 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:20:34,352 - sglang - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:20:34,352 - __main__ - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:20:34,352 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,352 - __main__ - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,353 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:20:34,353 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:20:34,353 - sglang - INFO - raise NewConnectionError(
- 2025-07-20 11:20:34,353 - __main__ - INFO - raise NewConnectionError(
- 2025-07-20 11:20:34,353 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:20:34,353 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:20:34,353 - sglang - INFO -
- 2025-07-20 11:20:34,353 - __main__ - INFO -
- 2025-07-20 11:20:34,353 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:20:34,353 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:20:34,353 - sglang - INFO -
- 2025-07-20 11:20:34,353 - __main__ - INFO -
- 2025-07-20 11:20:34,353 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:20:34,354 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:20:34,354 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:20:34,354 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:20:34,354 - sglang - INFO - resp = conn.urlopen(
- 2025-07-20 11:20:34,354 - __main__ - INFO - resp = conn.urlopen(
- 2025-07-20 11:20:34,354 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:20:34,354 - __main__ - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:20:34,354 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:20:34,354 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:20:34,354 - sglang - INFO - retries = retries.increment(
- 2025-07-20 11:20:34,354 - __main__ - INFO - retries = retries.increment(
- 2025-07-20 11:20:34,354 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,354 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,355 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:20:34,355 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:20:34,355 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:20:34,355 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:20:34,355 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,355 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,355 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:20:34,355 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:20:34,355 - sglang - INFO -
- 2025-07-20 11:20:34,355 - __main__ - INFO -
- 2025-07-20 11:20:34,355 - sglang - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:20:34,355 - __main__ - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:20:34,356 - sglang - INFO -
- 2025-07-20 11:20:34,356 - __main__ - INFO -
- 2025-07-20 11:20:34,356 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:20:34,356 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:20:34,356 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:20:34,356 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:20:34,356 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:20:34,356 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:20:34,356 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,356 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,356 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:20:34,356 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:20:34,356 - sglang - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:20:34,357 - __main__ - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:20:34,357 - sglang - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,357 - __main__ - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,357 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:20:34,357 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:20:34,357 - sglang - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:20:34,357 - __main__ - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:20:34,357 - sglang - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:20:34,357 - __main__ - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:20:34,357 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:20:34,357 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:20:34,357 - sglang - INFO - self.load_model()
- 2025-07-20 11:20:34,357 - __main__ - INFO - self.load_model()
- 2025-07-20 11:20:34,358 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:20:34,358 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:20:34,358 - sglang - INFO - self.model = get_model(
- 2025-07-20 11:20:34,358 - __main__ - INFO - self.model = get_model(
- 2025-07-20 11:20:34,358 - sglang - INFO - ^^^^^^^^^^
- 2025-07-20 11:20:34,358 - __main__ - INFO - ^^^^^^^^^^
- 2025-07-20 11:20:34,358 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:20:34,358 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:20:34,358 - sglang - INFO - return loader.load_model(
- 2025-07-20 11:20:34,358 - __main__ - INFO - return loader.load_model(
- 2025-07-20 11:20:34,358 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,358 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,358 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:20:34,358 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:20:34,359 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:20:34,359 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:20:34,359 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:20:34,359 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:20:34,359 - sglang - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:20:34,359 - __main__ - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:20:34,359 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:20:34,359 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:20:34,359 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:20:34,359 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:20:34,359 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,359 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,359 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:20:34,360 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:20:34,360 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:20:34,360 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:20:34,360 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,360 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,360 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:20:34,360 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:20:34,360 - sglang - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:20:34,360 - __main__ - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:20:34,360 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,360 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,360 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:20:34,360 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:20:34,361 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:20:34,361 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:20:34,361 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,361 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,361 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:20:34,361 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:20:34,361 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:20:34,361 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:20:34,361 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,361 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,361 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:20:34,361 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:20:34,361 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:20:34,361 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:20:34,361 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,361 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,361 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:20:34,361 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:20:34,361 - sglang - INFO - self._api.repo_info(
- 2025-07-20 11:20:34,362 - __main__ - INFO - self._api.repo_info(
- 2025-07-20 11:20:34,363 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:20:34,363 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:20:34,363 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:20:34,363 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:20:34,363 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,363 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,363 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:20:34,363 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:20:34,363 - sglang - INFO - return method(
- 2025-07-20 11:20:34,363 - __main__ - INFO - return method(
- 2025-07-20 11:20:34,363 - sglang - INFO - ^^^^^^^
- 2025-07-20 11:20:34,363 - __main__ - INFO - ^^^^^^^
- 2025-07-20 11:20:34,363 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:20:34,363 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:20:34,363 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:20:34,363 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:20:34,364 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,364 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,364 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:20:34,364 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:20:34,364 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:20:34,364 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:20:34,364 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,364 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,364 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:20:34,364 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:20:34,364 - sglang - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:20:34,364 - __main__ - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:20:34,364 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,364 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,364 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:20:34,364 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:20:34,364 - sglang - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:20:34,364 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:20:34,364 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,364 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,364 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:20:34,364 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:20:34,364 - sglang - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:20:34,364 - __main__ - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:20:34,365 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,365 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,365 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:20:34,365 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:20:34,365 - sglang - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:20:34,365 - __main__ - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:20:34,365 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,365 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:20:34,365 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:20:34,365 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:20:34,365 - sglang - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:20:34,365 - __main__ - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:20:34,365 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 6a081490-df5f-4ba3-bb52-3fc81355011d)')
- 2025-07-20 11:20:34,365 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 6a081490-df5f-4ba3-bb52-3fc81355011d)')
- 2025-07-20 11:20:34,365 - sglang - INFO -
- 2025-07-20 11:20:34,365 - __main__ - INFO -
- 2025-07-20 11:20:34,365 - sglang - INFO - [2025-07-20 11:20:34] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:20:34,365 - __main__ - INFO - [2025-07-20 11:20:34] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:20:34,533 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
- 2025-07-20 11:20:34,619 - __main__ - WARNING - SGLang server task ended
- 2025-07-20 11:20:40,624 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
- 2025-07-20 11:20:42,094 - sglang - INFO - [2025-07-20 11:20:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=741775413, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:20:42,094 - __main__ - INFO - [2025-07-20 11:20:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=741775413, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:20:46,686 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
- 2025-07-20 11:20:52,765 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
- 2025-07-20 11:20:58,847 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
- 2025-07-20 11:21:04,928 - __main__ - WARNING - Attempt 77: Please wait for sglang server to become ready...
- 2025-07-20 11:21:11,009 - __main__ - WARNING - Attempt 78: Please wait for sglang server to become ready...
- 2025-07-20 11:21:17,099 - __main__ - WARNING - Attempt 79: Please wait for sglang server to become ready...
- 2025-07-20 11:21:23,177 - __main__ - WARNING - Attempt 80: Please wait for sglang server to become ready...
- 2025-07-20 11:21:29,255 - __main__ - WARNING - Attempt 81: Please wait for sglang server to become ready...
- 2025-07-20 11:21:35,334 - __main__ - WARNING - Attempt 82: Please wait for sglang server to become ready...
- 2025-07-20 11:21:41,415 - __main__ - WARNING - Attempt 83: Please wait for sglang server to become ready...
- 2025-07-20 11:21:47,493 - __main__ - WARNING - Attempt 84: Please wait for sglang server to become ready...
- 2025-07-20 11:21:53,583 - __main__ - WARNING - Attempt 85: Please wait for sglang server to become ready...
- 2025-07-20 11:21:59,665 - __main__ - WARNING - Attempt 86: Please wait for sglang server to become ready...
- 2025-07-20 11:22:05,746 - __main__ - WARNING - Attempt 87: Please wait for sglang server to become ready...
- 2025-07-20 11:22:11,830 - __main__ - WARNING - Attempt 88: Please wait for sglang server to become ready...
- 2025-07-20 11:22:17,911 - __main__ - WARNING - Attempt 89: Please wait for sglang server to become ready...
- 2025-07-20 11:22:18,669 - sglang - INFO - [2025-07-20 11:22:18] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:22:18,670 - __main__ - INFO - [2025-07-20 11:22:18] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:22:23,992 - __main__ - WARNING - Attempt 90: Please wait for sglang server to become ready...
- 2025-07-20 11:22:24,827 - sglang - INFO - [2025-07-20 11:22:24 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:22:24,827 - __main__ - INFO - [2025-07-20 11:22:24 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:22:30,073 - __main__ - WARNING - Attempt 91: Please wait for sglang server to become ready...
- 2025-07-20 11:22:36,155 - __main__ - WARNING - Attempt 92: Please wait for sglang server to become ready...
- 2025-07-20 11:22:42,236 - __main__ - WARNING - Attempt 93: Please wait for sglang server to become ready...
- 2025-07-20 11:22:45,011 - sglang - INFO - [2025-07-20 11:22:45 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:22:45,012 - __main__ - INFO - [2025-07-20 11:22:45 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:22:45,012 - sglang - INFO - [2025-07-20 11:22:45 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:22:45,012 - __main__ - INFO - [2025-07-20 11:22:45 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:22:45,012 - sglang - INFO - [2025-07-20 11:22:45 TP0] Init torch distributed begin.
- 2025-07-20 11:22:45,012 - __main__ - INFO - [2025-07-20 11:22:45 TP0] Init torch distributed begin.
- 2025-07-20 11:22:48,322 - __main__ - WARNING - Attempt 94: Please wait for sglang server to become ready...
- 2025-07-20 11:22:50,393 - sglang - INFO - [2025-07-20 11:22:50 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:22:50,393 - __main__ - INFO - [2025-07-20 11:22:50 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:22:54,404 - __main__ - WARNING - Attempt 95: Please wait for sglang server to become ready...
- 2025-07-20 11:23:00,485 - __main__ - WARNING - Attempt 96: Please wait for sglang server to become ready...
- 2025-07-20 11:23:01,080 - sglang - INFO - [2025-07-20 11:23:01 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:23:01,080 - __main__ - INFO - [2025-07-20 11:23:01 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:23:01,080 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:23:01,080 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:23:01,080 - sglang - INFO - sock = connection.create_connection(
- 2025-07-20 11:23:01,080 - __main__ - INFO - sock = connection.create_connection(
- 2025-07-20 11:23:01,081 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,081 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,081 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:23:01,081 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:23:01,081 - sglang - INFO - raise err
- 2025-07-20 11:23:01,081 - __main__ - INFO - raise err
- 2025-07-20 11:23:01,081 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:23:01,081 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:23:01,081 - sglang - INFO - sock.connect(sa)
- 2025-07-20 11:23:01,081 - __main__ - INFO - sock.connect(sa)
- 2025-07-20 11:23:01,081 - sglang - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:23:01,081 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:23:01,081 - sglang - INFO -
- 2025-07-20 11:23:01,081 - __main__ - INFO -
- 2025-07-20 11:23:01,081 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:23:01,081 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:23:01,081 - sglang - INFO -
- 2025-07-20 11:23:01,081 - __main__ - INFO -
- 2025-07-20 11:23:01,081 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:23:01,082 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:23:01,082 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:23:01,082 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:23:01,082 - sglang - INFO - response = self._make_request(
- 2025-07-20 11:23:01,082 - __main__ - INFO - response = self._make_request(
- 2025-07-20 11:23:01,082 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,082 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,082 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:23:01,082 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:23:01,082 - sglang - INFO - raise new_e
- 2025-07-20 11:23:01,082 - __main__ - INFO - raise new_e
- 2025-07-20 11:23:01,082 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:23:01,083 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:23:01,083 - sglang - INFO - self._validate_conn(conn)
- 2025-07-20 11:23:01,083 - __main__ - INFO - self._validate_conn(conn)
- 2025-07-20 11:23:01,083 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:23:01,083 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:23:01,083 - sglang - INFO - conn.connect()
- 2025-07-20 11:23:01,083 - __main__ - INFO - conn.connect()
- 2025-07-20 11:23:01,083 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:23:01,083 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:23:01,083 - sglang - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:23:01,083 - __main__ - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:23:01,083 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,083 - __main__ - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,084 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:23:01,084 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:23:01,084 - sglang - INFO - raise NewConnectionError(
- 2025-07-20 11:23:01,084 - __main__ - INFO - raise NewConnectionError(
- 2025-07-20 11:23:01,084 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:23:01,084 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:23:01,084 - sglang - INFO -
- 2025-07-20 11:23:01,084 - __main__ - INFO -
- 2025-07-20 11:23:01,084 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:23:01,084 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:23:01,084 - sglang - INFO -
- 2025-07-20 11:23:01,084 - __main__ - INFO -
- 2025-07-20 11:23:01,084 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:23:01,084 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:23:01,084 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:23:01,085 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:23:01,085 - sglang - INFO - resp = conn.urlopen(
- 2025-07-20 11:23:01,085 - __main__ - INFO - resp = conn.urlopen(
- 2025-07-20 11:23:01,085 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:23:01,085 - __main__ - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:23:01,085 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:23:01,085 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:23:01,085 - sglang - INFO - retries = retries.increment(
- 2025-07-20 11:23:01,085 - __main__ - INFO - retries = retries.increment(
- 2025-07-20 11:23:01,085 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,085 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,085 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:23:01,085 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:23:01,085 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:23:01,085 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:23:01,085 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,085 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,085 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:23:01,085 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:23:01,085 - sglang - INFO -
- 2025-07-20 11:23:01,085 - __main__ - INFO -
- 2025-07-20 11:23:01,085 - sglang - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:23:01,085 - __main__ - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:23:01,085 - sglang - INFO -
- 2025-07-20 11:23:01,085 - __main__ - INFO -
- 2025-07-20 11:23:01,085 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:23:01,085 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:23:01,085 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:23:01,086 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:23:01,086 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:23:01,086 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,086 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,086 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:23:01,086 - sglang - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:23:01,086 - __main__ - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:23:01,086 - sglang - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,086 - __main__ - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,086 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:23:01,086 - sglang - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:23:01,086 - __main__ - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:23:01,086 - sglang - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:23:01,086 - __main__ - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:23:01,086 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:23:01,086 - sglang - INFO - self.load_model()
- 2025-07-20 11:23:01,086 - __main__ - INFO - self.load_model()
- 2025-07-20 11:23:01,086 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:23:01,086 - sglang - INFO - self.model = get_model(
- 2025-07-20 11:23:01,087 - __main__ - INFO - self.model = get_model(
- 2025-07-20 11:23:01,087 - sglang - INFO - ^^^^^^^^^^
- 2025-07-20 11:23:01,087 - __main__ - INFO - ^^^^^^^^^^
- 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:23:01,087 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:23:01,087 - sglang - INFO - return loader.load_model(
- 2025-07-20 11:23:01,087 - __main__ - INFO - return loader.load_model(
- 2025-07-20 11:23:01,087 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,087 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:23:01,087 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:23:01,087 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:23:01,087 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:23:01,087 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:23:01,087 - sglang - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:23:01,087 - __main__ - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:23:01,087 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:23:01,087 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:23:01,087 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:23:01,087 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,087 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:23:01,088 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:23:01,088 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:23:01,088 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:23:01,088 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,088 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,088 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:23:01,088 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:23:01,088 - sglang - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:23:01,088 - __main__ - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:23:01,088 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,088 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,088 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:23:01,088 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:23:01,088 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:23:01,088 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:23:01,088 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,088 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,088 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:23:01,088 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:23:01,088 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:23:01,088 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:23:01,088 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,088 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:23:01,089 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:23:01,089 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:23:01,089 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,089 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:23:01,089 - sglang - INFO - self._api.repo_info(
- 2025-07-20 11:23:01,089 - __main__ - INFO - self._api.repo_info(
- 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:23:01,089 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:23:01,089 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:23:01,089 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,089 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:23:01,089 - sglang - INFO - return method(
- 2025-07-20 11:23:01,089 - __main__ - INFO - return method(
- 2025-07-20 11:23:01,089 - sglang - INFO - ^^^^^^^
- 2025-07-20 11:23:01,089 - __main__ - INFO - ^^^^^^^
- 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:23:01,090 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:23:01,090 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:23:01,090 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,090 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,090 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:23:01,090 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:23:01,090 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:23:01,090 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:23:01,090 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,090 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,090 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:23:01,090 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:23:01,090 - sglang - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:23:01,090 - __main__ - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:23:01,090 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,090 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,090 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:23:01,090 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:23:01,090 - sglang - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:23:01,090 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:23:01,090 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,090 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,090 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:23:01,090 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:23:01,091 - sglang - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:23:01,091 - __main__ - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:23:01,091 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,091 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,091 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:23:01,091 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:23:01,091 - sglang - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:23:01,091 - __main__ - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:23:01,091 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,091 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:23:01,091 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:23:01,091 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:23:01,091 - sglang - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:23:01,091 - __main__ - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:23:01,091 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 3a71c81f-a953-43be-9246-0327729c923d)')
- 2025-07-20 11:23:01,091 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 3a71c81f-a953-43be-9246-0327729c923d)')
- 2025-07-20 11:23:01,091 - sglang - INFO -
- 2025-07-20 11:23:01,091 - __main__ - INFO -
- 2025-07-20 11:23:01,091 - sglang - INFO - [2025-07-20 11:23:01] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:23:01,091 - __main__ - INFO - [2025-07-20 11:23:01] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:23:01,432 - __main__ - WARNING - SGLang server task ended
- 2025-07-20 11:23:06,644 - __main__ - WARNING - Attempt 97: Please wait for sglang server to become ready...
- 2025-07-20 11:23:08,746 - sglang - INFO - [2025-07-20 11:23:08] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=935034446, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:23:08,746 - __main__ - INFO - [2025-07-20 11:23:08] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=935034446, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 11:23:12,705 - __main__ - WARNING - Attempt 98: Please wait for sglang server to become ready...
- 2025-07-20 11:23:18,766 - __main__ - WARNING - Attempt 99: Please wait for sglang server to become ready...
- 2025-07-20 11:23:24,846 - __main__ - WARNING - Attempt 100: Please wait for sglang server to become ready...
- 2025-07-20 11:23:30,927 - __main__ - WARNING - Attempt 101: Please wait for sglang server to become ready...
- 2025-07-20 11:23:37,008 - __main__ - WARNING - Attempt 102: Please wait for sglang server to become ready...
- 2025-07-20 11:23:43,087 - __main__ - WARNING - Attempt 103: Please wait for sglang server to become ready...
- 2025-07-20 11:23:49,168 - __main__ - WARNING - Attempt 104: Please wait for sglang server to become ready...
- 2025-07-20 11:23:55,248 - __main__ - WARNING - Attempt 105: Please wait for sglang server to become ready...
- 2025-07-20 11:24:01,341 - __main__ - WARNING - Attempt 106: Please wait for sglang server to become ready...
- 2025-07-20 11:24:07,423 - __main__ - WARNING - Attempt 107: Please wait for sglang server to become ready...
- 2025-07-20 11:24:13,505 - __main__ - WARNING - Attempt 108: Please wait for sglang server to become ready...
- 2025-07-20 11:24:19,585 - __main__ - WARNING - Attempt 109: Please wait for sglang server to become ready...
- 2025-07-20 11:24:25,666 - __main__ - WARNING - Attempt 110: Please wait for sglang server to become ready...
- 2025-07-20 11:24:31,748 - __main__ - WARNING - Attempt 111: Please wait for sglang server to become ready...
- 2025-07-20 11:24:37,829 - __main__ - WARNING - Attempt 112: Please wait for sglang server to become ready...
- 2025-07-20 11:24:43,910 - __main__ - WARNING - Attempt 113: Please wait for sglang server to become ready...
- 2025-07-20 11:24:45,439 - sglang - INFO - [2025-07-20 11:24:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:24:45,439 - __main__ - INFO - [2025-07-20 11:24:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 11:24:49,991 - __main__ - WARNING - Attempt 114: Please wait for sglang server to become ready...
- 2025-07-20 11:24:51,481 - sglang - INFO - [2025-07-20 11:24:51 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:24:51,481 - __main__ - INFO - [2025-07-20 11:24:51 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 11:24:56,075 - __main__ - WARNING - Attempt 115: Please wait for sglang server to become ready...
- 2025-07-20 11:25:02,156 - __main__ - WARNING - Attempt 116: Please wait for sglang server to become ready...
- 2025-07-20 11:25:08,237 - __main__ - WARNING - Attempt 117: Please wait for sglang server to become ready...
- 2025-07-20 11:25:11,652 - sglang - INFO - [2025-07-20 11:25:11 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:25:11,653 - __main__ - INFO - [2025-07-20 11:25:11 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 11:25:11,653 - sglang - INFO - [2025-07-20 11:25:11 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:25:11,653 - __main__ - INFO - [2025-07-20 11:25:11 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 11:25:11,653 - sglang - INFO - [2025-07-20 11:25:11 TP0] Init torch distributed begin.
- 2025-07-20 11:25:11,653 - __main__ - INFO - [2025-07-20 11:25:11 TP0] Init torch distributed begin.
- 2025-07-20 11:25:14,319 - __main__ - WARNING - Attempt 118: Please wait for sglang server to become ready...
- 2025-07-20 11:25:17,053 - sglang - INFO - [2025-07-20 11:25:17 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:25:17,053 - __main__ - INFO - [2025-07-20 11:25:17 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 11:25:20,401 - __main__ - WARNING - Attempt 119: Please wait for sglang server to become ready...
- 2025-07-20 11:25:26,481 - __main__ - WARNING - Attempt 120: Please wait for sglang server to become ready...
- 2025-07-20 11:25:27,747 - sglang - INFO - [2025-07-20 11:25:27 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:25:27,748 - __main__ - INFO - [2025-07-20 11:25:27 TP0] Scheduler hit an exception: Traceback (most recent call last):
- 2025-07-20 11:25:27,748 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:25:27,748 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
- 2025-07-20 11:25:27,748 - sglang - INFO - sock = connection.create_connection(
- 2025-07-20 11:25:27,748 - __main__ - INFO - sock = connection.create_connection(
- 2025-07-20 11:25:27,748 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,748 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,748 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:25:27,748 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
- 2025-07-20 11:25:27,748 - sglang - INFO - raise err
- 2025-07-20 11:25:27,748 - __main__ - INFO - raise err
- 2025-07-20 11:25:27,748 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:25:27,748 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
- 2025-07-20 11:25:27,748 - sglang - INFO - sock.connect(sa)
- 2025-07-20 11:25:27,748 - __main__ - INFO - sock.connect(sa)
- 2025-07-20 11:25:27,748 - sglang - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:25:27,748 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
- 2025-07-20 11:25:27,749 - sglang - INFO -
- 2025-07-20 11:25:27,749 - __main__ - INFO -
- 2025-07-20 11:25:27,749 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:25:27,749 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:25:27,749 - sglang - INFO -
- 2025-07-20 11:25:27,749 - __main__ - INFO -
- 2025-07-20 11:25:27,749 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:25:27,749 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:25:27,749 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:25:27,749 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
- 2025-07-20 11:25:27,749 - sglang - INFO - response = self._make_request(
- 2025-07-20 11:25:27,749 - __main__ - INFO - response = self._make_request(
- 2025-07-20 11:25:27,749 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,749 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,749 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:25:27,749 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
- 2025-07-20 11:25:27,749 - sglang - INFO - raise new_e
- 2025-07-20 11:25:27,749 - __main__ - INFO - raise new_e
- 2025-07-20 11:25:27,749 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:25:27,749 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
- 2025-07-20 11:25:27,749 - sglang - INFO - self._validate_conn(conn)
- 2025-07-20 11:25:27,749 - __main__ - INFO - self._validate_conn(conn)
- 2025-07-20 11:25:27,749 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:25:27,750 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
- 2025-07-20 11:25:27,750 - sglang - INFO - conn.connect()
- 2025-07-20 11:25:27,750 - __main__ - INFO - conn.connect()
- 2025-07-20 11:25:27,750 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:25:27,750 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
- 2025-07-20 11:25:27,750 - sglang - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:25:27,750 - __main__ - INFO - self.sock = sock = self._new_conn()
- 2025-07-20 11:25:27,750 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,750 - __main__ - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,750 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:25:27,750 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
- 2025-07-20 11:25:27,750 - sglang - INFO - raise NewConnectionError(
- 2025-07-20 11:25:27,750 - __main__ - INFO - raise NewConnectionError(
- 2025-07-20 11:25:27,750 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:25:27,750 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
- 2025-07-20 11:25:27,750 - sglang - INFO -
- 2025-07-20 11:25:27,750 - __main__ - INFO -
- 2025-07-20 11:25:27,750 - sglang - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:25:27,750 - __main__ - INFO - The above exception was the direct cause of the following exception:
- 2025-07-20 11:25:27,750 - sglang - INFO -
- 2025-07-20 11:25:27,750 - __main__ - INFO -
- 2025-07-20 11:25:27,750 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:25:27,750 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:25:27,750 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:25:27,751 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
- 2025-07-20 11:25:27,751 - sglang - INFO - resp = conn.urlopen(
- 2025-07-20 11:25:27,751 - __main__ - INFO - resp = conn.urlopen(
- 2025-07-20 11:25:27,751 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:25:27,751 - __main__ - INFO - ^^^^^^^^^^^^^
- 2025-07-20 11:25:27,751 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:25:27,751 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
- 2025-07-20 11:25:27,751 - sglang - INFO - retries = retries.increment(
- 2025-07-20 11:25:27,751 - __main__ - INFO - retries = retries.increment(
- 2025-07-20 11:25:27,751 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,751 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,751 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:25:27,751 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
- 2025-07-20 11:25:27,751 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:25:27,751 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
- 2025-07-20 11:25:27,751 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,751 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,751 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:25:27,751 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
- 2025-07-20 11:25:27,751 - sglang - INFO -
- 2025-07-20 11:25:27,751 - __main__ - INFO -
- 2025-07-20 11:25:27,751 - sglang - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:25:27,752 - __main__ - INFO - During handling of the above exception, another exception occurred:
- 2025-07-20 11:25:27,752 - sglang - INFO -
- 2025-07-20 11:25:27,752 - __main__ - INFO -
- 2025-07-20 11:25:27,752 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 11:25:27,752 - __main__ - INFO - Traceback (most recent call last):
- 2025-07-20 11:25:27,752 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:25:27,752 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
- 2025-07-20 11:25:27,752 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:25:27,752 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
- 2025-07-20 11:25:27,752 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,752 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,752 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:25:27,752 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
- 2025-07-20 11:25:27,752 - sglang - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:25:27,752 - __main__ - INFO - self.tp_worker = TpWorkerClass(
- 2025-07-20 11:25:27,752 - sglang - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,752 - __main__ - INFO - ^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,752 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:25:27,752 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
- 2025-07-20 11:25:27,752 - sglang - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:25:27,752 - __main__ - INFO - self.model_runner = ModelRunner(
- 2025-07-20 11:25:27,752 - sglang - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:25:27,752 - __main__ - INFO - ^^^^^^^^^^^^
- 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
- 2025-07-20 11:25:27,753 - sglang - INFO - self.load_model()
- 2025-07-20 11:25:27,753 - __main__ - INFO - self.load_model()
- 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
- 2025-07-20 11:25:27,753 - sglang - INFO - self.model = get_model(
- 2025-07-20 11:25:27,753 - __main__ - INFO - self.model = get_model(
- 2025-07-20 11:25:27,753 - sglang - INFO - ^^^^^^^^^^
- 2025-07-20 11:25:27,753 - __main__ - INFO - ^^^^^^^^^^
- 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
- 2025-07-20 11:25:27,753 - sglang - INFO - return loader.load_model(
- 2025-07-20 11:25:27,753 - __main__ - INFO - return loader.load_model(
- 2025-07-20 11:25:27,753 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,753 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
- 2025-07-20 11:25:27,753 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:25:27,753 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
- 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
- 2025-07-20 11:25:27,754 - sglang - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:25:27,754 - __main__ - INFO - for name, loaded_weight in weights:
- 2025-07-20 11:25:27,754 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:25:27,754 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
- 2025-07-20 11:25:27,754 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:25:27,754 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
- 2025-07-20 11:25:27,754 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,754 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,754 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:25:27,754 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
- 2025-07-20 11:25:27,754 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:25:27,754 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
- 2025-07-20 11:25:27,754 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,754 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,754 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:25:27,754 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
- 2025-07-20 11:25:27,754 - sglang - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:25:27,754 - __main__ - INFO - hf_folder = download_weights_from_hf(
- 2025-07-20 11:25:27,754 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,754 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,754 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:25:27,754 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
- 2025-07-20 11:25:27,754 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:25:27,755 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
- 2025-07-20 11:25:27,755 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,755 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,755 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:25:27,755 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
- 2025-07-20 11:25:27,755 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:25:27,755 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
- 2025-07-20 11:25:27,755 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,755 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,755 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:25:27,755 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
- 2025-07-20 11:25:27,755 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:25:27,755 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
- 2025-07-20 11:25:27,755 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,755 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,755 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:25:27,755 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
- 2025-07-20 11:25:27,755 - sglang - INFO - self._api.repo_info(
- 2025-07-20 11:25:27,755 - __main__ - INFO - self._api.repo_info(
- 2025-07-20 11:25:27,755 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:25:27,755 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:25:27,755 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:25:27,756 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:25:27,756 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,756 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,756 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:25:27,756 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
- 2025-07-20 11:25:27,756 - sglang - INFO - return method(
- 2025-07-20 11:25:27,756 - __main__ - INFO - return method(
- 2025-07-20 11:25:27,756 - sglang - INFO - ^^^^^^^
- 2025-07-20 11:25:27,756 - __main__ - INFO - ^^^^^^^
- 2025-07-20 11:25:27,756 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:25:27,756 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
- 2025-07-20 11:25:27,756 - sglang - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:25:27,756 - __main__ - INFO - return fn(*args, **kwargs)
- 2025-07-20 11:25:27,756 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,756 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,756 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:25:27,756 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
- 2025-07-20 11:25:27,756 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:25:27,756 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
- 2025-07-20 11:25:27,756 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,756 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,756 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:25:27,757 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
- 2025-07-20 11:25:27,757 - sglang - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:25:27,757 - __main__ - INFO - return self.request("GET", url, **kwargs)
- 2025-07-20 11:25:27,757 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,757 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,757 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:25:27,757 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
- 2025-07-20 11:25:27,757 - sglang - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:25:27,757 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
- 2025-07-20 11:25:27,757 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,757 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,757 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:25:27,757 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
- 2025-07-20 11:25:27,757 - sglang - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:25:27,757 - __main__ - INFO - r = adapter.send(request, **kwargs)
- 2025-07-20 11:25:27,757 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,757 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,757 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:25:27,757 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
- 2025-07-20 11:25:27,757 - sglang - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:25:27,757 - __main__ - INFO - return super().send(request, *args, **kwargs)
- 2025-07-20 11:25:27,757 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,757 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 11:25:27,758 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:25:27,758 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
- 2025-07-20 11:25:27,758 - sglang - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:25:27,758 - __main__ - INFO - raise ConnectionError(e, request=request)
- 2025-07-20 11:25:27,758 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 9331f4da-f005-4022-a34a-4bb0423deb4d)')
- 2025-07-20 11:25:27,758 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 9331f4da-f005-4022-a34a-4bb0423deb4d)')
- 2025-07-20 11:25:27,758 - sglang - INFO -
- 2025-07-20 11:25:27,758 - __main__ - INFO -
- 2025-07-20 11:25:27,758 - sglang - INFO - [2025-07-20 11:25:27] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:25:27,758 - __main__ - INFO - [2025-07-20 11:25:27] Received sigquit from a child proces. It usually means the child failed.
- 2025-07-20 11:25:28,039 - __main__ - WARNING - SGLang server task ended
- 2025-07-20 11:25:28,040 - __main__ - ERROR - Ended up starting the sglang server more than 5 times, cancelling pipeline
- 2025-07-20 11:25:28,040 - __main__ - ERROR -
- 2025-07-20 11:25:28,040 - __main__ - ERROR - Please make sure sglang is installed according to the latest instructions here: https://docs.sglang.ai/start/install.html
- 2025-07-20 15:06:33,448 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 15:06:33,448 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 15:06:33,448 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 15:06:33,451 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 15:06:33,678 - __main__ - INFO - Starting pipeline with PID 589922
- 2025-07-20 15:06:33,679 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 15:06:38,788 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 15:06:40,602 - sglang - INFO - [2025-07-20 15:06:40] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=555501304, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:06:40,603 - __main__ - INFO - [2025-07-20 15:06:40] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=555501304, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:06:41,702 - sglang - INFO - [2025-07-20 15:06:41] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:06:41,702 - __main__ - INFO - [2025-07-20 15:06:41] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:06:44,868 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 15:06:47,936 - sglang - INFO - [2025-07-20 15:06:47 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:06:47,936 - __main__ - INFO - [2025-07-20 15:06:47 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:06:47,938 - sglang - INFO - [2025-07-20 15:06:47 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:06:47,938 - __main__ - INFO - [2025-07-20 15:06:47 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:06:47,938 - sglang - INFO - [2025-07-20 15:06:47 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:06:47,938 - __main__ - INFO - [2025-07-20 15:06:47 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:06:47,939 - sglang - INFO - [2025-07-20 15:06:47 TP0] Init torch distributed begin.
- 2025-07-20 15:06:47,939 - __main__ - INFO - [2025-07-20 15:06:47 TP0] Init torch distributed begin.
- 2025-07-20 15:06:50,947 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 15:06:53,328 - sglang - INFO - [2025-07-20 15:06:53 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:06:53,328 - __main__ - INFO - [2025-07-20 15:06:53 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:06:53,875 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:06:53,875 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:06:54,879 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.00s/it]
- 2025-07-20 15:06:54,879 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.00s/it]
- 2025-07-20 15:06:56,046 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.10s/it]
- 2025-07-20 15:06:56,046 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.10s/it]
- 2025-07-20 15:06:57,027 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 15:06:57,169 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.11s/it]
- 2025-07-20 15:06:57,170 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.11s/it]
- 2025-07-20 15:06:57,692 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
- 2025-07-20 15:06:57,693 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
- 2025-07-20 15:06:57,693 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05it/s]
- 2025-07-20 15:06:57,693 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05it/s]
- 2025-07-20 15:06:57,693 - sglang - INFO -
- 2025-07-20 15:06:57,693 - __main__ - INFO -
- 2025-07-20 15:06:57,772 - sglang - INFO - [2025-07-20 15:06:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:06:57,773 - __main__ - INFO - [2025-07-20 15:06:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:06:57,778 - sglang - INFO - [2025-07-20 15:06:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:06:57,778 - __main__ - INFO - [2025-07-20 15:06:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:06:57,779 - sglang - INFO - [2025-07-20 15:06:57 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:06:57,779 - __main__ - INFO - [2025-07-20 15:06:57 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:06:57,924 - sglang - INFO - [2025-07-20 15:06:57 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:06:57,924 - __main__ - INFO - [2025-07-20 15:06:57 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:06:59,713 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.79it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.34it/s]
100%|██████████| 4/4 [00:01<00:00, 2.73it/s]
100%|██████████| 4/4 [00:01<00:00, 2.24it/s]
- 2025-07-20 15:06:59,714 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.79it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.34it/s]
100%|██████████| 4/4 [00:01<00:00, 2.73it/s]
100%|██████████| 4/4 [00:01<00:00, 2.24it/s]
- 2025-07-20 15:06:59,714 - sglang - INFO - [2025-07-20 15:06:59 TP0] Capture cuda graph end. Time elapsed: 1.79 s
- 2025-07-20 15:06:59,714 - __main__ - INFO - [2025-07-20 15:06:59 TP0] Capture cuda graph end. Time elapsed: 1.79 s
- 2025-07-20 15:07:00,413 - sglang - INFO - [2025-07-20 15:07:00 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:07:00,413 - __main__ - INFO - [2025-07-20 15:07:00 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:07:00,503 - sglang - INFO - [2025-07-20 15:07:00] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 15:07:00,503 - __main__ - INFO - [2025-07-20 15:07:00] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 15:07:03,108 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 15:07:09,179 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 15:07:15,259 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 15:07:21,339 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 15:07:27,419 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 15:07:33,500 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 15:07:39,592 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 15:07:45,671 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 15:07:51,751 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 15:07:57,832 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 15:08:03,913 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 15:08:09,987 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 15:08:16,068 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 15:08:22,150 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 15:08:28,231 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 15:08:34,312 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 15:08:40,392 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 15:08:46,472 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 15:08:52,553 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 15:08:58,634 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 15:09:04,715 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 15:09:10,797 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 15:09:16,859 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 15:09:22,940 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 15:09:29,020 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 15:09:35,079 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 15:09:41,158 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 15:09:47,206 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 15:09:53,285 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 15:09:59,363 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 15:10:05,451 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 15:10:11,532 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 15:10:17,623 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 15:10:23,705 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 15:10:29,787 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 15:10:35,867 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 15:10:41,949 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 15:10:48,029 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 15:10:54,111 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 15:11:00,192 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 15:11:06,272 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 15:11:12,353 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 15:11:18,433 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 15:11:24,513 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 15:11:30,551 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 15:11:36,632 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 15:11:42,713 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 15:11:48,795 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-20 15:11:54,875 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-20 15:12:00,956 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-20 15:12:07,037 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-20 15:12:13,120 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-20 15:12:19,201 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-20 15:12:25,290 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-20 15:12:31,371 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-20 15:12:37,454 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-20 15:12:43,536 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-20 15:12:49,616 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-20 15:12:55,697 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-20 15:13:01,777 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-20 15:13:07,857 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-20 15:13:13,947 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-07-20 15:13:20,040 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
- 2025-07-20 15:13:26,127 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
- 2025-07-20 15:13:32,208 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
- 2025-07-20 15:13:38,288 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
- 2025-07-20 15:13:44,368 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
- 2025-07-20 15:13:50,448 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
- 2025-07-20 15:13:56,528 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
- 2025-07-20 15:14:02,607 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
- 2025-07-20 15:14:08,688 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
- 2025-07-20 15:14:14,769 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
- 2025-07-20 15:14:20,807 - __main__ - WARNING - Attempt 77: Please wait for sglang server to become ready...
- 2025-07-20 15:14:26,849 - __main__ - WARNING - Attempt 78: Please wait for sglang server to become ready...
- 2025-07-20 15:14:32,930 - __main__ - WARNING - Attempt 79: Please wait for sglang server to become ready...
- 2025-07-20 15:14:39,011 - __main__ - WARNING - Attempt 80: Please wait for sglang server to become ready...
- 2025-07-20 15:14:45,084 - __main__ - WARNING - Attempt 81: Please wait for sglang server to become ready...
- 2025-07-20 15:14:51,164 - __main__ - WARNING - Attempt 82: Please wait for sglang server to become ready...
- 2025-07-20 15:14:57,217 - __main__ - WARNING - Attempt 83: Please wait for sglang server to become ready...
- 2025-07-20 15:15:03,297 - __main__ - WARNING - Attempt 84: Please wait for sglang server to become ready...
- 2025-07-20 15:15:09,377 - __main__ - WARNING - Attempt 85: Please wait for sglang server to become ready...
- 2025-07-20 15:15:15,457 - __main__ - WARNING - Attempt 86: Please wait for sglang server to become ready...
- 2025-07-20 15:15:21,546 - __main__ - WARNING - Attempt 87: Please wait for sglang server to become ready...
- 2025-07-20 15:15:27,625 - __main__ - WARNING - Attempt 88: Please wait for sglang server to become ready...
- 2025-07-20 15:15:33,704 - __main__ - WARNING - Attempt 89: Please wait for sglang server to become ready...
- 2025-07-20 15:15:39,741 - __main__ - WARNING - Attempt 90: Please wait for sglang server to become ready...
- 2025-07-20 15:15:45,819 - __main__ - WARNING - Attempt 91: Please wait for sglang server to become ready...
- 2025-07-20 15:15:51,900 - __main__ - WARNING - Attempt 92: Please wait for sglang server to become ready...
- 2025-07-20 15:15:57,980 - __main__ - WARNING - Attempt 93: Please wait for sglang server to become ready...
- 2025-07-20 15:16:04,060 - __main__ - WARNING - Attempt 94: Please wait for sglang server to become ready...
- 2025-07-20 15:16:10,141 - __main__ - WARNING - Attempt 95: Please wait for sglang server to become ready...
- 2025-07-20 15:16:16,222 - __main__ - WARNING - Attempt 96: Please wait for sglang server to become ready...
- 2025-07-20 15:16:22,302 - __main__ - WARNING - Attempt 97: Please wait for sglang server to become ready...
- 2025-07-20 15:16:28,382 - __main__ - WARNING - Attempt 98: Please wait for sglang server to become ready...
- 2025-07-20 15:16:34,462 - __main__ - WARNING - Attempt 99: Please wait for sglang server to become ready...
- 2025-07-20 15:16:40,544 - __main__ - WARNING - Attempt 100: Please wait for sglang server to become ready...
- 2025-07-20 15:16:46,624 - __main__ - WARNING - Attempt 101: Please wait for sglang server to become ready...
- 2025-07-20 15:16:52,704 - __main__ - WARNING - Attempt 102: Please wait for sglang server to become ready...
- 2025-07-20 15:16:58,785 - __main__ - WARNING - Attempt 103: Please wait for sglang server to become ready...
- 2025-07-20 15:17:04,865 - __main__ - WARNING - Attempt 104: Please wait for sglang server to become ready...
- 2025-07-20 15:17:10,945 - __main__ - WARNING - Attempt 105: Please wait for sglang server to become ready...
- 2025-07-20 15:17:17,028 - __main__ - WARNING - Attempt 106: Please wait for sglang server to become ready...
- 2025-07-20 15:17:23,107 - __main__ - WARNING - Attempt 107: Please wait for sglang server to become ready...
- 2025-07-20 15:17:29,187 - __main__ - WARNING - Attempt 108: Please wait for sglang server to become ready...
- 2025-07-20 15:17:35,268 - __main__ - WARNING - Attempt 109: Please wait for sglang server to become ready...
- 2025-07-20 15:17:41,347 - __main__ - WARNING - Attempt 110: Please wait for sglang server to become ready...
- 2025-07-20 15:17:47,428 - __main__ - WARNING - Attempt 111: Please wait for sglang server to become ready...
- 2025-07-20 15:17:53,508 - __main__ - WARNING - Attempt 112: Please wait for sglang server to become ready...
- 2025-07-20 15:17:59,588 - __main__ - WARNING - Attempt 113: Please wait for sglang server to become ready...
- 2025-07-20 15:18:00,129 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 15:19:30,848 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 15:19:30,848 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 15:19:30,848 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 15:19:30,851 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 15:19:31,040 - __main__ - INFO - Starting pipeline with PID 591212
- 2025-07-20 15:19:31,040 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 15:19:36,112 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 15:19:37,649 - sglang - INFO - [2025-07-20 15:19:37] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=862406034, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:19:37,649 - __main__ - INFO - [2025-07-20 15:19:37] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=862406034, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:19:38,651 - sglang - INFO - [2025-07-20 15:19:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:19:38,651 - __main__ - INFO - [2025-07-20 15:19:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:19:42,272 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 15:19:44,865 - sglang - INFO - [2025-07-20 15:19:44 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:19:44,865 - __main__ - INFO - [2025-07-20 15:19:44 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:19:44,867 - sglang - INFO - [2025-07-20 15:19:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:19:44,867 - __main__ - INFO - [2025-07-20 15:19:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:19:44,867 - sglang - INFO - [2025-07-20 15:19:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:19:44,867 - __main__ - INFO - [2025-07-20 15:19:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:19:44,867 - sglang - INFO - [2025-07-20 15:19:44 TP0] Init torch distributed begin.
- 2025-07-20 15:19:44,867 - __main__ - INFO - [2025-07-20 15:19:44 TP0] Init torch distributed begin.
- 2025-07-20 15:19:48,346 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 15:19:50,479 - sglang - INFO - [2025-07-20 15:19:50 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:19:50,479 - __main__ - INFO - [2025-07-20 15:19:50 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:19:50,998 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:19:50,998 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:19:52,034 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.04s/it]
- 2025-07-20 15:19:52,035 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.04s/it]
- 2025-07-20 15:19:53,425 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.24s/it]
- 2025-07-20 15:19:53,425 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.24s/it]
- 2025-07-20 15:19:54,424 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 15:19:54,682 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.25s/it]
- 2025-07-20 15:19:54,682 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.25s/it]
- 2025-07-20 15:19:55,303 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
- 2025-07-20 15:19:55,303 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
- 2025-07-20 15:19:55,303 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.08s/it]
- 2025-07-20 15:19:55,303 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.08s/it]
- 2025-07-20 15:19:55,304 - sglang - INFO -
- 2025-07-20 15:19:55,304 - __main__ - INFO -
- 2025-07-20 15:19:55,418 - sglang - INFO - [2025-07-20 15:19:55 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:19:55,418 - __main__ - INFO - [2025-07-20 15:19:55 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:19:55,424 - sglang - INFO - [2025-07-20 15:19:55 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:19:55,424 - __main__ - INFO - [2025-07-20 15:19:55 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:19:55,425 - sglang - INFO - [2025-07-20 15:19:55 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:19:55,425 - __main__ - INFO - [2025-07-20 15:19:55 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:19:55,574 - sglang - INFO - [2025-07-20 15:19:55 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:19:55,574 - __main__ - INFO - [2025-07-20 15:19:55 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:19:57,491 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.04s/it]
50%|█████ | 2/4 [00:01<00:01, 1.65it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.16it/s]
100%|██████████| 4/4 [00:01<00:00, 2.56it/s]
100%|██████████| 4/4 [00:01<00:00, 2.09it/s]
- 2025-07-20 15:19:57,492 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.04s/it]
50%|█████ | 2/4 [00:01<00:01, 1.65it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.16it/s]
100%|██████████| 4/4 [00:01<00:00, 2.56it/s]
100%|██████████| 4/4 [00:01<00:00, 2.09it/s]
- 2025-07-20 15:19:57,492 - sglang - INFO - [2025-07-20 15:19:57 TP0] Capture cuda graph end. Time elapsed: 1.92 s
- 2025-07-20 15:19:57,492 - __main__ - INFO - [2025-07-20 15:19:57 TP0] Capture cuda graph end. Time elapsed: 1.92 s
- 2025-07-20 15:19:58,175 - sglang - INFO - [2025-07-20 15:19:58 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:19:58,175 - __main__ - INFO - [2025-07-20 15:19:58 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:19:58,276 - sglang - INFO - [2025-07-20 15:19:58] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 15:19:58,277 - __main__ - INFO - [2025-07-20 15:19:58] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 15:20:00,503 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 15:20:06,583 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 15:20:12,663 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 15:20:18,755 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 15:20:24,835 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 15:20:30,915 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 15:20:36,995 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 15:20:43,070 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 15:20:49,149 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 15:20:55,239 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 15:21:01,331 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 15:21:07,412 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 15:21:13,491 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 15:21:19,572 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 15:21:25,652 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 15:21:31,733 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 15:21:37,771 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 15:21:43,885 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 15:21:49,967 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 15:21:54,867 - sglang - INFO - Process Process-2:
- 2025-07-20 15:21:54,867 - __main__ - INFO - Process Process-2:
- 2025-07-20 15:21:54,867 - sglang - INFO - Process Process-1:
- 2025-07-20 15:21:54,867 - __main__ - INFO - Process Process-1:
- 2025-07-20 15:21:54,868 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 15:22:05,628 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 15:22:05,628 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 15:22:05,628 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 15:22:05,631 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 15:22:05,832 - __main__ - INFO - Starting pipeline with PID 592226
- 2025-07-20 15:22:05,832 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 15:22:10,904 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 15:22:12,064 - sglang - INFO - [2025-07-20 15:22:12] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=958917757, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:22:12,065 - __main__ - INFO - [2025-07-20 15:22:12] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=958917757, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:22:13,046 - sglang - INFO - [2025-07-20 15:22:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:22:13,046 - __main__ - INFO - [2025-07-20 15:22:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:22:16,954 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 15:22:19,436 - sglang - INFO - [2025-07-20 15:22:19 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:22:19,436 - __main__ - INFO - [2025-07-20 15:22:19 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:22:19,438 - sglang - INFO - [2025-07-20 15:22:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:22:19,438 - __main__ - INFO - [2025-07-20 15:22:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:22:19,438 - sglang - INFO - [2025-07-20 15:22:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:22:19,438 - __main__ - INFO - [2025-07-20 15:22:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:22:19,439 - sglang - INFO - [2025-07-20 15:22:19 TP0] Init torch distributed begin.
- 2025-07-20 15:22:19,439 - __main__ - INFO - [2025-07-20 15:22:19 TP0] Init torch distributed begin.
- 2025-07-20 15:22:23,038 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 15:22:24,881 - sglang - INFO - [2025-07-20 15:22:24 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:22:24,881 - __main__ - INFO - [2025-07-20 15:22:24 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:22:25,353 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:22:25,353 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:22:26,399 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.05s/it]
- 2025-07-20 15:22:26,399 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.05s/it]
- 2025-07-20 15:22:27,615 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.15s/it]
- 2025-07-20 15:22:27,615 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.15s/it]
- 2025-07-20 15:22:28,814 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.17s/it]
- 2025-07-20 15:22:28,814 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.17s/it]
- 2025-07-20 15:22:29,124 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 15:22:29,363 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.08it/s]
- 2025-07-20 15:22:29,363 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.08it/s]
- 2025-07-20 15:22:29,363 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
- 2025-07-20 15:22:29,363 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
- 2025-07-20 15:22:29,363 - sglang - INFO -
- 2025-07-20 15:22:29,363 - __main__ - INFO -
- 2025-07-20 15:22:29,442 - sglang - INFO - [2025-07-20 15:22:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:22:29,442 - __main__ - INFO - [2025-07-20 15:22:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:22:29,448 - sglang - INFO - [2025-07-20 15:22:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:22:29,448 - __main__ - INFO - [2025-07-20 15:22:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:22:29,448 - sglang - INFO - [2025-07-20 15:22:29 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:22:29,448 - __main__ - INFO - [2025-07-20 15:22:29 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:22:29,613 - sglang - INFO - [2025-07-20 15:22:29 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:22:29,613 - __main__ - INFO - [2025-07-20 15:22:29 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:22:31,572 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.06s/it]
50%|█████ | 2/4 [00:01<00:01, 1.63it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.12it/s]
100%|██████████| 4/4 [00:01<00:00, 2.50it/s]
100%|██████████| 4/4 [00:01<00:00, 2.05it/s]
- 2025-07-20 15:22:31,572 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.06s/it]
50%|█████ | 2/4 [00:01<00:01, 1.63it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.12it/s]
100%|██████████| 4/4 [00:01<00:00, 2.50it/s]
100%|██████████| 4/4 [00:01<00:00, 2.05it/s]
- 2025-07-20 15:22:31,573 - sglang - INFO - [2025-07-20 15:22:31 TP0] Capture cuda graph end. Time elapsed: 1.96 s
- 2025-07-20 15:22:31,573 - __main__ - INFO - [2025-07-20 15:22:31 TP0] Capture cuda graph end. Time elapsed: 1.96 s
- 2025-07-20 15:22:32,277 - sglang - INFO - [2025-07-20 15:22:32 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:22:32,277 - __main__ - INFO - [2025-07-20 15:22:32 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:22:32,383 - sglang - INFO - [2025-07-20 15:22:32] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 15:22:32,384 - __main__ - INFO - [2025-07-20 15:22:32] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 15:22:35,205 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 15:22:41,287 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 15:22:41,728 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 15:23:40,632 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 15:23:40,632 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 15:23:40,632 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 15:23:40,635 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 15:23:40,835 - __main__ - INFO - Starting pipeline with PID 593043
- 2025-07-20 15:23:40,835 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 15:23:45,912 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 15:23:47,694 - sglang - INFO - [2025-07-20 15:23:47] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=371273265, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:23:47,694 - __main__ - INFO - [2025-07-20 15:23:47] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=371273265, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:23:48,988 - sglang - INFO - [2025-07-20 15:23:48] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:23:48,988 - __main__ - INFO - [2025-07-20 15:23:48] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:23:51,975 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 15:23:55,321 - sglang - INFO - [2025-07-20 15:23:55 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:23:55,321 - __main__ - INFO - [2025-07-20 15:23:55 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:23:55,324 - sglang - INFO - [2025-07-20 15:23:55 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:23:55,324 - __main__ - INFO - [2025-07-20 15:23:55 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:23:55,324 - sglang - INFO - [2025-07-20 15:23:55 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:23:55,324 - __main__ - INFO - [2025-07-20 15:23:55 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:23:55,332 - sglang - INFO - [2025-07-20 15:23:55 TP0] Init torch distributed begin.
- 2025-07-20 15:23:55,332 - __main__ - INFO - [2025-07-20 15:23:55 TP0] Init torch distributed begin.
- 2025-07-20 15:23:58,074 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 15:24:00,896 - sglang - INFO - [2025-07-20 15:24:00 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:24:00,897 - __main__ - INFO - [2025-07-20 15:24:00 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:24:01,406 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:24:01,406 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:24:02,529 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.12s/it]
- 2025-07-20 15:24:02,529 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.12s/it]
- 2025-07-20 15:24:03,697 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.15s/it]
- 2025-07-20 15:24:03,697 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.15s/it]
- 2025-07-20 15:24:04,130 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 15:24:04,880 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.16s/it]
- 2025-07-20 15:24:04,880 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.16s/it]
- 2025-07-20 15:24:05,350 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
- 2025-07-20 15:24:05,350 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
- 2025-07-20 15:24:05,350 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.01it/s]
- 2025-07-20 15:24:05,350 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.01it/s]
- 2025-07-20 15:24:05,351 - sglang - INFO -
- 2025-07-20 15:24:05,351 - __main__ - INFO -
- 2025-07-20 15:24:05,411 - sglang - INFO - [2025-07-20 15:24:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:24:05,412 - __main__ - INFO - [2025-07-20 15:24:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:24:05,418 - sglang - INFO - [2025-07-20 15:24:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:24:05,418 - __main__ - INFO - [2025-07-20 15:24:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:24:05,418 - sglang - INFO - [2025-07-20 15:24:05 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:24:05,418 - __main__ - INFO - [2025-07-20 15:24:05 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:24:05,574 - sglang - INFO - [2025-07-20 15:24:05 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:24:05,574 - __main__ - INFO - [2025-07-20 15:24:05 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:24:07,535 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.72it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.18it/s]
100%|██████████| 4/4 [00:01<00:00, 2.38it/s]
100%|██████████| 4/4 [00:01<00:00, 2.04it/s]
- 2025-07-20 15:24:07,535 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.72it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.18it/s]
100%|██████████| 4/4 [00:01<00:00, 2.38it/s]
100%|██████████| 4/4 [00:01<00:00, 2.04it/s]
- 2025-07-20 15:24:07,535 - sglang - INFO - [2025-07-20 15:24:07 TP0] Capture cuda graph end. Time elapsed: 1.96 s
- 2025-07-20 15:24:07,536 - __main__ - INFO - [2025-07-20 15:24:07 TP0] Capture cuda graph end. Time elapsed: 1.96 s
- 2025-07-20 15:24:08,225 - sglang - INFO - [2025-07-20 15:24:08 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:24:08,225 - __main__ - INFO - [2025-07-20 15:24:08 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:24:08,323 - sglang - INFO - [2025-07-20 15:24:08] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 15:24:08,323 - __main__ - INFO - [2025-07-20 15:24:08] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
- 2025-07-20 15:24:10,210 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 15:24:16,291 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 15:24:22,394 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 15:24:28,475 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 15:24:34,556 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 15:24:40,636 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 15:24:44,804 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 15:24:53,840 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 15:24:53,840 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 15:24:53,840 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 15:24:53,843 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 15:24:54,044 - __main__ - INFO - Starting pipeline with PID 593960
- 2025-07-20 15:24:54,044 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 15:24:54,112 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 15:24:55,142 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 15:24:56,188 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 15:24:57,251 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 15:24:58,318 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 15:24:59,476 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 15:25:00,384 - sglang - INFO - [2025-07-20 15:25:00] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30025, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=501574558, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:25:00,384 - __main__ - INFO - [2025-07-20 15:25:00] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30025, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=501574558, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:25:00,682 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 15:25:01,384 - sglang - INFO - [2025-07-20 15:25:01] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:25:01,384 - __main__ - INFO - [2025-07-20 15:25:01] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:25:01,764 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 15:25:02,832 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 15:25:03,900 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 15:25:04,968 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 15:25:06,142 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 15:25:07,214 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 15:25:07,905 - sglang - INFO - [2025-07-20 15:25:07 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:25:07,905 - __main__ - INFO - [2025-07-20 15:25:07 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:25:07,908 - sglang - INFO - [2025-07-20 15:25:07 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:25:07,908 - __main__ - INFO - [2025-07-20 15:25:07 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:25:07,908 - sglang - INFO - [2025-07-20 15:25:07 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:25:07,908 - __main__ - INFO - [2025-07-20 15:25:07 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:25:07,908 - sglang - INFO - [2025-07-20 15:25:07 TP0] Init torch distributed begin.
- 2025-07-20 15:25:07,908 - __main__ - INFO - [2025-07-20 15:25:07 TP0] Init torch distributed begin.
- 2025-07-20 15:25:08,289 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 15:25:09,356 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 15:25:10,431 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 15:25:11,506 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 15:25:12,581 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 15:25:13,489 - sglang - INFO - [2025-07-20 15:25:13 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:25:13,489 - __main__ - INFO - [2025-07-20 15:25:13 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:25:13,649 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 15:25:14,006 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:25:14,006 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:25:14,722 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 15:25:14,984 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.02it/s]
- 2025-07-20 15:25:14,984 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.02it/s]
- 2025-07-20 15:25:15,799 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 15:25:16,073 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.04s/it]
- 2025-07-20 15:25:16,073 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.04s/it]
- 2025-07-20 15:25:16,866 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 15:25:17,073 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.02s/it]
- 2025-07-20 15:25:17,073 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.02s/it]
- 2025-07-20 15:25:17,509 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.26it/s]
- 2025-07-20 15:25:17,509 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.26it/s]
- 2025-07-20 15:25:17,509 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
- 2025-07-20 15:25:17,509 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
- 2025-07-20 15:25:17,509 - sglang - INFO -
- 2025-07-20 15:25:17,509 - __main__ - INFO -
- 2025-07-20 15:25:17,571 - sglang - INFO - [2025-07-20 15:25:17 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:25:17,571 - __main__ - INFO - [2025-07-20 15:25:17 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:25:17,577 - sglang - INFO - [2025-07-20 15:25:17 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:25:17,578 - __main__ - INFO - [2025-07-20 15:25:17 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:25:17,578 - sglang - INFO - [2025-07-20 15:25:17 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:25:17,578 - __main__ - INFO - [2025-07-20 15:25:17 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:25:17,730 - sglang - INFO - [2025-07-20 15:25:17 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:25:17,730 - __main__ - INFO - [2025-07-20 15:25:17 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:25:17,935 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 15:25:19,008 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 15:25:19,560 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.75it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.28it/s]
100%|██████████| 4/4 [00:01<00:00, 2.66it/s]
100%|██████████| 4/4 [00:01<00:00, 2.19it/s]
- 2025-07-20 15:25:19,560 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.02it/s]
50%|█████ | 2/4 [00:01<00:01, 1.75it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.28it/s]
100%|██████████| 4/4 [00:01<00:00, 2.66it/s]
100%|██████████| 4/4 [00:01<00:00, 2.19it/s]
- 2025-07-20 15:25:19,560 - sglang - INFO - [2025-07-20 15:25:19 TP0] Capture cuda graph end. Time elapsed: 1.83 s
- 2025-07-20 15:25:19,560 - __main__ - INFO - [2025-07-20 15:25:19 TP0] Capture cuda graph end. Time elapsed: 1.83 s
- 2025-07-20 15:25:20,061 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 15:25:20,328 - sglang - INFO - [2025-07-20 15:25:20 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:25:20,328 - __main__ - INFO - [2025-07-20 15:25:20 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:25:21,149 - __main__ - INFO - sglang server is ready.
- 2025-07-20 15:25:21,150 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 15:25:21,150 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:25:21,150 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 15:25:21,150 - __main__ - INFO - Worker 0 processing work item 3a56970ad53c3199997edee4c2904936a58b713f
- 2025-07-20 15:25:21,153 - __main__ - INFO - Created all tasks for 3a56970ad53c3199997edee4c2904936a58b713f
- 2025-07-20 15:25:21,167 - __main__ - INFO - Got 14 pages to do for scripts/data/11440000MB2D0234372440125017009.pdf in worker 0
- 2025-07-20 15:25:21,172 - __main__ - INFO - Got 18 pages to do for scripts/data/11440000MB2D0234372440125017014.pdf in worker 0
- 2025-07-20 15:25:21,176 - __main__ - INFO - Got 18 pages to do for scripts/data/11440000MB2D0234372440125017020.pdf in worker 0
- 2025-07-20 15:25:21,180 - __main__ - INFO - Got 16 pages to do for scripts/data/11440000MB2D0234372440125017028.pdf in worker 0
- 2025-07-20 15:25:21,184 - __main__ - INFO - Got 16 pages to do for scripts/data/11440000MB2D0234372440125017041.pdf in worker 0
- 2025-07-20 15:25:21,189 - __main__ - INFO - Got 17 pages to do for scripts/data/11445200MB2C47380T4440125017008 (1).pdf in worker 0
- 2025-07-20 15:25:21,192 - __main__ - INFO - Got 14 pages to do for scripts/data/11440000MB2D0234372440125017049.pdf in worker 0
- 2025-07-20 15:25:21,196 - __main__ - INFO - Got 17 pages to do for scripts/data/11445200MB2C47380T4440125017008.pdf in worker 0
- 2025-07-20 15:25:21,199 - __main__ - INFO - Got 7 pages to do for scripts/data/11445200MB2D06387W3440125011001.pdf in worker 0
- 2025-07-20 15:25:21,202 - __main__ - INFO - Got 15 pages to do for scripts/data/11445200MB2C47380T4440125017023.pdf in worker 0
- 2025-07-20 15:25:21,205 - __main__ - INFO - Got 14 pages to do for scripts/data/11445200MB2D06387W3440125017006.pdf in worker 0
- 2025-07-20 15:25:21,208 - __main__ - INFO - Got 14 pages to do for scripts/data/11445200MB2D06387W3440125017003.pdf in worker 0
- 2025-07-20 15:25:21,211 - __main__ - INFO - Got 19 pages to do for scripts/data/11445200MB2D06387W3440125017011.pdf in worker 0
- 2025-07-20 15:25:21,214 - __main__ - INFO - Got 14 pages to do for scripts/data/11445200MB2D06387W3440125017007.pdf in worker 0
- 2025-07-20 15:25:21,216 - __main__ - INFO - Got 15 pages to do for scripts/data/11445200MB2D06387W3440125017023.pdf in worker 0
- 2025-07-20 15:25:21,219 - __main__ - INFO - Got 14 pages to do for scripts/data/11445200MB2D06387W3440125017041.pdf in worker 0
- 2025-07-20 15:25:21,220 - __main__ - INFO - Got 5 pages to do for scripts/data/11445200MB2D42580L4442014010000.pdf in worker 0
- 2025-07-20 15:25:21,222 - __main__ - INFO - Got 16 pages to do for scripts/data/11445200MB2D06387W3440125017048.pdf in worker 0
- 2025-07-20 15:25:21,225 - __main__ - INFO - Got 18 pages to do for scripts/data/11445200MB2D6222364440125017008.pdf in worker 0
- 2025-07-20 15:25:21,227 - __main__ - INFO - Got 13 pages to do for scripts/data/11445200MB2D6222364440125017049.pdf in worker 0
- 2025-07-20 15:25:21,228 - __main__ - INFO - Got 4 pages to do for scripts/data/11445202592174409C4442111641000.pdf in worker 0
- 2025-07-20 15:25:21,229 - __main__ - INFO - Got 6 pages to do for scripts/data/11445202592174409C4442111667001.pdf in worker 0
- 2025-07-20 15:25:21,230 - __main__ - INFO - Got 4 pages to do for scripts/data/11445202592174409C4442111820005.pdf in worker 0
- 2025-07-20 15:25:21,232 - __main__ - INFO - Got 14 pages to do for scripts/data/11445202MB2D1177604440125017023.pdf in worker 0
- 2025-07-20 15:25:21,234 - __main__ - INFO - Got 12 pages to do for scripts/data/11445202MB2D1177604440125017027.pdf in worker 0
- 2025-07-20 15:25:21,236 - __main__ - INFO - Got 14 pages to do for scripts/data/11445202MB2D1177604440125017041.pdf in worker 0
- 2025-07-20 15:25:21,237 - __main__ - INFO - Got 3 pages to do for scripts/data/11445202MB2D117760444212503R001.pdf in worker 0
- 2025-07-20 15:25:21,238 - __main__ - INFO - Got 4 pages to do for scripts/data/11445203007030456U4440711000000.pdf in worker 0
- 2025-07-20 15:25:21,239 - __main__ - INFO - Got 5 pages to do for scripts/data/11445203007030456U4442111640000.pdf in worker 0
- 2025-07-20 15:25:21,240 - __main__ - INFO - Got 4 pages to do for scripts/data/11445203007030456U4442111641000.pdf in worker 0
- 2025-07-20 15:25:21,242 - __main__ - INFO - Got 7 pages to do for scripts/data/11445203007030456U44421110A0005.pdf in worker 0
- 2025-07-20 15:25:21,243 - __main__ - INFO - Got 6 pages to do for scripts/data/11445203007030456U4442111667001.pdf in worker 0
- 2025-07-20 15:25:21,244 - __main__ - INFO - Got 5 pages to do for scripts/data/11445203707759010G4442014010000.pdf in worker 0
- 2025-07-20 15:25:21,246 - __main__ - INFO - Got 14 pages to do for scripts/data/11445203MB2C21084N4440125017008.pdf in worker 0
- 2025-07-20 15:25:21,247 - __main__ - INFO - Got 3 pages to do for scripts/data/11445203MB2C21084N444212503R001.pdf in worker 0
- 2025-07-20 15:25:21,248 - __main__ - INFO - Got 4 pages to do for scripts/data/11445222007029500K4440711000000.pdf in worker 0
- 2025-07-20 15:25:21,249 - __main__ - INFO - Got 6 pages to do for scripts/data/11445222007029500K44421110A0001.pdf in worker 0
- 2025-07-20 15:25:21,250 - __main__ - INFO - Got 6 pages to do for scripts/data/11445222007029500K44421110A0005.pdf in worker 0
- 2025-07-20 15:25:21,251 - __main__ - INFO - Got 4 pages to do for scripts/data/11445222007029527B4442106100010.pdf in worker 0
- 2025-07-20 15:25:21,252 - __main__ - INFO - Got 12 pages to do for scripts/data/11445222007030157E4440149001001.pdf in worker 0
- 2025-07-20 15:25:21,253 - __main__ - INFO - Got 4 pages to do for scripts/data/11445224007035644H4440711000000.pdf in worker 0
- 2025-07-20 15:25:21,254 - __main__ - INFO - Got 5 pages to do for scripts/data/11445224007035644H44421110A0001.pdf in worker 0
- 2025-07-20 15:25:21,255 - __main__ - INFO - Got 5 pages to do for scripts/data/11445224007035644H44421110A0005.pdf in worker 0
- 2025-07-20 15:25:21,257 - __main__ - INFO - Got 10 pages to do for scripts/data/11445224007035652C4440114020001.pdf in worker 0
- 2025-07-20 15:25:21,257 - __main__ - INFO - Got 5 pages to do for scripts/data/11445224007035652C4442014010000.pdf in worker 0
- 2025-07-20 15:25:21,258 - __main__ - INFO - Got 4 pages to do for scripts/data/11445281588281455A4440711000000.pdf in worker 0
- 2025-07-20 15:25:21,259 - __main__ - INFO - Got 5 pages to do for scripts/data/11445281588281455A44421110A0001.pdf in worker 0
- 2025-07-20 15:25:21,260 - __main__ - INFO - Got 6 pages to do for scripts/data/11445281588281455A44421110A0005.pdf in worker 0
- 2025-07-20 15:25:21,261 - __main__ - INFO - Got 4 pages to do for scripts/data/11445281588281455A4442111641000.pdf in worker 0
- 2025-07-20 15:25:21,262 - __main__ - INFO - Got 6 pages to do for scripts/data/11445281588281455A4442111667001.pdf in worker 0
- 2025-07-20 15:25:21,263 - __main__ - INFO - Got 4 pages to do for scripts/data/11445281588281455A4442111820005.pdf in worker 0
- 2025-07-20 15:25:21,768 - __main__ - INFO - Got 6 pages to do for scripts/data/12445200456019383L3442111667001.pdf in worker 0
- 2025-07-20 15:25:21,837 - __main__ - INFO - Got 5 pages to do for scripts/data/12445200726503846U344201405500301.pdf in worker 0
- 2025-07-20 15:25:22,039 - sglang - INFO - [2025-07-20 15:25:21 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 15:25:22,039 - __main__ - INFO - [2025-07-20 15:25:21 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 15:25:22,040 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 15:25:29,937 - sglang - INFO - [2025-07-20 15:25:29] The server is fired up and ready to roll!
- 2025-07-20 15:25:29,937 - __main__ - INFO - [2025-07-20 15:25:29] The server is fired up and ready to roll!
- 2025-07-20 15:25:31,151 - __main__ - INFO - Queue remaining: 3
- 2025-07-20 15:25:31,151 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:25:31,151 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 500
- 2025-07-20 15:25:41,152 - __main__ - INFO - Queue remaining: 3
- 2025-07-20 15:25:41,153 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:25:41,153 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 500
- 2025-07-20 15:25:43,644 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-1
- 2025-07-20 15:25:43,685 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-2
- 2025-07-20 15:25:43,711 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-3
- 2025-07-20 15:25:43,805 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-5
- 2025-07-20 15:25:43,850 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-6
- 2025-07-20 15:25:43,851 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-4
- 2025-07-20 15:25:43,861 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-8
- 2025-07-20 15:25:43,901 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-7
- 2025-07-20 15:25:43,904 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-9
- 2025-07-20 15:25:43,975 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-11
- 2025-07-20 15:25:44,061 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-13
- 2025-07-20 15:25:44,078 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-14
- 2025-07-20 15:25:44,148 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-12
- 2025-07-20 15:25:44,156 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-1
- 2025-07-20 15:25:44,164 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-2
- 2025-07-20 15:25:44,246 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-3
- 2025-07-20 15:25:44,262 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-5
- 2025-07-20 15:25:44,337 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-10
- 2025-07-20 15:25:44,446 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-8
- 2025-07-20 15:25:44,449 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-6
- 2025-07-20 15:25:44,464 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-4
- 2025-07-20 15:25:44,538 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-10
- 2025-07-20 15:25:44,549 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-9
- 2025-07-20 15:25:44,572 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-11
- 2025-07-20 15:25:44,650 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-7
- 2025-07-20 15:25:44,664 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-13
- 2025-07-20 15:25:44,751 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-14
- 2025-07-20 15:25:44,844 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-15
- 2025-07-20 15:25:44,846 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-18
- 2025-07-20 15:25:44,858 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-16
- 2025-07-20 15:25:44,954 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-2
- 2025-07-20 15:25:44,956 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-3
- 2025-07-20 15:25:44,961 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-17
- 2025-07-20 15:25:45,041 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-1
- 2025-07-20 15:25:45,063 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-4
- 2025-07-20 15:25:45,150 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-5
- 2025-07-20 15:25:45,233 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-6
- 2025-07-20 15:25:45,244 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-7
- 2025-07-20 15:25:45,245 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-10
- 2025-07-20 15:25:45,245 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-8
- 2025-07-20 15:25:45,346 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-12
- 2025-07-20 15:25:45,361 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-9
- 2025-07-20 15:25:45,361 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-11
- 2025-07-20 15:25:45,554 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-15
- 2025-07-20 15:25:45,554 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-14
- 2025-07-20 15:25:45,556 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-12
- 2025-07-20 15:25:45,648 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-17
- 2025-07-20 15:25:45,649 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-2
- 2025-07-20 15:25:45,649 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-16
- 2025-07-20 15:25:45,733 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-3
- 2025-07-20 15:25:45,846 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-18
- 2025-07-20 15:25:45,849 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-1
- 2025-07-20 15:25:45,852 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-6
- 2025-07-20 15:25:45,934 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-4
- 2025-07-20 15:25:45,935 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-9
- 2025-07-20 15:25:45,940 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-7
- 2025-07-20 15:25:45,952 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-13
- 2025-07-20 15:25:46,045 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-16
- 2025-07-20 15:25:46,139 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-11
- 2025-07-20 15:25:46,147 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-12
- 2025-07-20 15:25:46,238 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-14
- 2025-07-20 15:25:46,245 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-10
- 2025-07-20 15:25:46,246 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-2
- 2025-07-20 15:25:46,335 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-3
- 2025-07-20 15:25:46,341 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-7
- 2025-07-20 15:25:46,342 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-1
- 2025-07-20 15:25:46,347 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-10
- 2025-07-20 15:25:46,356 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-6
- 2025-07-20 15:25:46,449 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-5
- 2025-07-20 15:25:46,449 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-5
- 2025-07-20 15:25:46,534 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-8
- 2025-07-20 15:25:46,535 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-16
- 2025-07-20 15:25:46,535 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-15
- 2025-07-20 15:25:46,548 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-4
- 2025-07-20 15:25:46,552 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-4
- 2025-07-20 15:25:46,637 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-8
- 2025-07-20 15:25:46,649 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-14
- 2025-07-20 15:25:46,651 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-9
- 2025-07-20 15:25:46,734 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-11
- 2025-07-20 15:25:46,736 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-3
- 2025-07-20 15:25:46,739 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-12
- 2025-07-20 15:25:46,744 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-5
- 2025-07-20 15:25:46,834 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-13
- 2025-07-20 15:25:46,836 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-15
- 2025-07-20 15:25:46,855 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-17
- 2025-07-20 15:25:46,944 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12
- 2025-07-20 15:25:46,946 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-6
- 2025-07-20 15:25:47,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-8
- 2025-07-20 15:25:47,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-11
- 2025-07-20 15:25:47,035 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-13
- 2025-07-20 15:25:47,035 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-1
- 2025-07-20 15:25:47,041 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-13
- 2025-07-20 15:25:47,042 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-14
- 2025-07-20 15:25:47,246 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-6
- 2025-07-20 15:25:47,338 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-15
- 2025-07-20 15:25:47,340 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-16
- 2025-07-20 15:25:47,341 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-1
- 2025-07-20 15:25:47,345 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-4
- 2025-07-20 15:25:47,439 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-8
- 2025-07-20 15:25:47,439 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-3
- 2025-07-20 15:25:47,439 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-4
- 2025-07-20 15:25:47,445 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-2
- 2025-07-20 15:25:47,446 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-11
- 2025-07-20 15:25:47,543 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-7
- 2025-07-20 15:25:47,635 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-14
- 2025-07-20 15:25:47,636 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-12
- 2025-07-20 15:25:47,669 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-9
- 2025-07-20 15:25:47,670 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-2
- 2025-07-20 15:25:47,670 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-7
- 2025-07-20 15:25:47,734 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-8
- 2025-07-20 15:25:47,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-2
- 2025-07-20 15:25:47,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-14
- 2025-07-20 15:25:47,842 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-11
- 2025-07-20 15:25:47,844 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-9
- 2025-07-20 15:25:47,942 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-7
- 2025-07-20 15:25:47,944 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-5
- 2025-07-20 15:25:47,946 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-17
- 2025-07-20 15:25:48,033 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-2
- 2025-07-20 15:25:48,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-1
- 2025-07-20 15:25:48,143 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-12
- 2025-07-20 15:25:48,143 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-4
- 2025-07-20 15:25:48,145 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-15
- 2025-07-20 15:25:48,237 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-10
- 2025-07-20 15:25:48,238 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-3
- 2025-07-20 15:25:48,239 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-7
- 2025-07-20 15:25:48,241 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-6
- 2025-07-20 15:25:48,246 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-13
- 2025-07-20 15:25:48,336 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-13
- 2025-07-20 15:25:48,337 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-16
- 2025-07-20 15:25:48,340 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-5
- 2025-07-20 15:25:48,342 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-10
- 2025-07-20 15:25:48,344 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-3
- 2025-07-20 15:25:48,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-5
- 2025-07-20 15:25:48,347 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-15
- 2025-07-20 15:25:48,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-10
- 2025-07-20 15:25:48,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-2
- 2025-07-20 15:25:48,444 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-3
- 2025-07-20 15:25:48,446 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-1
- 2025-07-20 15:25:48,446 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-7
- 2025-07-20 15:25:48,633 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-4
- 2025-07-20 15:25:48,634 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-9
- 2025-07-20 15:25:48,636 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-8
- 2025-07-20 15:25:48,636 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-3
- 2025-07-20 15:25:48,637 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-10
- 2025-07-20 15:25:48,639 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-6
- 2025-07-20 15:25:48,641 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-7
- 2025-07-20 15:25:48,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-1
- 2025-07-20 15:25:48,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-2
- 2025-07-20 15:25:48,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-3
- 2025-07-20 15:25:48,734 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-4
- 2025-07-20 15:25:48,735 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-14
- 2025-07-20 15:25:48,741 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-11
- 2025-07-20 15:25:48,839 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-1
- 2025-07-20 15:25:48,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-9
- 2025-07-20 15:25:48,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-13
- 2025-07-20 15:25:48,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-14
- 2025-07-20 15:25:48,843 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-2
- 2025-07-20 15:25:48,934 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-8
- 2025-07-20 15:25:49,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-14
- 2025-07-20 15:25:49,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-7
- 2025-07-20 15:25:49,038 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-9
- 2025-07-20 15:25:49,039 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-4
- 2025-07-20 15:25:49,041 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-3
- 2025-07-20 15:25:49,042 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-6
- 2025-07-20 15:25:49,044 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-6
- 2025-07-20 15:25:49,045 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-10
- 2025-07-20 15:25:49,135 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-9
- 2025-07-20 15:25:49,139 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-11
- 2025-07-20 15:25:49,141 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-4
- 2025-07-20 15:25:49,241 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-2
- 2025-07-20 15:25:49,243 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-5
- 2025-07-20 15:25:49,244 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-5
- 2025-07-20 15:25:49,247 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-9
- 2025-07-20 15:25:49,247 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-12
- 2025-07-20 15:25:49,334 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-7
- 2025-07-20 15:25:49,336 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-6
- 2025-07-20 15:25:49,339 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-5
- 2025-07-20 15:25:49,341 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-8
- 2025-07-20 15:25:49,342 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-19
- 2025-07-20 15:25:49,342 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-6
- 2025-07-20 15:25:49,343 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-14
- 2025-07-20 15:25:49,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-10
- 2025-07-20 15:25:49,346 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-8
- 2025-07-20 15:25:49,742 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-1
- 2025-07-20 15:25:49,745 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-6
- 2025-07-20 15:25:49,833 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-3
- 2025-07-20 15:25:49,834 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-14
- 2025-07-20 15:25:49,837 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-13
- 2025-07-20 15:25:49,839 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-1
- 2025-07-20 15:25:49,839 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-5
- 2025-07-20 15:25:49,841 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-11
- 2025-07-20 15:25:49,842 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-18
- 2025-07-20 15:25:49,842 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-16
- 2025-07-20 15:25:49,937 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-4
- 2025-07-20 15:25:49,937 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-2
- 2025-07-20 15:25:49,939 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-8
- 2025-07-20 15:25:49,941 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-7
- 2025-07-20 15:25:49,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-4
- 2025-07-20 15:25:49,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-1
- 2025-07-20 15:25:49,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-3
- 2025-07-20 15:25:50,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-5
- 2025-07-20 15:25:50,036 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-15
- 2025-07-20 15:25:50,039 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-9
- 2025-07-20 15:25:50,040 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-13
- 2025-07-20 15:25:50,040 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-2
- 2025-07-20 15:25:50,042 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-10
- 2025-07-20 15:25:50,045 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-8
- 2025-07-20 15:25:50,046 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-17
- 2025-07-20 15:25:50,046 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-11
- 2025-07-20 15:25:50,134 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-13
- 2025-07-20 15:25:50,136 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-6
- 2025-07-20 15:25:50,138 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-3
- 2025-07-20 15:25:50,139 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-9
- 2025-07-20 15:25:50,140 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-9
- 2025-07-20 15:25:50,143 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-10
- 2025-07-20 15:25:50,144 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-6
- 2025-07-20 15:25:50,544 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-5
- 2025-07-20 15:25:50,544 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-12
- 2025-07-20 15:25:50,638 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-12
- 2025-07-20 15:25:50,641 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-11
- 2025-07-20 15:25:50,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-13
- 2025-07-20 15:25:50,644 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-5
- 2025-07-20 15:25:50,734 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-14
- 2025-07-20 15:25:50,735 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-3
- 2025-07-20 15:25:50,735 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-2
- 2025-07-20 15:25:50,942 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-3
- 2025-07-20 15:25:50,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-7
- 2025-07-20 15:25:50,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-4
- 2025-07-20 15:25:50,944 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-12
- 2025-07-20 15:25:50,944 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-10
- 2025-07-20 15:25:51,033 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-5
- 2025-07-20 15:25:51,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-14
- 2025-07-20 15:25:51,035 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-2
- 2025-07-20 15:25:51,036 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-5
- 2025-07-20 15:25:51,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-1
- 2025-07-20 15:25:51,039 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-13
- 2025-07-20 15:25:51,041 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-15
- 2025-07-20 15:25:51,042 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-7
- 2025-07-20 15:25:51,044 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-4
- 2025-07-20 15:25:51,045 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-13
- 2025-07-20 15:25:51,136 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-8
- 2025-07-20 15:25:51,137 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-12
- 2025-07-20 15:25:51,140 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-16
- 2025-07-20 15:25:51,141 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-6
- 2025-07-20 15:25:51,142 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-10
- 2025-07-20 15:25:51,143 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-2
- 2025-07-20 15:25:51,234 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-9
- 2025-07-20 15:25:51,236 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-10
- 2025-07-20 15:25:51,635 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-4
- 2025-07-20 15:25:51,639 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-6
- 2025-07-20 15:25:51,640 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-12
- 2025-07-20 15:25:51,640 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-3
- 2025-07-20 15:25:51,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-15
- 2025-07-20 15:25:52,238 - __main__ - INFO - Queue remaining: 3
- 2025-07-20 15:25:52,238 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:25:52,238 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 500
- 2025-07-20 15:25:52,239 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-14
- 2025-07-20 15:25:52,241 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-7
- 2025-07-20 15:25:52,244 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-11
- 2025-07-20 15:25:52,335 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-13
- 2025-07-20 15:25:52,338 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-5
- 2025-07-20 15:25:52,342 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-8
- 2025-07-20 15:25:52,343 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-14
- 2025-07-20 15:25:52,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-1
- 2025-07-20 15:25:52,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-1
- 2025-07-20 15:25:52,346 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-2
- 2025-07-20 15:25:52,346 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-1
- 2025-07-20 15:25:52,347 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-9
- 2025-07-20 15:25:52,347 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-5
- 2025-07-20 15:25:52,347 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-13
- 2025-07-20 15:25:52,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-16
- 2025-07-20 15:25:52,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-11
- 2025-07-20 15:25:52,348 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111641000.pdf-4
- 2025-07-20 15:25:52,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-7
- 2025-07-20 15:25:52,349 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-11
- 2025-07-20 15:25:52,434 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-6
- 2025-07-20 15:25:52,435 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-4
- 2025-07-20 15:25:52,435 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-6
- 2025-07-20 15:25:52,435 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-10
- 2025-07-20 15:25:52,537 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-8
- 2025-07-20 15:25:52,543 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-3
- 2025-07-20 15:25:52,543 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-3
- 2025-07-20 15:25:52,632 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-8
- 2025-07-20 15:25:52,633 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-12
- 2025-07-20 15:25:52,634 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111641000.pdf-2
- 2025-07-20 15:25:52,635 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-2
- 2025-07-20 15:25:52,635 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111641000.pdf-1
- 2025-07-20 15:25:52,636 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-1
- 2025-07-20 15:25:52,637 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-12
- 2025-07-20 15:25:52,640 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-9
- 2025-07-20 15:25:52,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-4
- 2025-07-20 15:25:53,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-11
- 2025-07-20 15:25:53,036 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111820005.pdf-2
- 2025-07-20 15:25:53,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-11
- 2025-07-20 15:25:53,037 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-5
- 2025-07-20 15:25:53,040 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-2
- 2025-07-20 15:25:53,041 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-1
- 2025-07-20 15:25:53,042 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-1
- 2025-07-20 15:25:53,044 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111820005.pdf-3
- 2025-07-20 15:25:53,134 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111820005.pdf-1
- 2025-07-20 15:25:53,135 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-2
- 2025-07-20 15:25:53,136 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-4
- 2025-07-20 15:25:53,142 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-14
- 2025-07-20 15:25:53,236 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-6
- 2025-07-20 15:25:53,238 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-7
- 2025-07-20 15:25:53,332 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-5
- 2025-07-20 15:25:53,337 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-3
- 2025-07-20 15:25:53,339 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-6
- 2025-07-20 15:25:53,340 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-3
- 2025-07-20 15:25:53,343 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-8
- 2025-07-20 15:25:53,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-13
- 2025-07-20 15:25:53,346 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-10
- 2025-07-20 15:25:53,346 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-4
- 2025-07-20 15:25:53,346 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-7
- 2025-07-20 15:25:53,347 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-9
- 2025-07-20 15:25:54,146 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-12
- 2025-07-20 15:25:54,233 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-1
- 2025-07-20 15:25:54,234 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-13
- 2025-07-20 15:25:54,235 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-2
- 2025-07-20 15:25:54,236 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-6
- 2025-07-20 15:25:54,237 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-11
- 2025-07-20 15:25:54,238 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-7
- 2025-07-20 15:25:54,241 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-12
- 2025-07-20 15:25:54,244 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-18
- 2025-07-20 15:25:54,334 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-2
- 2025-07-20 15:25:54,336 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-5
- 2025-07-20 15:25:54,340 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-3
- 2025-07-20 15:25:54,343 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-12
- 2025-07-20 15:25:54,435 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-9
- 2025-07-20 15:25:54,439 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-9
- 2025-07-20 15:25:54,441 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-8
- 2025-07-20 15:25:54,442 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-11
- 2025-07-20 15:25:54,535 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-12
- 2025-07-20 15:25:54,539 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-15
- 2025-07-20 15:25:54,542 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-4
- 2025-07-20 15:25:54,643 - __main__ - INFO - Built page query for scripts/data/11445202MB2D117760444212503R001.pdf-3
- 2025-07-20 15:25:54,737 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-3
- 2025-07-20 15:25:54,742 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4440711000000.pdf-2
- 2025-07-20 15:25:54,833 - __main__ - INFO - Built page query for scripts/data/11445202MB2D117760444212503R001.pdf-2
- 2025-07-20 15:25:54,840 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111641000.pdf-4
- 2025-07-20 15:25:54,840 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-10
- 2025-07-20 15:25:54,842 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-5
- 2025-07-20 15:25:54,843 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111820005.pdf-4
- 2025-07-20 15:25:54,935 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-5
- 2025-07-20 15:25:54,936 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-14
- 2025-07-20 15:25:54,937 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4440711000000.pdf-3
- 2025-07-20 15:25:54,938 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111641000.pdf-3
- 2025-07-20 15:25:54,940 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-8
- 2025-07-20 15:25:55,037 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-7
- 2025-07-20 15:25:55,040 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-10
- 2025-07-20 15:25:55,045 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-12
- 2025-07-20 15:25:55,045 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111641000.pdf-2
- 2025-07-20 15:25:55,045 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-13
- 2025-07-20 15:25:55,046 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-4
- 2025-07-20 15:25:55,046 - __main__ - INFO - Built page query for scripts/data/11445202MB2D117760444212503R001.pdf-1
- 2025-07-20 15:25:55,046 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-2
- 2025-07-20 15:25:55,137 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-2
- 2025-07-20 15:25:55,139 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-4
- 2025-07-20 15:25:55,144 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-3
- 2025-07-20 15:25:55,443 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4440711000000.pdf-4
- 2025-07-20 15:25:55,444 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-6
- 2025-07-20 15:25:55,445 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-4
- 2025-07-20 15:25:55,533 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-5
- 2025-07-20 15:25:55,535 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4440711000000.pdf-1
- 2025-07-20 15:25:55,541 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-3
- 2025-07-20 15:25:55,542 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-3
- 2025-07-20 15:25:55,543 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-1
- 2025-07-20 15:25:55,635 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-2
- 2025-07-20 15:25:55,636 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-7
- 2025-07-20 15:25:55,644 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-1
- 2025-07-20 15:25:55,645 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111641000.pdf-1
- 2025-07-20 15:25:55,645 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-3
- 2025-07-20 15:25:55,645 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111641000.pdf-3
- 2025-07-20 15:25:55,646 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-5
- 2025-07-20 15:25:55,646 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-5
- 2025-07-20 15:25:55,646 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-4
- 2025-07-20 15:25:55,647 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-17
- 2025-07-20 15:25:55,647 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-2
- 2025-07-20 15:25:55,647 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-10
- 2025-07-20 15:25:55,648 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-14
- 2025-07-20 15:25:55,741 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-8
- 2025-07-20 15:25:55,743 - __main__ - INFO - Built page query for scripts/data/11445222007029500K4440711000000.pdf-2
- 2025-07-20 15:25:55,743 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-1
- 2025-07-20 15:25:55,744 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-6
- 2025-07-20 15:25:55,832 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-4
- 2025-07-20 15:25:55,834 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-13
- 2025-07-20 15:25:55,836 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-4
- 2025-07-20 15:25:55,839 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-9
- 2025-07-20 15:25:55,840 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-5
- 2025-07-20 15:25:55,842 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N444212503R001.pdf-3
- 2025-07-20 15:25:55,844 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N444212503R001.pdf-2
- 2025-07-20 15:25:55,935 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-6
- 2025-07-20 15:25:55,937 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-2
- 2025-07-20 15:25:55,939 - __main__ - INFO - Built page query for scripts/data/11445222007029500K4440711000000.pdf-4
- 2025-07-20 15:25:55,940 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-1
- 2025-07-20 15:25:55,940 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-3
- 2025-07-20 15:25:55,941 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-2
- 2025-07-20 15:25:55,942 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-3
- 2025-07-20 15:25:55,943 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-7
- 2025-07-20 15:25:55,945 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-12
- 2025-07-20 15:25:55,946 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-10
- 2025-07-20 15:25:56,033 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-11
- 2025-07-20 15:25:56,035 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-6
- 2025-07-20 15:25:56,036 - __main__ - INFO - Built page query for scripts/data/11445222007029527B4442106100010.pdf-2
- 2025-07-20 15:25:56,042 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-1
- 2025-07-20 15:25:56,045 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-1
- 2025-07-20 15:25:56,136 - __main__ - INFO - Built page query for scripts/data/11445222007029500K4440711000000.pdf-3
- 2025-07-20 15:25:56,138 - __main__ - INFO - Built page query for scripts/data/11445222007029527B4442106100010.pdf-4
- 2025-07-20 15:25:56,138 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-12
- 2025-07-20 15:25:56,139 - __main__ - INFO - Built page query for scripts/data/11445222007029500K4440711000000.pdf-1
- 2025-07-20 15:25:56,139 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-3
- 2025-07-20 15:25:56,139 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-4
- 2025-07-20 15:25:56,139 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-5
- 2025-07-20 15:25:56,737 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-4
- 2025-07-20 15:25:56,738 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-2
- 2025-07-20 15:25:56,738 - __main__ - INFO - Built page query for scripts/data/11445224007035644H4440711000000.pdf-3
- 2025-07-20 15:25:56,741 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-1
- 2025-07-20 15:25:56,741 - __main__ - INFO - Built page query for scripts/data/11445222007029527B4442106100010.pdf-1
- 2025-07-20 15:25:56,741 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-1
- 2025-07-20 15:25:56,742 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-1
- 2025-07-20 15:25:56,745 - __main__ - INFO - Built page query for scripts/data/11445224007035644H4440711000000.pdf-2
- 2025-07-20 15:25:56,746 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-5
- 2025-07-20 15:25:56,833 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-11
- 2025-07-20 15:25:56,838 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-5
- 2025-07-20 15:25:56,840 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-7
- 2025-07-20 15:25:56,844 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-2
- 2025-07-20 15:25:56,844 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-2
- 2025-07-20 15:25:56,933 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-8
- 2025-07-20 15:25:56,934 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-2
- 2025-07-20 15:25:56,935 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-11
- 2025-07-20 15:25:56,935 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-1
- 2025-07-20 15:25:56,935 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-5
- 2025-07-20 15:25:56,936 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-10
- 2025-07-20 15:25:56,941 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-2
- 2025-07-20 15:25:56,944 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-5
- 2025-07-20 15:25:56,944 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-7
- 2025-07-20 15:25:56,945 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-5
- 2025-07-20 15:25:57,062 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-3
- 2025-07-20 15:25:57,062 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-3
- 2025-07-20 15:25:57,134 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-1
- 2025-07-20 15:25:57,135 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-3
- 2025-07-20 15:25:57,135 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-5
- 2025-07-20 15:25:57,138 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-2
- 2025-07-20 15:25:57,141 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-4
- 2025-07-20 15:25:57,144 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-1
- 2025-07-20 15:25:57,146 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-6
- 2025-07-20 15:25:57,238 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-3
- 2025-07-20 15:25:57,240 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-8
- 2025-07-20 15:25:57,242 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-6
- 2025-07-20 15:25:57,243 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-5
- 2025-07-20 15:25:57,245 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4440711000000.pdf-3
- 2025-07-20 15:25:57,247 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4440711000000.pdf-2
- 2025-07-20 15:25:57,334 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-4
- 2025-07-20 15:25:57,334 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-6
- 2025-07-20 15:25:57,337 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4440711000000.pdf-4
- 2025-07-20 15:25:57,345 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N444212503R001.pdf-1
- 2025-07-20 15:25:57,345 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111641000.pdf-4
- 2025-07-20 15:25:57,346 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-4
- 2025-07-20 15:25:57,433 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111641000.pdf-2
- 2025-07-20 15:25:57,434 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4440711000000.pdf-1
- 2025-07-20 15:25:57,434 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-6
- 2025-07-20 15:25:57,434 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-1
- 2025-07-20 15:25:57,637 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-3
- 2025-07-20 15:25:57,637 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-1
- 2025-07-20 15:25:57,638 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-3
- 2025-07-20 15:25:57,638 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-2
- 2025-07-20 15:25:57,639 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111820005.pdf-4
- 2025-07-20 15:25:57,639 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-4
- 2025-07-20 15:25:57,641 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-1
- 2025-07-20 15:25:57,643 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-10
- 2025-07-20 15:25:57,644 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111820005.pdf-2
- 2025-07-20 15:25:57,734 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-4
- 2025-07-20 15:25:57,735 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-6
- 2025-07-20 15:25:57,738 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-5
- 2025-07-20 15:25:57,738 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-5
- 2025-07-20 15:25:57,741 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-4
- 2025-07-20 15:25:57,743 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-6
- 2025-07-20 15:25:57,743 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-5
- 2025-07-20 15:25:57,744 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111641000.pdf-3
- 2025-07-20 15:25:57,746 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-4
- 2025-07-20 15:25:57,833 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-4
- 2025-07-20 15:25:57,834 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111820005.pdf-1
- 2025-07-20 15:25:57,834 - __main__ - INFO - Built page query for scripts/data/11445222007029527B4442106100010.pdf-3
- 2025-07-20 15:25:57,835 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-1
- 2025-07-20 15:25:57,836 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-2
- 2025-07-20 15:25:57,838 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-5
- 2025-07-20 15:25:57,838 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-3
- 2025-07-20 15:25:57,838 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-3
- 2025-07-20 15:25:57,839 - __main__ - INFO - Built page query for scripts/data/11445224007035644H4440711000000.pdf-4
- 2025-07-20 15:25:57,839 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-1
- 2025-07-20 15:25:57,840 - __main__ - INFO - Built page query for scripts/data/11445224007035644H4440711000000.pdf-1
- 2025-07-20 15:25:57,842 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-4
- 2025-07-20 15:25:57,844 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-4
- 2025-07-20 15:25:57,845 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-9
- 2025-07-20 15:25:57,846 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-9
- 2025-07-20 15:25:57,935 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-3
- 2025-07-20 15:25:57,938 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-2
- 2025-07-20 15:25:57,939 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-1
- 2025-07-20 15:25:58,534 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-2
- 2025-07-20 15:25:58,836 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-2
- 2025-07-20 15:25:58,838 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-1
- 2025-07-20 15:25:58,838 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111641000.pdf-1
- 2025-07-20 15:25:58,845 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111820005.pdf-3
- 2025-07-20 15:26:00,241 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 15:26:00,743 - __main__ - INFO - Worker 1 processing work item 8d1e4551c46000ba4529a1ac09bae565b95f4ab7
- 2025-07-20 15:26:00,745 - __main__ - INFO - Created all tasks for 8d1e4551c46000ba4529a1ac09bae565b95f4ab7
- 2025-07-20 15:26:02,233 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf in worker 1
- 2025-07-20 15:26:03,633 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:26:03,634 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:26:03,639 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 500
- 1 | 10
- 2025-07-20 15:26:04,373 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-10
- 2025-07-20 15:26:04,871 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-1
- 2025-07-20 15:26:05,135 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-6
- 2025-07-20 15:26:05,556 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-2
- 2025-07-20 15:26:06,052 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-9
- 2025-07-20 15:26:06,245 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-3
- 2025-07-20 15:26:06,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-7
- 2025-07-20 15:26:06,641 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-5
- 2025-07-20 15:26:06,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-4
- 2025-07-20 15:26:07,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-8
- 2025-07-20 15:26:13,640 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:26:13,641 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:26:13,641 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 500
- 1 | 10
- 2025-07-20 15:26:13,758 - sglang - INFO - [2025-07-20 15:26:13 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 15:26:13,759 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 15:26:19,544 - sglang - INFO - [2025-07-20 15:26:19 TP0] Prefill batch. #new-seq: 6, #new-token: 13654, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 103
- 2025-07-20 15:26:19,544 - __main__ - INFO - sglang running req: 1 queue req: 103
- 2025-07-20 15:26:23,642 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:26:23,643 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:26:23,643 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 500
- 1 | 10
- 2025-07-20 15:26:32,078 - sglang - INFO - [2025-07-20 15:26:32 TP0] Decode batch. #running-req: 7, #token: 15706, token usage: 0.41, gen throughput (token/s): 3.32, #queue-req: 288
- 2025-07-20 15:26:32,078 - __main__ - INFO - sglang running req: 7 queue req: 288
- 2025-07-20 15:26:33,644 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:26:33,645 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:26:33,645 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 500
- 1 | 10
- 2025-07-20 15:26:33,938 - sglang - INFO - [2025-07-20 15:26:33 TP0] Decode batch. #running-req: 7, #token: 15986, token usage: 0.42, gen throughput (token/s): 150.47, #queue-req: 328
- 2025-07-20 15:26:33,939 - __main__ - INFO - sglang running req: 7 queue req: 328
- 2025-07-20 15:26:35,633 - sglang - INFO - [2025-07-20 15:26:35 TP0] Decode batch. #running-req: 7, #token: 16266, token usage: 0.43, gen throughput (token/s): 165.22, #queue-req: 371
- 2025-07-20 15:26:35,633 - __main__ - INFO - sglang running req: 7 queue req: 371
- 2025-07-20 15:26:37,071 - sglang - INFO - [2025-07-20 15:26:37 TP0] Prefill batch. #new-seq: 2, #new-token: 5491, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.40, #running-req: 6, #queue-req: 416
- 2025-07-20 15:26:37,071 - __main__ - INFO - sglang running req: 6 queue req: 416
- 2025-07-20 15:26:39,583 - sglang - INFO - [2025-07-20 15:26:39 TP0] Decode batch. #running-req: 8, #token: 20525, token usage: 0.54, gen throughput (token/s): 71.39, #queue-req: 482
- 2025-07-20 15:26:39,584 - __main__ - INFO - sglang running req: 8 queue req: 482
- 2025-07-20 15:26:40,731 - sglang - INFO - [2025-07-20 15:26:40 TP0] Decode batch. #running-req: 8, #token: 20845, token usage: 0.55, gen throughput (token/s): 279.01, #queue-req: 501
- 2025-07-20 15:26:40,731 - __main__ - INFO - sglang running req: 8 queue req: 501
- 2025-07-20 15:26:41,631 - sglang - INFO - [2025-07-20 15:26:41 TP0] Decode batch. #running-req: 8, #token: 21165, token usage: 0.56, gen throughput (token/s): 355.27, #queue-req: 501
- 2025-07-20 15:26:41,631 - __main__ - INFO - sglang running req: 8 queue req: 501
- 2025-07-20 15:26:42,534 - sglang - INFO - [2025-07-20 15:26:42 TP0] Decode batch. #running-req: 8, #token: 21485, token usage: 0.57, gen throughput (token/s): 354.55, #queue-req: 501
- 2025-07-20 15:26:42,534 - __main__ - INFO - sglang running req: 8 queue req: 501
- 2025-07-20 15:26:43,436 - sglang - INFO - [2025-07-20 15:26:43 TP0] Decode batch. #running-req: 8, #token: 21805, token usage: 0.57, gen throughput (token/s): 354.47, #queue-req: 501
- 2025-07-20 15:26:43,437 - __main__ - INFO - sglang running req: 8 queue req: 501
- 2025-07-20 15:26:43,646 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:26:43,647 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 12.43 12.43
- sglang_output_tokens 1.37 1.37
- 2025-07-20 15:26:43,647 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 500
- 1 | 0 | 10
- 2025-07-20 15:26:44,340 - sglang - INFO - [2025-07-20 15:26:44 TP0] Decode batch. #running-req: 8, #token: 22125, token usage: 0.58, gen throughput (token/s): 354.26, #queue-req: 501
- 2025-07-20 15:26:44,340 - __main__ - INFO - sglang running req: 8 queue req: 501
- 2025-07-20 15:26:45,246 - sglang - INFO - [2025-07-20 15:26:45 TP0] Decode batch. #running-req: 8, #token: 22445, token usage: 0.59, gen throughput (token/s): 353.26, #queue-req: 501
- 2025-07-20 15:26:45,246 - __main__ - INFO - sglang running req: 8 queue req: 501
- 2025-07-20 15:26:46,152 - sglang - INFO - [2025-07-20 15:26:46 TP0] Decode batch. #running-req: 8, #token: 22765, token usage: 0.60, gen throughput (token/s): 352.91, #queue-req: 501
- 2025-07-20 15:26:46,153 - __main__ - INFO - sglang running req: 8 queue req: 501
- 2025-07-20 15:26:47,059 - sglang - INFO - [2025-07-20 15:26:47 TP0] Decode batch. #running-req: 8, #token: 23085, token usage: 0.61, gen throughput (token/s): 353.09, #queue-req: 501
- 2025-07-20 15:26:47,059 - __main__ - INFO - sglang running req: 8 queue req: 501
- 2025-07-20 15:26:47,445 - sglang - INFO - [2025-07-20 15:26:47 TP0] Prefill batch. #new-seq: 2, #new-token: 4317, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.55, #running-req: 7, #queue-req: 499
- 2025-07-20 15:26:47,445 - __main__ - INFO - sglang running req: 7 queue req: 499
- 2025-07-20 15:26:49,333 - sglang - INFO - [2025-07-20 15:26:49 TP0] Decode batch. #running-req: 9, #token: 25434, token usage: 0.67, gen throughput (token/s): 150.38, #queue-req: 499
- 2025-07-20 15:26:49,333 - __main__ - INFO - sglang running req: 9 queue req: 499
- 2025-07-20 15:26:49,452 - sglang - INFO - [2025-07-20 15:26:49 TP0] Prefill batch. #new-seq: 2, #new-token: 4449, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.60, #running-req: 8, #queue-req: 497
- 2025-07-20 15:26:49,452 - __main__ - INFO - sglang running req: 8 queue req: 497
- 2025-07-20 15:26:51,267 - sglang - INFO - [2025-07-20 15:26:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2410, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 496
- 2025-07-20 15:26:51,268 - __main__ - INFO - sglang running req: 9 queue req: 496
- 2025-07-20 15:26:52,373 - sglang - INFO - [2025-07-20 15:26:52 TP0] Decode batch. #running-req: 10, #token: 27147, token usage: 0.71, gen throughput (token/s): 129.27, #queue-req: 496
- 2025-07-20 15:26:52,373 - __main__ - INFO - sglang running req: 10 queue req: 496
- 2025-07-20 15:26:53,344 - sglang - INFO - [2025-07-20 15:26:53 TP0] Decode batch. #running-req: 10, #token: 27547, token usage: 0.73, gen throughput (token/s): 412.03, #queue-req: 496
- 2025-07-20 15:26:53,344 - __main__ - INFO - sglang running req: 10 queue req: 496
- 2025-07-20 15:26:53,648 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:26:53,648 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 64.03 64.03
- sglang_output_tokens 14.15 14.15
- 2025-07-20 15:26:53,648 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 500
- 1 | 0 | 10
- 2025-07-20 15:26:54,313 - sglang - INFO - [2025-07-20 15:26:54 TP0] Decode batch. #running-req: 10, #token: 27947, token usage: 0.74, gen throughput (token/s): 412.69, #queue-req: 496
- 2025-07-20 15:26:54,313 - __main__ - INFO - sglang running req: 10 queue req: 496
- 2025-07-20 15:26:54,992 - sglang - INFO - [2025-07-20 15:26:54 TP0] Prefill batch. #new-seq: 2, #new-token: 3657, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 494
- 2025-07-20 15:26:54,992 - __main__ - INFO - sglang running req: 9 queue req: 494
- 2025-07-20 15:26:56,473 - sglang - INFO - [2025-07-20 15:26:56 TP0] Decode batch. #running-req: 11, #token: 29185, token usage: 0.77, gen throughput (token/s): 190.26, #queue-req: 494
- 2025-07-20 15:26:56,474 - __main__ - INFO - sglang running req: 11 queue req: 494
- 2025-07-20 15:26:57,452 - sglang - INFO - [2025-07-20 15:26:57 TP0] Decode batch. #running-req: 11, #token: 29625, token usage: 0.78, gen throughput (token/s): 449.47, #queue-req: 494
- 2025-07-20 15:26:57,452 - __main__ - INFO - sglang running req: 11 queue req: 494
- 2025-07-20 15:26:58,433 - sglang - INFO - [2025-07-20 15:26:58 TP0] Decode batch. #running-req: 11, #token: 30065, token usage: 0.79, gen throughput (token/s): 448.41, #queue-req: 494
- 2025-07-20 15:26:58,434 - __main__ - INFO - sglang running req: 11 queue req: 494
- 2025-07-20 15:26:59,415 - sglang - INFO - [2025-07-20 15:26:59 TP0] Decode batch. #running-req: 11, #token: 30505, token usage: 0.80, gen throughput (token/s): 448.19, #queue-req: 494
- 2025-07-20 15:26:59,415 - __main__ - INFO - sglang running req: 11 queue req: 494
- 2025-07-20 15:27:00,030 - sglang - INFO - [2025-07-20 15:27:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2750, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 493
- 2025-07-20 15:27:00,030 - __main__ - INFO - sglang running req: 10 queue req: 493
- 2025-07-20 15:27:01,189 - sglang - INFO - [2025-07-20 15:27:01 TP0] Decode batch. #running-req: 11, #token: 31492, token usage: 0.83, gen throughput (token/s): 247.43, #queue-req: 493
- 2025-07-20 15:27:01,190 - __main__ - INFO - sglang running req: 11 queue req: 493
- 2025-07-20 15:27:01,902 - sglang - INFO - [2025-07-20 15:27:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2773, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 492
- 2025-07-20 15:27:01,903 - __main__ - INFO - sglang running req: 10 queue req: 492
- 2025-07-20 15:27:02,965 - sglang - INFO - [2025-07-20 15:27:02 TP0] Decode batch. #running-req: 11, #token: 30988, token usage: 0.82, gen throughput (token/s): 247.19, #queue-req: 492
- 2025-07-20 15:27:02,966 - __main__ - INFO - sglang running req: 11 queue req: 492
- 2025-07-20 15:27:03,405 - sglang - INFO - [2025-07-20 15:27:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2772, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 491
- 2025-07-20 15:27:03,405 - __main__ - INFO - sglang running req: 10 queue req: 491
- 2025-07-20 15:27:03,649 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:27:03,649 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 123.89 123.89
- sglang_output_tokens 29.09 29.09
- 2025-07-20 15:27:03,649 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 500
- 1 | 0 | 10
- 2025-07-20 15:27:04,743 - sglang - INFO - [2025-07-20 15:27:04 TP0] Decode batch. #running-req: 11, #token: 32458, token usage: 0.85, gen throughput (token/s): 246.97, #queue-req: 491
- 2025-07-20 15:27:04,743 - __main__ - INFO - sglang running req: 11 queue req: 491
- 2025-07-20 15:27:04,990 - sglang - INFO - [2025-07-20 15:27:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2822, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 490
- 2025-07-20 15:27:04,990 - __main__ - INFO - sglang running req: 10 queue req: 490
- 2025-07-20 15:27:06,561 - sglang - INFO - [2025-07-20 15:27:06 TP0] Decode batch. #running-req: 11, #token: 32019, token usage: 0.84, gen throughput (token/s): 241.51, #queue-req: 490
- 2025-07-20 15:27:06,561 - __main__ - INFO - sglang running req: 11 queue req: 490
- 2025-07-20 15:27:07,552 - sglang - INFO - [2025-07-20 15:27:07 TP0] Decode batch. #running-req: 11, #token: 32459, token usage: 0.85, gen throughput (token/s): 444.01, #queue-req: 490
- 2025-07-20 15:27:07,552 - __main__ - INFO - sglang running req: 11 queue req: 490
- 2025-07-20 15:27:07,676 - sglang - INFO - [2025-07-20 15:27:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 489
- 2025-07-20 15:27:07,676 - __main__ - INFO - sglang running req: 10 queue req: 489
- 2025-07-20 15:27:09,257 - sglang - INFO - [2025-07-20 15:27:09 TP0] Decode batch. #running-req: 11, #token: 31582, token usage: 0.83, gen throughput (token/s): 257.35, #queue-req: 489
- 2025-07-20 15:27:09,258 - __main__ - INFO - sglang running req: 11 queue req: 489
- 2025-07-20 15:27:09,752 - sglang - INFO - [2025-07-20 15:27:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2744, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 488
- 2025-07-20 15:27:09,752 - __main__ - INFO - sglang running req: 10 queue req: 488
- 2025-07-20 15:27:11,038 - sglang - INFO - [2025-07-20 15:27:11 TP0] Decode batch. #running-req: 11, #token: 31144, token usage: 0.82, gen throughput (token/s): 246.48, #queue-req: 488
- 2025-07-20 15:27:11,039 - __main__ - INFO - sglang running req: 11 queue req: 488
- 2025-07-20 15:27:11,187 - sglang - INFO - [2025-07-20 15:27:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1710, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 487
- 2025-07-20 15:27:11,187 - __main__ - INFO - sglang running req: 10 queue req: 487
- 2025-07-20 15:27:12,620 - sglang - INFO - [2025-07-20 15:27:12 TP0] Decode batch. #running-req: 11, #token: 30342, token usage: 0.80, gen throughput (token/s): 277.50, #queue-req: 487
- 2025-07-20 15:27:12,621 - __main__ - INFO - sglang running req: 11 queue req: 487
- 2025-07-20 15:27:13,612 - sglang - INFO - [2025-07-20 15:27:13 TP0] Decode batch. #running-req: 11, #token: 30782, token usage: 0.81, gen throughput (token/s): 443.61, #queue-req: 487
- 2025-07-20 15:27:13,613 - __main__ - INFO - sglang running req: 11 queue req: 487
- 2025-07-20 15:27:13,650 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:27:13,650 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 191.41 191.41
- sglang_output_tokens 50.01 50.01
- 2025-07-20 15:27:13,650 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 12 | 500
- 1 | 0 | 10
- 2025-07-20 15:27:13,825 - sglang - INFO - [2025-07-20 15:27:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 486
- 2025-07-20 15:27:13,825 - __main__ - INFO - sglang running req: 10 queue req: 486
- 2025-07-20 15:27:15,437 - sglang - INFO - [2025-07-20 15:27:15 TP0] Decode batch. #running-req: 11, #token: 31443, token usage: 0.83, gen throughput (token/s): 240.53, #queue-req: 486
- 2025-07-20 15:27:15,437 - __main__ - INFO - sglang running req: 11 queue req: 486
- 2025-07-20 15:27:16,280 - sglang - INFO - [2025-07-20 15:27:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 485
- 2025-07-20 15:27:16,280 - __main__ - INFO - sglang running req: 10 queue req: 485
- 2025-07-20 15:27:17,251 - sglang - INFO - [2025-07-20 15:27:17 TP0] Decode batch. #running-req: 11, #token: 32091, token usage: 0.84, gen throughput (token/s): 241.99, #queue-req: 485
- 2025-07-20 15:27:17,252 - __main__ - INFO - sglang running req: 11 queue req: 485
- 2025-07-20 15:27:18,236 - sglang - INFO - [2025-07-20 15:27:18 TP0] Decode batch. #running-req: 11, #token: 32531, token usage: 0.86, gen throughput (token/s): 446.77, #queue-req: 485
- 2025-07-20 15:27:18,236 - __main__ - INFO - sglang running req: 11 queue req: 485
- 2025-07-20 15:27:18,953 - sglang - INFO - [2025-07-20 15:27:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 484
- 2025-07-20 15:27:18,953 - __main__ - INFO - sglang running req: 10 queue req: 484
- 2025-07-20 15:27:19,970 - sglang - INFO - [2025-07-20 15:27:19 TP0] Decode batch. #running-req: 11, #token: 32159, token usage: 0.85, gen throughput (token/s): 253.26, #queue-req: 484
- 2025-07-20 15:27:19,970 - __main__ - INFO - sglang running req: 11 queue req: 484
- 2025-07-20 15:27:20,960 - sglang - INFO - [2025-07-20 15:27:20 TP0] Decode batch. #running-req: 11, #token: 32599, token usage: 0.86, gen throughput (token/s): 444.41, #queue-req: 484
- 2025-07-20 15:27:20,960 - __main__ - INFO - sglang running req: 11 queue req: 484
- 2025-07-20 15:27:21,949 - sglang - INFO - [2025-07-20 15:27:21 TP0] Decode batch. #running-req: 10, #token: 29770, token usage: 0.78, gen throughput (token/s): 434.77, #queue-req: 484
- 2025-07-20 15:27:21,949 - __main__ - INFO - sglang running req: 10 queue req: 484
- 2025-07-20 15:27:22,932 - sglang - INFO - [2025-07-20 15:27:22 TP0] Decode batch. #running-req: 10, #token: 30170, token usage: 0.79, gen throughput (token/s): 406.80, #queue-req: 484
- 2025-07-20 15:27:22,933 - __main__ - INFO - sglang running req: 10 queue req: 484
- 2025-07-20 15:27:23,652 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:27:23,652 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 238.94 238.94
- sglang_output_tokens 65.94 65.94
- 2025-07-20 15:27:23,653 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 16 | 500
- 1 | 0 | 10
- 2025-07-20 15:27:23,916 - sglang - INFO - [2025-07-20 15:27:23 TP0] Decode batch. #running-req: 10, #token: 30570, token usage: 0.80, gen throughput (token/s): 406.47, #queue-req: 484
- 2025-07-20 15:27:23,917 - __main__ - INFO - sglang running req: 10 queue req: 484
- 2025-07-20 15:27:24,899 - sglang - INFO - [2025-07-20 15:27:24 TP0] Decode batch. #running-req: 10, #token: 30970, token usage: 0.82, gen throughput (token/s): 406.94, #queue-req: 484
- 2025-07-20 15:27:24,900 - __main__ - INFO - sglang running req: 10 queue req: 484
- 2025-07-20 15:27:25,685 - sglang - INFO - [2025-07-20 15:27:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2855, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 483
- 2025-07-20 15:27:25,686 - __main__ - INFO - sglang running req: 9 queue req: 483
- 2025-07-20 15:27:26,703 - sglang - INFO - [2025-07-20 15:27:26 TP0] Decode batch. #running-req: 10, #token: 32049, token usage: 0.84, gen throughput (token/s): 221.21, #queue-req: 483
- 2025-07-20 15:27:26,703 - __main__ - INFO - sglang running req: 10 queue req: 483
- 2025-07-20 15:27:27,024 - sglang - INFO - [2025-07-20 15:27:27 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 482
- 2025-07-20 15:27:27,024 - __main__ - INFO - sglang running req: 9 queue req: 482
- 2025-07-20 15:27:28,412 - sglang - INFO - [2025-07-20 15:27:28 TP0] Decode batch. #running-req: 10, #token: 31881, token usage: 0.84, gen throughput (token/s): 233.51, #queue-req: 482
- 2025-07-20 15:27:28,412 - __main__ - INFO - sglang running req: 10 queue req: 482
- 2025-07-20 15:27:29,398 - sglang - INFO - [2025-07-20 15:27:29 TP0] Decode batch. #running-req: 10, #token: 32281, token usage: 0.85, gen throughput (token/s): 405.47, #queue-req: 482
- 2025-07-20 15:27:29,398 - __main__ - INFO - sglang running req: 10 queue req: 482
- 2025-07-20 15:27:30,386 - sglang - INFO - [2025-07-20 15:27:30 TP0] Decode batch. #running-req: 10, #token: 32681, token usage: 0.86, gen throughput (token/s): 404.88, #queue-req: 482
- 2025-07-20 15:27:30,386 - __main__ - INFO - sglang running req: 10 queue req: 482
- 2025-07-20 15:27:30,832 - sglang - INFO - [2025-07-20 15:27:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 481
- 2025-07-20 15:27:30,832 - __main__ - INFO - sglang running req: 9 queue req: 481
- 2025-07-20 15:27:32,043 - sglang - INFO - [2025-07-20 15:27:32 TP0] Prefill batch. #new-seq: 1, #new-token: 1883, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 480
- 2025-07-20 15:27:32,043 - __main__ - INFO - sglang running req: 9 queue req: 480
- 2025-07-20 15:27:32,761 - sglang - INFO - [2025-07-20 15:27:32 TP0] Decode batch. #running-req: 10, #token: 30114, token usage: 0.79, gen throughput (token/s): 167.57, #queue-req: 480
- 2025-07-20 15:27:32,761 - __main__ - INFO - sglang running req: 10 queue req: 480
- 2025-07-20 15:27:33,400 - sglang - INFO - [2025-07-20 15:27:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2042, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 479
- 2025-07-20 15:27:33,400 - __main__ - INFO - sglang running req: 9 queue req: 479
- 2025-07-20 15:27:33,654 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:27:33,654 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 301.00 301.00
- sglang_output_tokens 84.35 84.35
- 2025-07-20 15:27:33,654 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 21 | 500
- 1 | 0 | 10
- 2025-07-20 15:27:34,397 - sglang - INFO - [2025-07-20 15:27:34 TP0] Decode batch. #running-req: 10, #token: 28936, token usage: 0.76, gen throughput (token/s): 243.99, #queue-req: 479
- 2025-07-20 15:27:34,397 - __main__ - INFO - sglang running req: 10 queue req: 479
- 2025-07-20 15:27:35,380 - sglang - INFO - [2025-07-20 15:27:35 TP0] Decode batch. #running-req: 10, #token: 29336, token usage: 0.77, gen throughput (token/s): 406.83, #queue-req: 479
- 2025-07-20 15:27:35,380 - __main__ - INFO - sglang running req: 10 queue req: 479
- 2025-07-20 15:27:35,994 - sglang - INFO - [2025-07-20 15:27:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2507, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 478
- 2025-07-20 15:27:35,994 - __main__ - INFO - sglang running req: 9 queue req: 478
- 2025-07-20 15:27:37,112 - sglang - INFO - [2025-07-20 15:27:37 TP0] Decode batch. #running-req: 10, #token: 28526, token usage: 0.75, gen throughput (token/s): 230.30, #queue-req: 478
- 2025-07-20 15:27:37,112 - __main__ - INFO - sglang running req: 10 queue req: 478
- 2025-07-20 15:27:38,069 - sglang - INFO - [2025-07-20 15:27:38 TP0] Prefill batch. #new-seq: 2, #new-token: 3856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 476
- 2025-07-20 15:27:38,069 - __main__ - INFO - sglang running req: 9 queue req: 476
- 2025-07-20 15:27:39,349 - sglang - INFO - [2025-07-20 15:27:39 TP0] Decode batch. #running-req: 11, #token: 29220, token usage: 0.77, gen throughput (token/s): 178.84, #queue-req: 476
- 2025-07-20 15:27:39,349 - __main__ - INFO - sglang running req: 11 queue req: 476
- 2025-07-20 15:27:40,331 - sglang - INFO - [2025-07-20 15:27:40 TP0] Decode batch. #running-req: 11, #token: 29660, token usage: 0.78, gen throughput (token/s): 447.87, #queue-req: 476
- 2025-07-20 15:27:40,332 - __main__ - INFO - sglang running req: 11 queue req: 476
- 2025-07-20 15:27:41,315 - sglang - INFO - [2025-07-20 15:27:41 TP0] Decode batch. #running-req: 11, #token: 30100, token usage: 0.79, gen throughput (token/s): 447.58, #queue-req: 476
- 2025-07-20 15:27:41,315 - __main__ - INFO - sglang running req: 11 queue req: 476
- 2025-07-20 15:27:42,301 - sglang - INFO - [2025-07-20 15:27:42 TP0] Decode batch. #running-req: 11, #token: 30540, token usage: 0.80, gen throughput (token/s): 446.15, #queue-req: 476
- 2025-07-20 15:27:42,301 - __main__ - INFO - sglang running req: 11 queue req: 476
- 2025-07-20 15:27:42,376 - sglang - INFO - [2025-07-20 15:27:42 TP0] Prefill batch. #new-seq: 1, #new-token: 2822, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 475
- 2025-07-20 15:27:42,376 - __main__ - INFO - sglang running req: 10 queue req: 475
- 2025-07-20 15:27:43,656 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:27:43,656 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 333.23 333.23
- sglang_output_tokens 94.40 94.40
- 2025-07-20 15:27:43,656 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 24 | 500
- 1 | 0 | 10
- 2025-07-20 15:27:44,111 - sglang - INFO - [2025-07-20 15:27:44 TP0] Decode batch. #running-req: 11, #token: 30048, token usage: 0.79, gen throughput (token/s): 242.43, #queue-req: 475
- 2025-07-20 15:27:44,112 - __main__ - INFO - sglang running req: 11 queue req: 475
- 2025-07-20 15:27:44,235 - sglang - INFO - [2025-07-20 15:27:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2748, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 474
- 2025-07-20 15:27:44,236 - __main__ - INFO - sglang running req: 10 queue req: 474
- 2025-07-20 15:27:45,905 - sglang - INFO - [2025-07-20 15:27:45 TP0] Decode batch. #running-req: 11, #token: 31025, token usage: 0.82, gen throughput (token/s): 244.73, #queue-req: 474
- 2025-07-20 15:27:45,906 - __main__ - INFO - sglang running req: 11 queue req: 474
- 2025-07-20 15:27:46,352 - sglang - INFO - [2025-07-20 15:27:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1592, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 473
- 2025-07-20 15:27:46,352 - __main__ - INFO - sglang running req: 10 queue req: 473
- 2025-07-20 15:27:47,472 - sglang - INFO - [2025-07-20 15:27:47 TP0] Decode batch. #running-req: 11, #token: 29826, token usage: 0.79, gen throughput (token/s): 280.24, #queue-req: 473
- 2025-07-20 15:27:47,472 - __main__ - INFO - sglang running req: 11 queue req: 473
- 2025-07-20 15:27:47,718 - sglang - INFO - [2025-07-20 15:27:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2780, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 472
- 2025-07-20 15:27:47,719 - __main__ - INFO - sglang running req: 10 queue req: 472
- 2025-07-20 15:27:49,262 - sglang - INFO - [2025-07-20 15:27:49 TP0] Decode batch. #running-req: 11, #token: 30186, token usage: 0.79, gen throughput (token/s): 245.28, #queue-req: 472
- 2025-07-20 15:27:49,262 - __main__ - INFO - sglang running req: 11 queue req: 472
- 2025-07-20 15:27:50,250 - sglang - INFO - [2025-07-20 15:27:50 TP0] Decode batch. #running-req: 11, #token: 30626, token usage: 0.81, gen throughput (token/s): 444.99, #queue-req: 472
- 2025-07-20 15:27:50,251 - __main__ - INFO - sglang running req: 11 queue req: 472
- 2025-07-20 15:27:50,399 - sglang - INFO - [2025-07-20 15:27:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2750, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 471
- 2025-07-20 15:27:50,399 - __main__ - INFO - sglang running req: 10 queue req: 471
- 2025-07-20 15:27:52,040 - sglang - INFO - [2025-07-20 15:27:52 TP0] Decode batch. #running-req: 11, #token: 29925, token usage: 0.79, gen throughput (token/s): 245.29, #queue-req: 471
- 2025-07-20 15:27:52,040 - __main__ - INFO - sglang running req: 11 queue req: 471
- 2025-07-20 15:27:53,027 - sglang - INFO - [2025-07-20 15:27:53 TP0] Decode batch. #running-req: 11, #token: 30365, token usage: 0.80, gen throughput (token/s): 445.84, #queue-req: 471
- 2025-07-20 15:27:53,027 - __main__ - INFO - sglang running req: 11 queue req: 471
- 2025-07-20 15:27:53,657 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:27:53,658 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 367.79 367.79
- sglang_output_tokens 103.86 103.86
- 2025-07-20 15:27:53,658 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 28 | 500
- 1 | 0 | 10
- 2025-07-20 15:27:54,054 - sglang - INFO - [2025-07-20 15:27:54 TP0] Decode batch. #running-req: 11, #token: 30805, token usage: 0.81, gen throughput (token/s): 428.41, #queue-req: 471
- 2025-07-20 15:27:54,054 - __main__ - INFO - sglang running req: 11 queue req: 471
- 2025-07-20 15:27:54,698 - sglang - INFO - [2025-07-20 15:27:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2855, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 470
- 2025-07-20 15:27:54,698 - __main__ - INFO - sglang running req: 10 queue req: 470
- 2025-07-20 15:27:55,623 - sglang - INFO - [2025-07-20 15:27:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2412, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 469
- 2025-07-20 15:27:55,623 - __main__ - INFO - sglang running req: 10 queue req: 469
- 2025-07-20 15:27:56,603 - sglang - INFO - [2025-07-20 15:27:56 TP0] Decode batch. #running-req: 11, #token: 31689, token usage: 0.83, gen throughput (token/s): 171.86, #queue-req: 469
- 2025-07-20 15:27:56,603 - __main__ - INFO - sglang running req: 11 queue req: 469
- 2025-07-20 15:27:57,589 - sglang - INFO - [2025-07-20 15:27:57 TP0] Decode batch. #running-req: 10, #token: 30235, token usage: 0.80, gen throughput (token/s): 413.82, #queue-req: 469
- 2025-07-20 15:27:57,589 - __main__ - INFO - sglang running req: 10 queue req: 469
- 2025-07-20 15:27:57,688 - sglang - INFO - [2025-07-20 15:27:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2791, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 468
- 2025-07-20 15:27:57,688 - __main__ - INFO - sglang running req: 9 queue req: 468
- 2025-07-20 15:27:59,378 - sglang - INFO - [2025-07-20 15:27:59 TP0] Decode batch. #running-req: 10, #token: 29719, token usage: 0.78, gen throughput (token/s): 223.00, #queue-req: 468
- 2025-07-20 15:27:59,378 - __main__ - INFO - sglang running req: 10 queue req: 468
- 2025-07-20 15:28:00,381 - sglang - INFO - [2025-07-20 15:28:00 TP0] Decode batch. #running-req: 10, #token: 30119, token usage: 0.79, gen throughput (token/s): 398.66, #queue-req: 468
- 2025-07-20 15:28:00,382 - __main__ - INFO - sglang running req: 10 queue req: 468
- 2025-07-20 15:28:01,350 - sglang - INFO - [2025-07-20 15:28:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 467
- 2025-07-20 15:28:01,350 - __main__ - INFO - sglang running req: 9 queue req: 467
- 2025-07-20 15:28:02,176 - sglang - INFO - [2025-07-20 15:28:02 TP0] Decode batch. #running-req: 10, #token: 29762, token usage: 0.78, gen throughput (token/s): 222.33, #queue-req: 467
- 2025-07-20 15:28:02,176 - __main__ - INFO - sglang running req: 10 queue req: 467
- 2025-07-20 15:28:03,154 - sglang - INFO - [2025-07-20 15:28:03 TP0] Decode batch. #running-req: 10, #token: 30162, token usage: 0.79, gen throughput (token/s): 408.86, #queue-req: 467
- 2025-07-20 15:28:03,155 - __main__ - INFO - sglang running req: 10 queue req: 467
- 2025-07-20 15:28:03,252 - sglang - INFO - [2025-07-20 15:28:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 466
- 2025-07-20 15:28:03,253 - __main__ - INFO - sglang running req: 9 queue req: 466
- 2025-07-20 15:28:03,659 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:28:03,660 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 417.42 417.42
- sglang_output_tokens 118.15 118.15
- 2025-07-20 15:28:03,660 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 34 | 500
- 1 | 0 | 10
- 2025-07-20 15:28:04,737 - sglang - INFO - [2025-07-20 15:28:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2146, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 465
- 2025-07-20 15:28:04,738 - __main__ - INFO - sglang running req: 9 queue req: 465
- 2025-07-20 15:28:05,531 - sglang - INFO - [2025-07-20 15:28:05 TP0] Decode batch. #running-req: 10, #token: 28889, token usage: 0.76, gen throughput (token/s): 167.48, #queue-req: 465
- 2025-07-20 15:28:05,531 - __main__ - INFO - sglang running req: 10 queue req: 465
- 2025-07-20 15:28:06,512 - sglang - INFO - [2025-07-20 15:28:06 TP0] Decode batch. #running-req: 10, #token: 29289, token usage: 0.77, gen throughput (token/s): 407.80, #queue-req: 465
- 2025-07-20 15:28:06,512 - __main__ - INFO - sglang running req: 10 queue req: 465
- 2025-07-20 15:28:07,493 - sglang - INFO - [2025-07-20 15:28:07 TP0] Decode batch. #running-req: 10, #token: 29689, token usage: 0.78, gen throughput (token/s): 407.51, #queue-req: 465
- 2025-07-20 15:28:07,493 - __main__ - INFO - sglang running req: 10 queue req: 465
- 2025-07-20 15:28:08,477 - sglang - INFO - [2025-07-20 15:28:08 TP0] Decode batch. #running-req: 10, #token: 30089, token usage: 0.79, gen throughput (token/s): 406.69, #queue-req: 465
- 2025-07-20 15:28:08,477 - __main__ - INFO - sglang running req: 10 queue req: 465
- 2025-07-20 15:28:09,460 - sglang - INFO - [2025-07-20 15:28:09 TP0] Decode batch. #running-req: 10, #token: 30489, token usage: 0.80, gen throughput (token/s): 406.61, #queue-req: 465
- 2025-07-20 15:28:09,461 - __main__ - INFO - sglang running req: 10 queue req: 465
- 2025-07-20 15:28:10,444 - sglang - INFO - [2025-07-20 15:28:10 TP0] Decode batch. #running-req: 10, #token: 30889, token usage: 0.81, gen throughput (token/s): 406.51, #queue-req: 465
- 2025-07-20 15:28:10,445 - __main__ - INFO - sglang running req: 10 queue req: 465
- 2025-07-20 15:28:11,429 - sglang - INFO - [2025-07-20 15:28:11 TP0] Decode batch. #running-req: 10, #token: 31289, token usage: 0.82, gen throughput (token/s): 406.23, #queue-req: 465
- 2025-07-20 15:28:11,429 - __main__ - INFO - sglang running req: 10 queue req: 465
- 2025-07-20 15:28:12,146 - sglang - INFO - [2025-07-20 15:28:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2512, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 464
- 2025-07-20 15:28:12,146 - __main__ - INFO - sglang running req: 9 queue req: 464
- 2025-07-20 15:28:12,924 - sglang - INFO - [2025-07-20 15:28:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 463
- 2025-07-20 15:28:12,924 - __main__ - INFO - sglang running req: 9 queue req: 463
- 2025-07-20 15:28:13,662 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:28:13,663 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 433.08 433.08
- sglang_output_tokens 122.55 122.55
- 2025-07-20 15:28:13,663 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 37 | 500
- 1 | 0 | 10
- 2025-07-20 15:28:13,918 - sglang - INFO - [2025-07-20 15:28:13 TP0] Decode batch. #running-req: 10, #token: 30091, token usage: 0.79, gen throughput (token/s): 159.91, #queue-req: 463
- 2025-07-20 15:28:13,918 - __main__ - INFO - sglang running req: 10 queue req: 463
- 2025-07-20 15:28:14,657 - sglang - INFO - [2025-07-20 15:28:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2791, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 462
- 2025-07-20 15:28:14,657 - __main__ - INFO - sglang running req: 9 queue req: 462
- 2025-07-20 15:28:15,711 - sglang - INFO - [2025-07-20 15:28:15 TP0] Decode batch. #running-req: 10, #token: 29553, token usage: 0.78, gen throughput (token/s): 222.51, #queue-req: 462
- 2025-07-20 15:28:15,711 - __main__ - INFO - sglang running req: 10 queue req: 462
- 2025-07-20 15:28:16,645 - sglang - INFO - [2025-07-20 15:28:16 TP0] Prefill batch. #new-seq: 1, #new-token: 1663, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 461
- 2025-07-20 15:28:16,646 - __main__ - INFO - sglang running req: 9 queue req: 461
- 2025-07-20 15:28:17,273 - sglang - INFO - [2025-07-20 15:28:17 TP0] Decode batch. #running-req: 10, #token: 28008, token usage: 0.74, gen throughput (token/s): 255.48, #queue-req: 461
- 2025-07-20 15:28:17,273 - __main__ - INFO - sglang running req: 10 queue req: 461
- 2025-07-20 15:28:18,290 - sglang - INFO - [2025-07-20 15:28:18 TP0] Decode batch. #running-req: 10, #token: 28408, token usage: 0.75, gen throughput (token/s): 393.11, #queue-req: 461
- 2025-07-20 15:28:18,291 - __main__ - INFO - sglang running req: 10 queue req: 461
- 2025-07-20 15:28:19,268 - sglang - INFO - [2025-07-20 15:28:19 TP0] Decode batch. #running-req: 10, #token: 28808, token usage: 0.76, gen throughput (token/s): 408.98, #queue-req: 461
- 2025-07-20 15:28:19,269 - __main__ - INFO - sglang running req: 10 queue req: 461
- 2025-07-20 15:28:20,250 - sglang - INFO - [2025-07-20 15:28:20 TP0] Decode batch. #running-req: 10, #token: 29208, token usage: 0.77, gen throughput (token/s): 407.59, #queue-req: 461
- 2025-07-20 15:28:20,250 - __main__ - INFO - sglang running req: 10 queue req: 461
- 2025-07-20 15:28:21,234 - sglang - INFO - [2025-07-20 15:28:21 TP0] Decode batch. #running-req: 10, #token: 29608, token usage: 0.78, gen throughput (token/s): 406.31, #queue-req: 461
- 2025-07-20 15:28:21,235 - __main__ - INFO - sglang running req: 10 queue req: 461
- 2025-07-20 15:28:21,530 - sglang - INFO - [2025-07-20 15:28:21 TP0] Prefill batch. #new-seq: 2, #new-token: 3204, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 9, #queue-req: 459
- 2025-07-20 15:28:21,530 - __main__ - INFO - sglang running req: 9 queue req: 459
- 2025-07-20 15:28:22,688 - sglang - INFO - [2025-07-20 15:28:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1710, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 458
- 2025-07-20 15:28:22,688 - __main__ - INFO - sglang running req: 10 queue req: 458
- 2025-07-20 15:28:23,664 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:28:23,664 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 463.22 463.22
- sglang_output_tokens 132.10 132.10
- 2025-07-20 15:28:23,665 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 41 | 500
- 1 | 0 | 10
- 2025-07-20 15:28:23,958 - sglang - INFO - [2025-07-20 15:28:23 TP0] Decode batch. #running-req: 11, #token: 28402, token usage: 0.75, gen throughput (token/s): 156.37, #queue-req: 458
- 2025-07-20 15:28:23,959 - __main__ - INFO - sglang running req: 11 queue req: 458
- 2025-07-20 15:28:24,552 - sglang - INFO - [2025-07-20 15:28:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2857, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 457
- 2025-07-20 15:28:24,552 - __main__ - INFO - sglang running req: 10 queue req: 457
- 2025-07-20 15:28:25,780 - sglang - INFO - [2025-07-20 15:28:25 TP0] Decode batch. #running-req: 10, #token: 28601, token usage: 0.75, gen throughput (token/s): 240.46, #queue-req: 457
- 2025-07-20 15:28:25,780 - __main__ - INFO - sglang running req: 10 queue req: 457
- 2025-07-20 15:28:25,781 - sglang - INFO - [2025-07-20 15:28:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2716, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 456
- 2025-07-20 15:28:25,781 - __main__ - INFO - sglang running req: 10 queue req: 456
- 2025-07-20 15:28:26,738 - sglang - INFO - [2025-07-20 15:28:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2773, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 455
- 2025-07-20 15:28:26,738 - __main__ - INFO - sglang running req: 10 queue req: 455
- 2025-07-20 15:28:28,396 - sglang - INFO - [2025-07-20 15:28:28 TP0] Decode batch. #running-req: 11, #token: 30815, token usage: 0.81, gen throughput (token/s): 167.80, #queue-req: 455
- 2025-07-20 15:28:28,397 - __main__ - INFO - sglang running req: 11 queue req: 455
- 2025-07-20 15:28:29,390 - sglang - INFO - [2025-07-20 15:28:29 TP0] Decode batch. #running-req: 10, #token: 27588, token usage: 0.73, gen throughput (token/s): 441.72, #queue-req: 455
- 2025-07-20 15:28:29,390 - __main__ - INFO - sglang running req: 10 queue req: 455
- 2025-07-20 15:28:29,390 - sglang - INFO - [2025-07-20 15:28:29 TP0] Prefill batch. #new-seq: 1, #new-token: 1663, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 454
- 2025-07-20 15:28:29,391 - __main__ - INFO - sglang running req: 10 queue req: 454
- 2025-07-20 15:28:30,615 - sglang - INFO - [2025-07-20 15:28:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 453
- 2025-07-20 15:28:30,616 - __main__ - INFO - sglang running req: 10 queue req: 453
- 2025-07-20 15:28:31,500 - sglang - INFO - [2025-07-20 15:28:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1286, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 452
- 2025-07-20 15:28:31,501 - __main__ - INFO - sglang running req: 10 queue req: 452
- 2025-07-20 15:28:32,242 - sglang - INFO - [2025-07-20 15:28:32 TP0] Decode batch. #running-req: 11, #token: 27187, token usage: 0.72, gen throughput (token/s): 153.55, #queue-req: 452
- 2025-07-20 15:28:32,243 - __main__ - INFO - sglang running req: 11 queue req: 452
- 2025-07-20 15:28:33,224 - sglang - INFO - [2025-07-20 15:28:33 TP0] Decode batch. #running-req: 11, #token: 27627, token usage: 0.73, gen throughput (token/s): 448.21, #queue-req: 452
- 2025-07-20 15:28:33,224 - __main__ - INFO - sglang running req: 11 queue req: 452
- 2025-07-20 15:28:33,666 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:28:33,667 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 501.45 501.45
- sglang_output_tokens 142.17 142.17
- 2025-07-20 15:28:33,667 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 47 | 500
- 1 | 0 | 10
- 2025-07-20 15:28:34,206 - sglang - INFO - [2025-07-20 15:28:34 TP0] Decode batch. #running-req: 11, #token: 28067, token usage: 0.74, gen throughput (token/s): 447.94, #queue-req: 452
- 2025-07-20 15:28:34,207 - __main__ - INFO - sglang running req: 11 queue req: 452
- 2025-07-20 15:28:34,873 - sglang - INFO - [2025-07-20 15:28:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 451
- 2025-07-20 15:28:34,873 - __main__ - INFO - sglang running req: 10 queue req: 451
- 2025-07-20 15:28:35,947 - sglang - INFO - [2025-07-20 15:28:35 TP0] Decode batch. #running-req: 11, #token: 29499, token usage: 0.78, gen throughput (token/s): 252.20, #queue-req: 451
- 2025-07-20 15:28:35,947 - __main__ - INFO - sglang running req: 11 queue req: 451
- 2025-07-20 15:28:36,937 - sglang - INFO - [2025-07-20 15:28:36 TP0] Decode batch. #running-req: 11, #token: 29939, token usage: 0.79, gen throughput (token/s): 444.63, #queue-req: 451
- 2025-07-20 15:28:36,937 - __main__ - INFO - sglang running req: 11 queue req: 451
- 2025-07-20 15:28:37,927 - sglang - INFO - [2025-07-20 15:28:37 TP0] Decode batch. #running-req: 11, #token: 30379, token usage: 0.80, gen throughput (token/s): 444.37, #queue-req: 451
- 2025-07-20 15:28:37,927 - __main__ - INFO - sglang running req: 11 queue req: 451
- 2025-07-20 15:28:37,977 - sglang - INFO - [2025-07-20 15:28:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2803, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 450
- 2025-07-20 15:28:37,977 - __main__ - INFO - sglang running req: 10 queue req: 450
- 2025-07-20 15:28:39,732 - sglang - INFO - [2025-07-20 15:28:39 TP0] Decode batch. #running-req: 11, #token: 31717, token usage: 0.83, gen throughput (token/s): 243.22, #queue-req: 450
- 2025-07-20 15:28:39,732 - __main__ - INFO - sglang running req: 11 queue req: 450
- 2025-07-20 15:28:40,724 - sglang - INFO - [2025-07-20 15:28:40 TP0] Decode batch. #running-req: 10, #token: 29980, token usage: 0.79, gen throughput (token/s): 441.39, #queue-req: 450
- 2025-07-20 15:28:40,724 - __main__ - INFO - sglang running req: 10 queue req: 450
- 2025-07-20 15:28:41,684 - sglang - INFO - [2025-07-20 15:28:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 449
- 2025-07-20 15:28:41,684 - __main__ - INFO - sglang running req: 9 queue req: 449
- 2025-07-20 15:28:42,545 - sglang - INFO - [2025-07-20 15:28:42 TP0] Decode batch. #running-req: 10, #token: 30017, token usage: 0.79, gen throughput (token/s): 219.15, #queue-req: 449
- 2025-07-20 15:28:42,545 - __main__ - INFO - sglang running req: 10 queue req: 449
- 2025-07-20 15:28:43,534 - sglang - INFO - [2025-07-20 15:28:43 TP0] Decode batch. #running-req: 10, #token: 30417, token usage: 0.80, gen throughput (token/s): 404.61, #queue-req: 449
- 2025-07-20 15:28:43,534 - __main__ - INFO - sglang running req: 10 queue req: 449
- 2025-07-20 15:28:43,669 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:28:43,670 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 510.83 510.83
- sglang_output_tokens 142.92 142.92
- 2025-07-20 15:28:43,670 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 51 | 500
- 1 | 0 | 10
- 2025-07-20 15:28:44,524 - sglang - INFO - [2025-07-20 15:28:44 TP0] Decode batch. #running-req: 10, #token: 30817, token usage: 0.81, gen throughput (token/s): 403.87, #queue-req: 449
- 2025-07-20 15:28:44,524 - __main__ - INFO - sglang running req: 10 queue req: 449
- 2025-07-20 15:28:44,920 - sglang - INFO - [2025-07-20 15:28:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2412, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 448
- 2025-07-20 15:28:44,920 - __main__ - INFO - sglang running req: 9 queue req: 448
- 2025-07-20 15:28:46,250 - sglang - INFO - [2025-07-20 15:28:46 TP0] Decode batch. #running-req: 10, #token: 30369, token usage: 0.80, gen throughput (token/s): 231.18, #queue-req: 448
- 2025-07-20 15:28:46,250 - __main__ - INFO - sglang running req: 10 queue req: 448
- 2025-07-20 15:28:46,993 - sglang - INFO - [2025-07-20 15:28:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1863, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 447
- 2025-07-20 15:28:46,993 - __main__ - INFO - sglang running req: 9 queue req: 447
- 2025-07-20 15:28:47,766 - sglang - INFO - [2025-07-20 15:28:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2730, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 446
- 2025-07-20 15:28:47,767 - __main__ - INFO - sglang running req: 9 queue req: 446
- 2025-07-20 15:28:48,698 - sglang - INFO - [2025-07-20 15:28:48 TP0] Decode batch. #running-req: 10, #token: 28965, token usage: 0.76, gen throughput (token/s): 162.56, #queue-req: 446
- 2025-07-20 15:28:48,698 - __main__ - INFO - sglang running req: 10 queue req: 446
- 2025-07-20 15:28:49,728 - sglang - INFO - [2025-07-20 15:28:49 TP0] Decode batch. #running-req: 10, #token: 29365, token usage: 0.77, gen throughput (token/s): 388.26, #queue-req: 446
- 2025-07-20 15:28:49,728 - __main__ - INFO - sglang running req: 10 queue req: 446
- 2025-07-20 15:28:50,296 - sglang - INFO - [2025-07-20 15:28:50 TP0] Prefill batch. #new-seq: 1, #new-token: 1611, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 445
- 2025-07-20 15:28:50,296 - __main__ - INFO - sglang running req: 9 queue req: 445
- 2025-07-20 15:28:51,297 - sglang - INFO - [2025-07-20 15:28:51 TP0] Decode batch. #running-req: 10, #token: 28516, token usage: 0.75, gen throughput (token/s): 254.34, #queue-req: 445
- 2025-07-20 15:28:51,297 - __main__ - INFO - sglang running req: 10 queue req: 445
- 2025-07-20 15:28:52,281 - sglang - INFO - [2025-07-20 15:28:52 TP0] Decode batch. #running-req: 10, #token: 28916, token usage: 0.76, gen throughput (token/s): 406.77, #queue-req: 445
- 2025-07-20 15:28:52,281 - __main__ - INFO - sglang running req: 10 queue req: 445
- 2025-07-20 15:28:53,264 - sglang - INFO - [2025-07-20 15:28:53 TP0] Decode batch. #running-req: 10, #token: 29316, token usage: 0.77, gen throughput (token/s): 406.56, #queue-req: 445
- 2025-07-20 15:28:53,265 - __main__ - INFO - sglang running req: 10 queue req: 445
- 2025-07-20 15:28:53,671 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:28:53,672 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 529.35 529.35
- sglang_output_tokens 149.32 149.32
- 2025-07-20 15:28:53,672 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 55 | 500
- 1 | 0 | 10
- 2025-07-20 15:28:54,251 - sglang - INFO - [2025-07-20 15:28:54 TP0] Decode batch. #running-req: 10, #token: 29716, token usage: 0.78, gen throughput (token/s): 405.56, #queue-req: 445
- 2025-07-20 15:28:54,251 - __main__ - INFO - sglang running req: 10 queue req: 445
- 2025-07-20 15:28:55,236 - sglang - INFO - [2025-07-20 15:28:55 TP0] Decode batch. #running-req: 10, #token: 30116, token usage: 0.79, gen throughput (token/s): 406.12, #queue-req: 445
- 2025-07-20 15:28:55,236 - __main__ - INFO - sglang running req: 10 queue req: 445
- 2025-07-20 15:28:55,457 - sglang - INFO - [2025-07-20 15:28:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2399, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 444
- 2025-07-20 15:28:55,457 - __main__ - INFO - sglang running req: 9 queue req: 444
- 2025-07-20 15:28:56,355 - sglang - INFO - [2025-07-20 15:28:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2903, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 443
- 2025-07-20 15:28:56,355 - __main__ - INFO - sglang running req: 9 queue req: 443
- 2025-07-20 15:28:57,804 - sglang - INFO - [2025-07-20 15:28:57 TP0] Decode batch. #running-req: 10, #token: 28631, token usage: 0.75, gen throughput (token/s): 154.97, #queue-req: 443
- 2025-07-20 15:28:57,804 - __main__ - INFO - sglang running req: 10 queue req: 443
- 2025-07-20 15:28:58,592 - sglang - INFO - [2025-07-20 15:28:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2733, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 442
- 2025-07-20 15:28:58,592 - __main__ - INFO - sglang running req: 9 queue req: 442
- 2025-07-20 15:28:59,599 - sglang - INFO - [2025-07-20 15:28:59 TP0] Decode batch. #running-req: 10, #token: 27979, token usage: 0.74, gen throughput (token/s): 222.23, #queue-req: 442
- 2025-07-20 15:28:59,599 - __main__ - INFO - sglang running req: 10 queue req: 442
- 2025-07-20 15:29:00,590 - sglang - INFO - [2025-07-20 15:29:00 TP0] Decode batch. #running-req: 10, #token: 28379, token usage: 0.75, gen throughput (token/s): 403.64, #queue-req: 442
- 2025-07-20 15:29:00,590 - __main__ - INFO - sglang running req: 10 queue req: 442
- 2025-07-20 15:29:01,552 - sglang - INFO - [2025-07-20 15:29:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2739, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 441
- 2025-07-20 15:29:01,552 - __main__ - INFO - sglang running req: 9 queue req: 441
- 2025-07-20 15:29:02,386 - sglang - INFO - [2025-07-20 15:29:02 TP0] Decode batch. #running-req: 10, #token: 29246, token usage: 0.77, gen throughput (token/s): 222.20, #queue-req: 441
- 2025-07-20 15:29:02,387 - __main__ - INFO - sglang running req: 10 queue req: 441
- 2025-07-20 15:29:02,608 - sglang - INFO - [2025-07-20 15:29:02 TP0] Prefill batch. #new-seq: 1, #new-token: 1377, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 440
- 2025-07-20 15:29:02,608 - __main__ - INFO - sglang running req: 9 queue req: 440
- 2025-07-20 15:29:03,674 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:29:03,674 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 555.46 555.46
- sglang_output_tokens 156.88 156.88
- 2025-07-20 15:29:03,674 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 60 | 500
- 1 | 0 | 10
- 2025-07-20 15:29:03,887 - sglang - INFO - [2025-07-20 15:29:03 TP0] Decode batch. #running-req: 10, #token: 29066, token usage: 0.77, gen throughput (token/s): 265.87, #queue-req: 440
- 2025-07-20 15:29:03,887 - __main__ - INFO - sglang running req: 10 queue req: 440
- 2025-07-20 15:29:04,548 - sglang - INFO - [2025-07-20 15:29:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1264, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 439
- 2025-07-20 15:29:04,549 - __main__ - INFO - sglang running req: 9 queue req: 439
- 2025-07-20 15:29:05,403 - sglang - INFO - [2025-07-20 15:29:05 TP0] Decode batch. #running-req: 10, #token: 27787, token usage: 0.73, gen throughput (token/s): 263.14, #queue-req: 439
- 2025-07-20 15:29:05,403 - __main__ - INFO - sglang running req: 10 queue req: 439
- 2025-07-20 15:29:05,722 - sglang - INFO - [2025-07-20 15:29:05 TP0] Prefill batch. #new-seq: 2, #new-token: 4842, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 437
- 2025-07-20 15:29:05,722 - __main__ - INFO - sglang running req: 9 queue req: 437
- 2025-07-20 15:29:07,832 - sglang - INFO - [2025-07-20 15:29:07 TP0] Decode batch. #running-req: 11, #token: 29796, token usage: 0.78, gen throughput (token/s): 175.39, #queue-req: 437
- 2025-07-20 15:29:07,832 - __main__ - INFO - sglang running req: 11 queue req: 437
- 2025-07-20 15:29:08,425 - sglang - INFO - [2025-07-20 15:29:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2796, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 436
- 2025-07-20 15:29:08,425 - __main__ - INFO - sglang running req: 10 queue req: 436
- 2025-07-20 15:29:09,639 - sglang - INFO - [2025-07-20 15:29:09 TP0] Decode batch. #running-req: 11, #token: 31520, token usage: 0.83, gen throughput (token/s): 242.92, #queue-req: 436
- 2025-07-20 15:29:09,639 - __main__ - INFO - sglang running req: 11 queue req: 436
- 2025-07-20 15:29:10,641 - sglang - INFO - [2025-07-20 15:29:10 TP0] Decode batch. #running-req: 11, #token: 31960, token usage: 0.84, gen throughput (token/s): 439.24, #queue-req: 436
- 2025-07-20 15:29:10,641 - __main__ - INFO - sglang running req: 11 queue req: 436
- 2025-07-20 15:29:11,641 - sglang - INFO - [2025-07-20 15:29:11 TP0] Decode batch. #running-req: 10, #token: 30963, token usage: 0.82, gen throughput (token/s): 431.98, #queue-req: 436
- 2025-07-20 15:29:11,641 - __main__ - INFO - sglang running req: 10 queue req: 436
- 2025-07-20 15:29:12,333 - sglang - INFO - [2025-07-20 15:29:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2786, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 435
- 2025-07-20 15:29:12,333 - __main__ - INFO - sglang running req: 9 queue req: 435
- 2025-07-20 15:29:13,441 - sglang - INFO - [2025-07-20 15:29:13 TP0] Decode batch. #running-req: 10, #token: 30400, token usage: 0.80, gen throughput (token/s): 221.64, #queue-req: 435
- 2025-07-20 15:29:13,441 - __main__ - INFO - sglang running req: 10 queue req: 435
- 2025-07-20 15:29:13,676 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:29:13,676 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 573.53 573.53
- sglang_output_tokens 161.01 161.01
- 2025-07-20 15:29:13,676 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 65 | 500
- 1 | 0 | 10
- 2025-07-20 15:29:14,433 - sglang - INFO - [2025-07-20 15:29:14 TP0] Decode batch. #running-req: 9, #token: 27000, token usage: 0.71, gen throughput (token/s): 402.03, #queue-req: 435
- 2025-07-20 15:29:14,434 - __main__ - INFO - sglang running req: 9 queue req: 435
- 2025-07-20 15:29:14,434 - sglang - INFO - [2025-07-20 15:29:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2771, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 434
- 2025-07-20 15:29:14,434 - __main__ - INFO - sglang running req: 9 queue req: 434
- 2025-07-20 15:29:16,242 - sglang - INFO - [2025-07-20 15:29:16 TP0] Decode batch. #running-req: 10, #token: 30171, token usage: 0.79, gen throughput (token/s): 221.21, #queue-req: 434
- 2025-07-20 15:29:16,242 - __main__ - INFO - sglang running req: 10 queue req: 434
- 2025-07-20 15:29:16,859 - sglang - INFO - [2025-07-20 15:29:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2144, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 433
- 2025-07-20 15:29:16,859 - __main__ - INFO - sglang running req: 9 queue req: 433
- 2025-07-20 15:29:17,902 - sglang - INFO - [2025-07-20 15:29:17 TP0] Decode batch. #running-req: 10, #token: 29780, token usage: 0.78, gen throughput (token/s): 240.30, #queue-req: 433
- 2025-07-20 15:29:17,902 - __main__ - INFO - sglang running req: 10 queue req: 433
- 2025-07-20 15:29:18,885 - sglang - INFO - [2025-07-20 15:29:18 TP0] Decode batch. #running-req: 10, #token: 30180, token usage: 0.79, gen throughput (token/s): 406.90, #queue-req: 433
- 2025-07-20 15:29:18,886 - __main__ - INFO - sglang running req: 10 queue req: 433
- 2025-07-20 15:29:19,870 - sglang - INFO - [2025-07-20 15:29:19 TP0] Decode batch. #running-req: 10, #token: 30580, token usage: 0.81, gen throughput (token/s): 406.04, #queue-req: 433
- 2025-07-20 15:29:19,870 - __main__ - INFO - sglang running req: 10 queue req: 433
- 2025-07-20 15:29:20,068 - sglang - INFO - [2025-07-20 15:29:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2166, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 432
- 2025-07-20 15:29:20,068 - __main__ - INFO - sglang running req: 9 queue req: 432
- 2025-07-20 15:29:21,531 - sglang - INFO - [2025-07-20 15:29:21 TP0] Decode batch. #running-req: 10, #token: 29523, token usage: 0.78, gen throughput (token/s): 240.32, #queue-req: 432
- 2025-07-20 15:29:21,531 - __main__ - INFO - sglang running req: 10 queue req: 432
- 2025-07-20 15:29:22,516 - sglang - INFO - [2025-07-20 15:29:22 TP0] Decode batch. #running-req: 10, #token: 29923, token usage: 0.79, gen throughput (token/s): 406.15, #queue-req: 432
- 2025-07-20 15:29:22,516 - __main__ - INFO - sglang running req: 10 queue req: 432
- 2025-07-20 15:29:23,500 - sglang - INFO - [2025-07-20 15:29:23 TP0] Decode batch. #running-req: 10, #token: 30323, token usage: 0.80, gen throughput (token/s): 406.20, #queue-req: 432
- 2025-07-20 15:29:23,501 - __main__ - INFO - sglang running req: 10 queue req: 432
- 2025-07-20 15:29:23,677 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:29:23,677 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 582.10 582.10
- sglang_output_tokens 163.61 163.61
- 2025-07-20 15:29:23,678 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 68 | 500
- 1 | 0 | 10
- 2025-07-20 15:29:24,488 - sglang - INFO - [2025-07-20 15:29:24 TP0] Decode batch. #running-req: 10, #token: 30723, token usage: 0.81, gen throughput (token/s): 405.01, #queue-req: 432
- 2025-07-20 15:29:24,488 - __main__ - INFO - sglang running req: 10 queue req: 432
- 2025-07-20 15:29:25,475 - sglang - INFO - [2025-07-20 15:29:25 TP0] Decode batch. #running-req: 10, #token: 31123, token usage: 0.82, gen throughput (token/s): 405.25, #queue-req: 432
- 2025-07-20 15:29:25,475 - __main__ - INFO - sglang running req: 10 queue req: 432
- 2025-07-20 15:29:26,462 - sglang - INFO - [2025-07-20 15:29:26 TP0] Decode batch. #running-req: 10, #token: 31523, token usage: 0.83, gen throughput (token/s): 405.14, #queue-req: 432
- 2025-07-20 15:29:26,462 - __main__ - INFO - sglang running req: 10 queue req: 432
- 2025-07-20 15:29:26,857 - sglang - INFO - [2025-07-20 15:29:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2675, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 431
- 2025-07-20 15:29:26,857 - __main__ - INFO - sglang running req: 9 queue req: 431
- 2025-07-20 15:29:28,246 - sglang - INFO - [2025-07-20 15:29:28 TP0] Decode batch. #running-req: 10, #token: 30814, token usage: 0.81, gen throughput (token/s): 223.69, #queue-req: 431
- 2025-07-20 15:29:28,246 - __main__ - INFO - sglang running req: 10 queue req: 431
- 2025-07-20 15:29:29,237 - sglang - INFO - [2025-07-20 15:29:29 TP0] Decode batch. #running-req: 10, #token: 31214, token usage: 0.82, gen throughput (token/s): 403.75, #queue-req: 431
- 2025-07-20 15:29:29,237 - __main__ - INFO - sglang running req: 10 queue req: 431
- 2025-07-20 15:29:29,980 - sglang - INFO - [2025-07-20 15:29:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2401, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 430
- 2025-07-20 15:29:29,980 - __main__ - INFO - sglang running req: 9 queue req: 430
- 2025-07-20 15:29:30,967 - sglang - INFO - [2025-07-20 15:29:30 TP0] Decode batch. #running-req: 10, #token: 30789, token usage: 0.81, gen throughput (token/s): 230.63, #queue-req: 430
- 2025-07-20 15:29:30,967 - __main__ - INFO - sglang running req: 10 queue req: 430
- 2025-07-20 15:29:31,041 - sglang - INFO - [2025-07-20 15:29:31 TP0] Prefill batch. #new-seq: 1, #new-token: 2146, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 429
- 2025-07-20 15:29:31,041 - __main__ - INFO - sglang running req: 9 queue req: 429
- 2025-07-20 15:29:31,765 - sglang - INFO - [2025-07-20 15:29:31 TP0] Prefill batch. #new-seq: 1, #new-token: 2047, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 428
- 2025-07-20 15:29:31,766 - __main__ - INFO - sglang running req: 9 queue req: 428
- 2025-07-20 15:29:33,115 - sglang - INFO - [2025-07-20 15:29:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 427
- 2025-07-20 15:29:33,115 - __main__ - INFO - sglang running req: 9 queue req: 427
- 2025-07-20 15:29:33,679 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:29:33,680 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 608.52 608.52
- sglang_output_tokens 172.68 172.68
- 2025-07-20 15:29:33,680 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 73 | 500
- 1 | 0 | 10
- 2025-07-20 15:29:34,121 - sglang - INFO - [2025-07-20 15:29:34 TP0] Decode batch. #running-req: 10, #token: 27860, token usage: 0.73, gen throughput (token/s): 125.88, #queue-req: 427
- 2025-07-20 15:29:34,121 - __main__ - INFO - sglang running req: 10 queue req: 427
- 2025-07-20 15:29:35,102 - sglang - INFO - [2025-07-20 15:29:35 TP0] Decode batch. #running-req: 10, #token: 28260, token usage: 0.74, gen throughput (token/s): 407.78, #queue-req: 427
- 2025-07-20 15:29:35,102 - __main__ - INFO - sglang running req: 10 queue req: 427
- 2025-07-20 15:29:36,084 - sglang - INFO - [2025-07-20 15:29:36 TP0] Decode batch. #running-req: 10, #token: 28660, token usage: 0.75, gen throughput (token/s): 407.21, #queue-req: 427
- 2025-07-20 15:29:36,084 - __main__ - INFO - sglang running req: 10 queue req: 427
- 2025-07-20 15:29:37,042 - sglang - INFO - [2025-07-20 15:29:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 426
- 2025-07-20 15:29:37,042 - __main__ - INFO - sglang running req: 9 queue req: 426
- 2025-07-20 15:29:37,825 - sglang - INFO - [2025-07-20 15:29:37 TP0] Decode batch. #running-req: 10, #token: 27905, token usage: 0.73, gen throughput (token/s): 229.16, #queue-req: 426
- 2025-07-20 15:29:37,825 - __main__ - INFO - sglang running req: 10 queue req: 426
- 2025-07-20 15:29:38,806 - sglang - INFO - [2025-07-20 15:29:38 TP0] Decode batch. #running-req: 10, #token: 28305, token usage: 0.75, gen throughput (token/s): 407.67, #queue-req: 426
- 2025-07-20 15:29:38,806 - __main__ - INFO - sglang running req: 10 queue req: 426
- 2025-07-20 15:29:39,790 - sglang - INFO - [2025-07-20 15:29:39 TP0] Decode batch. #running-req: 10, #token: 28705, token usage: 0.76, gen throughput (token/s): 406.59, #queue-req: 426
- 2025-07-20 15:29:39,790 - __main__ - INFO - sglang running req: 10 queue req: 426
- 2025-07-20 15:29:40,259 - sglang - INFO - [2025-07-20 15:29:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 425
- 2025-07-20 15:29:40,259 - __main__ - INFO - sglang running req: 9 queue req: 425
- 2025-07-20 15:29:41,516 - sglang - INFO - [2025-07-20 15:29:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2746, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 424
- 2025-07-20 15:29:41,516 - __main__ - INFO - sglang running req: 9 queue req: 424
- 2025-07-20 15:29:42,360 - sglang - INFO - [2025-07-20 15:29:42 TP0] Decode batch. #running-req: 10, #token: 27638, token usage: 0.73, gen throughput (token/s): 154.83, #queue-req: 424
- 2025-07-20 15:29:42,361 - __main__ - INFO - sglang running req: 10 queue req: 424
- 2025-07-20 15:29:43,271 - sglang - INFO - [2025-07-20 15:29:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2748, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 9, #queue-req: 423
- 2025-07-20 15:29:43,271 - __main__ - INFO - sglang running req: 9 queue req: 423
- 2025-07-20 15:29:43,682 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:29:43,683 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 623.73 623.73
- sglang_output_tokens 177.95 177.95
- 2025-07-20 15:29:43,683 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 77 | 500
- 1 | 0 | 10
- 2025-07-20 15:29:44,156 - sglang - INFO - [2025-07-20 15:29:44 TP0] Decode batch. #running-req: 10, #token: 27178, token usage: 0.72, gen throughput (token/s): 222.21, #queue-req: 423
- 2025-07-20 15:29:44,156 - __main__ - INFO - sglang running req: 10 queue req: 423
- 2025-07-20 15:29:45,136 - sglang - INFO - [2025-07-20 15:29:45 TP0] Decode batch. #running-req: 10, #token: 27578, token usage: 0.73, gen throughput (token/s): 408.24, #queue-req: 423
- 2025-07-20 15:29:45,136 - __main__ - INFO - sglang running req: 10 queue req: 423
- 2025-07-20 15:29:45,455 - sglang - INFO - [2025-07-20 15:29:45 TP0] Prefill batch. #new-seq: 2, #new-token: 3896, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 421
- 2025-07-20 15:29:45,455 - __main__ - INFO - sglang running req: 9 queue req: 421
- 2025-07-20 15:29:47,390 - sglang - INFO - [2025-07-20 15:29:47 TP0] Decode batch. #running-req: 11, #token: 29010, token usage: 0.76, gen throughput (token/s): 188.98, #queue-req: 421
- 2025-07-20 15:29:47,390 - __main__ - INFO - sglang running req: 11 queue req: 421
- 2025-07-20 15:29:48,377 - sglang - INFO - [2025-07-20 15:29:48 TP0] Decode batch. #running-req: 11, #token: 29450, token usage: 0.78, gen throughput (token/s): 445.92, #queue-req: 421
- 2025-07-20 15:29:48,377 - __main__ - INFO - sglang running req: 11 queue req: 421
- 2025-07-20 15:29:49,363 - sglang - INFO - [2025-07-20 15:29:49 TP0] Decode batch. #running-req: 11, #token: 29890, token usage: 0.79, gen throughput (token/s): 446.11, #queue-req: 421
- 2025-07-20 15:29:49,363 - __main__ - INFO - sglang running req: 11 queue req: 421
- 2025-07-20 15:29:50,351 - sglang - INFO - [2025-07-20 15:29:50 TP0] Decode batch. #running-req: 11, #token: 30330, token usage: 0.80, gen throughput (token/s): 445.24, #queue-req: 421
- 2025-07-20 15:29:50,351 - __main__ - INFO - sglang running req: 11 queue req: 421
- 2025-07-20 15:29:50,748 - sglang - INFO - [2025-07-20 15:29:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 420
- 2025-07-20 15:29:50,748 - __main__ - INFO - sglang running req: 10 queue req: 420
- 2025-07-20 15:29:52,138 - sglang - INFO - [2025-07-20 15:29:52 TP0] Decode batch. #running-req: 11, #token: 30863, token usage: 0.81, gen throughput (token/s): 245.78, #queue-req: 420
- 2025-07-20 15:29:52,138 - __main__ - INFO - sglang running req: 11 queue req: 420
- 2025-07-20 15:29:52,361 - sglang - INFO - [2025-07-20 15:29:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 419
- 2025-07-20 15:29:52,361 - __main__ - INFO - sglang running req: 10 queue req: 419
- 2025-07-20 15:29:53,685 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:29:53,685 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 624.13 624.13
- sglang_output_tokens 178.09 178.09
- 2025-07-20 15:29:53,686 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 80 | 500
- 1 | 0 | 10
- 2025-07-20 15:29:53,742 - sglang - INFO - [2025-07-20 15:29:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 418
- 2025-07-20 15:29:53,742 - __main__ - INFO - sglang running req: 10 queue req: 418
- 2025-07-20 15:29:54,723 - sglang - INFO - [2025-07-20 15:29:54 TP0] Decode batch. #running-req: 11, #token: 32279, token usage: 0.85, gen throughput (token/s): 169.40, #queue-req: 418
- 2025-07-20 15:29:54,723 - __main__ - INFO - sglang running req: 11 queue req: 418
- 2025-07-20 15:29:55,719 - sglang - INFO - [2025-07-20 15:29:55 TP0] Decode batch. #running-req: 11, #token: 32719, token usage: 0.86, gen throughput (token/s): 441.73, #queue-req: 418
- 2025-07-20 15:29:55,719 - __main__ - INFO - sglang running req: 11 queue req: 418
- 2025-07-20 15:29:56,715 - sglang - INFO - [2025-07-20 15:29:56 TP0] Decode batch. #running-req: 11, #token: 33159, token usage: 0.87, gen throughput (token/s): 441.95, #queue-req: 418
- 2025-07-20 15:29:56,715 - __main__ - INFO - sglang running req: 11 queue req: 418
- 2025-07-20 15:29:57,437 - sglang - INFO - [2025-07-20 15:29:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2397, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 417
- 2025-07-20 15:29:57,437 - __main__ - INFO - sglang running req: 10 queue req: 417
- 2025-07-20 15:29:58,464 - sglang - INFO - [2025-07-20 15:29:58 TP0] Decode batch. #running-req: 11, #token: 32508, token usage: 0.86, gen throughput (token/s): 251.00, #queue-req: 417
- 2025-07-20 15:29:58,464 - __main__ - INFO - sglang running req: 11 queue req: 417
- 2025-07-20 15:29:59,461 - sglang - INFO - [2025-07-20 15:29:59 TP0] Decode batch. #running-req: 11, #token: 32948, token usage: 0.87, gen throughput (token/s): 441.17, #queue-req: 417
- 2025-07-20 15:29:59,461 - __main__ - INFO - sglang running req: 11 queue req: 417
- 2025-07-20 15:30:00,459 - sglang - INFO - [2025-07-20 15:30:00 TP0] Decode batch. #running-req: 11, #token: 33388, token usage: 0.88, gen throughput (token/s): 440.95, #queue-req: 417
- 2025-07-20 15:30:00,459 - __main__ - INFO - sglang running req: 11 queue req: 417
- 2025-07-20 15:30:01,457 - sglang - INFO - [2025-07-20 15:30:01 TP0] Decode batch. #running-req: 11, #token: 33828, token usage: 0.89, gen throughput (token/s): 440.77, #queue-req: 417
- 2025-07-20 15:30:01,457 - __main__ - INFO - sglang running req: 11 queue req: 417
- 2025-07-20 15:30:02,449 - sglang - INFO - [2025-07-20 15:30:02 TP0] Decode batch. #running-req: 10, #token: 28074, token usage: 0.74, gen throughput (token/s): 413.26, #queue-req: 417
- 2025-07-20 15:30:02,450 - __main__ - INFO - sglang running req: 10 queue req: 417
- 2025-07-20 15:30:02,474 - sglang - INFO - [2025-07-20 15:30:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2765, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 416
- 2025-07-20 15:30:02,474 - __main__ - INFO - sglang running req: 9 queue req: 416
- 2025-07-20 15:30:03,532 - sglang - INFO - [2025-07-20 15:30:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1862, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 415
- 2025-07-20 15:30:03,532 - __main__ - INFO - sglang running req: 9 queue req: 415
- 2025-07-20 15:30:03,687 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:30:03,687 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 641.55 662.63
- sglang_output_tokens 183.23 189.25
- 2025-07-20 15:30:03,687 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 85 | 500
- 1 | 0 | 10
- 2025-07-20 15:30:04,890 - sglang - INFO - [2025-07-20 15:30:04 TP0] Decode batch. #running-req: 10, #token: 29382, token usage: 0.77, gen throughput (token/s): 163.07, #queue-req: 415
- 2025-07-20 15:30:04,890 - __main__ - INFO - sglang running req: 10 queue req: 415
- 2025-07-20 15:30:05,871 - sglang - INFO - [2025-07-20 15:30:05 TP0] Decode batch. #running-req: 10, #token: 29782, token usage: 0.78, gen throughput (token/s): 407.75, #queue-req: 415
- 2025-07-20 15:30:05,871 - __main__ - INFO - sglang running req: 10 queue req: 415
- 2025-07-20 15:30:06,856 - sglang - INFO - [2025-07-20 15:30:06 TP0] Decode batch. #running-req: 10, #token: 30182, token usage: 0.79, gen throughput (token/s): 406.33, #queue-req: 415
- 2025-07-20 15:30:06,856 - __main__ - INFO - sglang running req: 10 queue req: 415
- 2025-07-20 15:30:07,843 - sglang - INFO - [2025-07-20 15:30:07 TP0] Decode batch. #running-req: 10, #token: 30582, token usage: 0.81, gen throughput (token/s): 405.03, #queue-req: 415
- 2025-07-20 15:30:07,843 - __main__ - INFO - sglang running req: 10 queue req: 415
- 2025-07-20 15:30:07,918 - sglang - INFO - [2025-07-20 15:30:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 414
- 2025-07-20 15:30:07,918 - __main__ - INFO - sglang running req: 9 queue req: 414
- 2025-07-20 15:30:09,586 - sglang - INFO - [2025-07-20 15:30:09 TP0] Decode batch. #running-req: 10, #token: 30044, token usage: 0.79, gen throughput (token/s): 228.89, #queue-req: 414
- 2025-07-20 15:30:09,586 - __main__ - INFO - sglang running req: 10 queue req: 414
- 2025-07-20 15:30:10,627 - sglang - INFO - [2025-07-20 15:30:10 TP0] Decode batch. #running-req: 10, #token: 30444, token usage: 0.80, gen throughput (token/s): 384.31, #queue-req: 414
- 2025-07-20 15:30:10,627 - __main__ - INFO - sglang running req: 10 queue req: 414
- 2025-07-20 15:30:10,948 - sglang - INFO - [2025-07-20 15:30:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2506, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 413
- 2025-07-20 15:30:10,949 - __main__ - INFO - sglang running req: 9 queue req: 413
- 2025-07-20 15:30:12,370 - sglang - INFO - [2025-07-20 15:30:12 TP0] Decode batch. #running-req: 10, #token: 29750, token usage: 0.78, gen throughput (token/s): 228.93, #queue-req: 413
- 2025-07-20 15:30:12,370 - __main__ - INFO - sglang running req: 10 queue req: 413
- 2025-07-20 15:30:13,355 - sglang - INFO - [2025-07-20 15:30:13 TP0] Decode batch. #running-req: 10, #token: 30150, token usage: 0.79, gen throughput (token/s): 406.20, #queue-req: 413
- 2025-07-20 15:30:13,355 - __main__ - INFO - sglang running req: 10 queue req: 413
- 2025-07-20 15:30:13,689 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:30:13,689 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 637.87 680.10
- sglang_output_tokens 182.93 195.04
- 2025-07-20 15:30:13,689 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 87 | 500
- 1 | 0 | 10
- 2025-07-20 15:30:14,071 - sglang - INFO - [2025-07-20 15:30:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2601, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 412
- 2025-07-20 15:30:14,071 - __main__ - INFO - sglang running req: 9 queue req: 412
- 2025-07-20 15:30:15,133 - sglang - INFO - [2025-07-20 15:30:15 TP0] Decode batch. #running-req: 10, #token: 29491, token usage: 0.78, gen throughput (token/s): 224.31, #queue-req: 412
- 2025-07-20 15:30:15,134 - __main__ - INFO - sglang running req: 10 queue req: 412
- 2025-07-20 15:30:15,183 - sglang - INFO - [2025-07-20 15:30:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2203, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 411
- 2025-07-20 15:30:15,184 - __main__ - INFO - sglang running req: 9 queue req: 411
- 2025-07-20 15:30:15,984 - sglang - INFO - [2025-07-20 15:30:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1564, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 410
- 2025-07-20 15:30:15,984 - __main__ - INFO - sglang running req: 9 queue req: 410
- 2025-07-20 15:30:17,426 - sglang - INFO - [2025-07-20 15:30:17 TP0] Decode batch. #running-req: 10, #token: 27380, token usage: 0.72, gen throughput (token/s): 173.57, #queue-req: 410
- 2025-07-20 15:30:17,427 - __main__ - INFO - sglang running req: 10 queue req: 410
- 2025-07-20 15:30:17,918 - sglang - INFO - [2025-07-20 15:30:17 TP0] Prefill batch. #new-seq: 1, #new-token: 2941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 409
- 2025-07-20 15:30:17,918 - __main__ - INFO - sglang running req: 9 queue req: 409
- 2025-07-20 15:30:19,246 - sglang - INFO - [2025-07-20 15:30:19 TP0] Decode batch. #running-req: 10, #token: 28450, token usage: 0.75, gen throughput (token/s): 219.29, #queue-req: 409
- 2025-07-20 15:30:19,246 - __main__ - INFO - sglang running req: 10 queue req: 409
- 2025-07-20 15:30:20,225 - sglang - INFO - [2025-07-20 15:30:20 TP0] Decode batch. #running-req: 10, #token: 25355, token usage: 0.67, gen throughput (token/s): 408.51, #queue-req: 409
- 2025-07-20 15:30:20,225 - __main__ - INFO - sglang running req: 10 queue req: 409
- 2025-07-20 15:30:20,250 - sglang - INFO - [2025-07-20 15:30:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 408
- 2025-07-20 15:30:20,250 - __main__ - INFO - sglang running req: 9 queue req: 408
- 2025-07-20 15:30:21,468 - sglang - INFO - [2025-07-20 15:30:21 TP0] Prefill batch. #new-seq: 2, #new-token: 4154, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 406
- 2025-07-20 15:30:21,468 - __main__ - INFO - sglang running req: 9 queue req: 406
- 2025-07-20 15:30:23,291 - sglang - INFO - [2025-07-20 15:30:23 TP0] Decode batch. #running-req: 11, #token: 29069, token usage: 0.77, gen throughput (token/s): 136.33, #queue-req: 406
- 2025-07-20 15:30:23,291 - __main__ - INFO - sglang running req: 11 queue req: 406
- 2025-07-20 15:30:23,690 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:30:23,690 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 662.78 728.76
- sglang_output_tokens 190.54 209.51
- 2025-07-20 15:30:23,691 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 93 | 500
- 1 | 0 | 10
- 2025-07-20 15:30:24,278 - sglang - INFO - [2025-07-20 15:30:24 TP0] Decode batch. #running-req: 11, #token: 29509, token usage: 0.78, gen throughput (token/s): 445.83, #queue-req: 406
- 2025-07-20 15:30:24,278 - __main__ - INFO - sglang running req: 11 queue req: 406
- 2025-07-20 15:30:25,266 - sglang - INFO - [2025-07-20 15:30:25 TP0] Decode batch. #running-req: 11, #token: 29949, token usage: 0.79, gen throughput (token/s): 445.25, #queue-req: 406
- 2025-07-20 15:30:25,266 - __main__ - INFO - sglang running req: 11 queue req: 406
- 2025-07-20 15:30:25,934 - sglang - INFO - [2025-07-20 15:30:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2035, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 405
- 2025-07-20 15:30:25,935 - __main__ - INFO - sglang running req: 10 queue req: 405
- 2025-07-20 15:30:26,917 - sglang - INFO - [2025-07-20 15:30:26 TP0] Decode batch. #running-req: 11, #token: 30598, token usage: 0.81, gen throughput (token/s): 266.02, #queue-req: 405
- 2025-07-20 15:30:26,917 - __main__ - INFO - sglang running req: 11 queue req: 405
- 2025-07-20 15:30:27,906 - sglang - INFO - [2025-07-20 15:30:27 TP0] Decode batch. #running-req: 11, #token: 31038, token usage: 0.82, gen throughput (token/s): 444.75, #queue-req: 405
- 2025-07-20 15:30:27,906 - __main__ - INFO - sglang running req: 11 queue req: 405
- 2025-07-20 15:30:28,897 - sglang - INFO - [2025-07-20 15:30:28 TP0] Decode batch. #running-req: 11, #token: 31478, token usage: 0.83, gen throughput (token/s): 443.81, #queue-req: 405
- 2025-07-20 15:30:28,897 - __main__ - INFO - sglang running req: 11 queue req: 405
- 2025-07-20 15:30:29,891 - sglang - INFO - [2025-07-20 15:30:29 TP0] Decode batch. #running-req: 11, #token: 31918, token usage: 0.84, gen throughput (token/s): 442.91, #queue-req: 405
- 2025-07-20 15:30:29,891 - __main__ - INFO - sglang running req: 11 queue req: 405
- 2025-07-20 15:30:30,313 - sglang - INFO - [2025-07-20 15:30:30 TP0] Prefill batch. #new-seq: 1, #new-token: 1939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 404
- 2025-07-20 15:30:30,313 - __main__ - INFO - sglang running req: 10 queue req: 404
- 2025-07-20 15:30:31,573 - sglang - INFO - [2025-07-20 15:30:31 TP0] Decode batch. #running-req: 11, #token: 32260, token usage: 0.85, gen throughput (token/s): 260.87, #queue-req: 404
- 2025-07-20 15:30:31,574 - __main__ - INFO - sglang running req: 11 queue req: 404
- 2025-07-20 15:30:32,569 - sglang - INFO - [2025-07-20 15:30:32 TP0] Decode batch. #running-req: 10, #token: 29096, token usage: 0.77, gen throughput (token/s): 440.93, #queue-req: 404
- 2025-07-20 15:30:32,569 - __main__ - INFO - sglang running req: 10 queue req: 404
- 2025-07-20 15:30:32,569 - sglang - INFO - [2025-07-20 15:30:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2746, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 403
- 2025-07-20 15:30:32,570 - __main__ - INFO - sglang running req: 10 queue req: 403
- 2025-07-20 15:30:33,692 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:30:33,692 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 661.19 749.05
- sglang_output_tokens 188.99 214.10
- 2025-07-20 15:30:33,693 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 96 | 500
- 1 | 0 | 10
- 2025-07-20 15:30:34,373 - sglang - INFO - [2025-07-20 15:30:34 TP0] Decode batch. #running-req: 11, #token: 32282, token usage: 0.85, gen throughput (token/s): 243.95, #queue-req: 403
- 2025-07-20 15:30:34,373 - __main__ - INFO - sglang running req: 11 queue req: 403
- 2025-07-20 15:30:35,367 - sglang - INFO - [2025-07-20 15:30:35 TP0] Decode batch. #running-req: 11, #token: 32722, token usage: 0.86, gen throughput (token/s): 442.79, #queue-req: 403
- 2025-07-20 15:30:35,367 - __main__ - INFO - sglang running req: 11 queue req: 403
- 2025-07-20 15:30:36,360 - sglang - INFO - [2025-07-20 15:30:36 TP0] Decode batch. #running-req: 10, #token: 29923, token usage: 0.79, gen throughput (token/s): 433.65, #queue-req: 403
- 2025-07-20 15:30:36,361 - __main__ - INFO - sglang running req: 10 queue req: 403
- 2025-07-20 15:30:36,533 - sglang - INFO - [2025-07-20 15:30:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 402
- 2025-07-20 15:30:36,533 - __main__ - INFO - sglang running req: 9 queue req: 402
- 2025-07-20 15:30:38,184 - sglang - INFO - [2025-07-20 15:30:38 TP0] Decode batch. #running-req: 10, #token: 30043, token usage: 0.79, gen throughput (token/s): 218.75, #queue-req: 402
- 2025-07-20 15:30:38,185 - __main__ - INFO - sglang running req: 10 queue req: 402
- 2025-07-20 15:30:38,603 - sglang - INFO - [2025-07-20 15:30:38 TP0] Prefill batch. #new-seq: 1, #new-token: 2916, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 401
- 2025-07-20 15:30:38,604 - __main__ - INFO - sglang running req: 9 queue req: 401
- 2025-07-20 15:30:39,997 - __main__ - WARNING - JSON decode error on attempt 0 for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 15:30:40,008 - sglang - INFO - [2025-07-20 15:30:40 TP0] Decode batch. #running-req: 9, #token: 25854, token usage: 0.68, gen throughput (token/s): 218.21, #queue-req: 401
- 2025-07-20 15:30:40,008 - __main__ - INFO - sglang running req: 9 queue req: 401
- 2025-07-20 15:30:40,009 - sglang - INFO - [2025-07-20 15:30:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2748, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 400
- 2025-07-20 15:30:40,009 - __main__ - INFO - sglang running req: 9 queue req: 400
- 2025-07-20 15:30:40,263 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12
- 2025-07-20 15:30:41,804 - sglang - INFO - [2025-07-20 15:30:41 TP0] Decode batch. #running-req: 10, #token: 29002, token usage: 0.76, gen throughput (token/s): 222.74, #queue-req: 401
- 2025-07-20 15:30:41,804 - __main__ - INFO - sglang running req: 10 queue req: 401
- 2025-07-20 15:30:42,783 - sglang - INFO - [2025-07-20 15:30:42 TP0] Decode batch. #running-req: 10, #token: 29402, token usage: 0.77, gen throughput (token/s): 408.78, #queue-req: 401
- 2025-07-20 15:30:42,783 - __main__ - INFO - sglang running req: 10 queue req: 401
- 2025-07-20 15:30:43,028 - sglang - INFO - [2025-07-20 15:30:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 400
- 2025-07-20 15:30:43,028 - __main__ - INFO - sglang running req: 9 queue req: 400
- 2025-07-20 15:30:43,694 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:30:43,694 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 678.41 791.17
- sglang_output_tokens 195.85 228.40
- 2025-07-20 15:30:43,695 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 100 | 500
- 1 | 0 | 10
- 2025-07-20 15:30:44,598 - sglang - INFO - [2025-07-20 15:30:44 TP0] Decode batch. #running-req: 10, #token: 29691, token usage: 0.78, gen throughput (token/s): 219.76, #queue-req: 400
- 2025-07-20 15:30:44,598 - __main__ - INFO - sglang running req: 10 queue req: 400
- 2025-07-20 15:30:44,771 - sglang - INFO - [2025-07-20 15:30:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 399
- 2025-07-20 15:30:44,771 - __main__ - INFO - sglang running req: 9 queue req: 399
- 2025-07-20 15:30:46,317 - sglang - INFO - [2025-07-20 15:30:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1420, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 398
- 2025-07-20 15:30:46,318 - __main__ - INFO - sglang running req: 9 queue req: 398
- 2025-07-20 15:30:46,895 - sglang - INFO - [2025-07-20 15:30:46 TP0] Decode batch. #running-req: 10, #token: 29051, token usage: 0.76, gen throughput (token/s): 173.32, #queue-req: 398
- 2025-07-20 15:30:46,895 - __main__ - INFO - sglang running req: 10 queue req: 398
- 2025-07-20 15:30:47,879 - sglang - INFO - [2025-07-20 15:30:47 TP0] Decode batch. #running-req: 10, #token: 29451, token usage: 0.78, gen throughput (token/s): 406.39, #queue-req: 398
- 2025-07-20 15:30:47,879 - __main__ - INFO - sglang running req: 10 queue req: 398
- 2025-07-20 15:30:48,864 - sglang - INFO - [2025-07-20 15:30:48 TP0] Decode batch. #running-req: 10, #token: 29851, token usage: 0.79, gen throughput (token/s): 406.00, #queue-req: 398
- 2025-07-20 15:30:48,865 - __main__ - INFO - sglang running req: 10 queue req: 398
- 2025-07-20 15:30:49,258 - sglang - INFO - [2025-07-20 15:30:49 TP0] Prefill batch. #new-seq: 2, #new-token: 3316, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 396
- 2025-07-20 15:30:49,258 - __main__ - INFO - sglang running req: 9 queue req: 396
- 2025-07-20 15:30:50,814 - sglang - INFO - [2025-07-20 15:30:50 TP0] Prefill batch. #new-seq: 1, #new-token: 1991, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 395
- 2025-07-20 15:30:50,814 - __main__ - INFO - sglang running req: 10 queue req: 395
- 2025-07-20 15:30:51,645 - sglang - INFO - [2025-07-20 15:30:51 TP0] Decode batch. #running-req: 11, #token: 28561, token usage: 0.75, gen throughput (token/s): 151.74, #queue-req: 395
- 2025-07-20 15:30:51,645 - __main__ - INFO - sglang running req: 11 queue req: 395
- 2025-07-20 15:30:52,633 - sglang - INFO - [2025-07-20 15:30:52 TP0] Decode batch. #running-req: 11, #token: 29001, token usage: 0.76, gen throughput (token/s): 445.62, #queue-req: 395
- 2025-07-20 15:30:52,633 - __main__ - INFO - sglang running req: 11 queue req: 395
- 2025-07-20 15:30:52,781 - sglang - INFO - [2025-07-20 15:30:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2494, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 394
- 2025-07-20 15:30:52,781 - __main__ - INFO - sglang running req: 10 queue req: 394
- 2025-07-20 15:30:53,697 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:30:53,697 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 692.01 830.11
- sglang_output_tokens 200.23 240.19
- 2025-07-20 15:30:53,697 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 105 | 500
- 1 | 0 | 10
- 2025-07-20 15:30:54,376 - sglang - INFO - [2025-07-20 15:30:54 TP0] Decode batch. #running-req: 11, #token: 28675, token usage: 0.75, gen throughput (token/s): 251.85, #queue-req: 394
- 2025-07-20 15:30:54,376 - __main__ - INFO - sglang running req: 11 queue req: 394
- 2025-07-20 15:30:54,795 - sglang - INFO - [2025-07-20 15:30:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2105, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 393
- 2025-07-20 15:30:54,796 - __main__ - INFO - sglang running req: 10 queue req: 393
- 2025-07-20 15:30:55,813 - sglang - INFO - [2025-07-20 15:30:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 392
- 2025-07-20 15:30:55,814 - __main__ - INFO - sglang running req: 10 queue req: 392
- 2025-07-20 15:30:56,875 - sglang - INFO - [2025-07-20 15:30:56 TP0] Decode batch. #running-req: 11, #token: 31009, token usage: 0.82, gen throughput (token/s): 175.26, #queue-req: 392
- 2025-07-20 15:30:56,875 - __main__ - INFO - sglang running req: 11 queue req: 392
- 2025-07-20 15:30:57,865 - sglang - INFO - [2025-07-20 15:30:57 TP0] Decode batch. #running-req: 11, #token: 31449, token usage: 0.83, gen throughput (token/s): 444.28, #queue-req: 392
- 2025-07-20 15:30:57,865 - __main__ - INFO - sglang running req: 11 queue req: 392
- 2025-07-20 15:30:58,913 - sglang - INFO - [2025-07-20 15:30:58 TP0] Decode batch. #running-req: 11, #token: 31889, token usage: 0.84, gen throughput (token/s): 419.80, #queue-req: 392
- 2025-07-20 15:30:58,913 - __main__ - INFO - sglang running req: 11 queue req: 392
- 2025-07-20 15:30:59,911 - sglang - INFO - [2025-07-20 15:30:59 TP0] Decode batch. #running-req: 11, #token: 32329, token usage: 0.85, gen throughput (token/s): 441.24, #queue-req: 392
- 2025-07-20 15:30:59,911 - __main__ - INFO - sglang running req: 11 queue req: 392
- 2025-07-20 15:31:00,961 - sglang - INFO - [2025-07-20 15:31:00 TP0] Decode batch. #running-req: 11, #token: 32769, token usage: 0.86, gen throughput (token/s): 419.07, #queue-req: 392
- 2025-07-20 15:31:00,961 - __main__ - INFO - sglang running req: 11 queue req: 392
- 2025-07-20 15:31:01,959 - sglang - INFO - [2025-07-20 15:31:01 TP0] Decode batch. #running-req: 11, #token: 33209, token usage: 0.87, gen throughput (token/s): 440.84, #queue-req: 392
- 2025-07-20 15:31:01,959 - __main__ - INFO - sglang running req: 11 queue req: 392
- 2025-07-20 15:31:02,956 - sglang - INFO - [2025-07-20 15:31:02 TP0] Decode batch. #running-req: 11, #token: 33649, token usage: 0.89, gen throughput (token/s): 440.90, #queue-req: 392
- 2025-07-20 15:31:02,957 - __main__ - INFO - sglang running req: 11 queue req: 392
- 2025-07-20 15:31:03,481 - sglang - INFO - [2025-07-20 15:31:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2146, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 10, #queue-req: 391
- 2025-07-20 15:31:03,481 - __main__ - INFO - sglang running req: 10 queue req: 391
- 2025-07-20 15:31:03,698 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:31:03,698 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 688.29 848.59
- sglang_output_tokens 198.10 244.24
- 2025-07-20 15:31:03,698 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 108 | 500
- 1 | 0 | 10
- 2025-07-20 15:31:04,625 - sglang - INFO - [2025-07-20 15:31:04 TP0] Decode batch. #running-req: 10, #token: 30265, token usage: 0.80, gen throughput (token/s): 254.16, #queue-req: 391
- 2025-07-20 15:31:04,625 - __main__ - INFO - sglang running req: 10 queue req: 391
- 2025-07-20 15:31:05,607 - sglang - INFO - [2025-07-20 15:31:05 TP0] Decode batch. #running-req: 10, #token: 30665, token usage: 0.81, gen throughput (token/s): 407.03, #queue-req: 391
- 2025-07-20 15:31:05,608 - __main__ - INFO - sglang running req: 10 queue req: 391
- 2025-07-20 15:31:06,595 - sglang - INFO - [2025-07-20 15:31:06 TP0] Decode batch. #running-req: 10, #token: 31065, token usage: 0.82, gen throughput (token/s): 404.96, #queue-req: 391
- 2025-07-20 15:31:06,595 - __main__ - INFO - sglang running req: 10 queue req: 391
- 2025-07-20 15:31:06,819 - sglang - INFO - [2025-07-20 15:31:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 390
- 2025-07-20 15:31:06,819 - __main__ - INFO - sglang running req: 9 queue req: 390
- 2025-07-20 15:31:08,421 - sglang - INFO - [2025-07-20 15:31:08 TP0] Decode batch. #running-req: 10, #token: 31852, token usage: 0.84, gen throughput (token/s): 218.58, #queue-req: 390
- 2025-07-20 15:31:08,421 - __main__ - INFO - sglang running req: 10 queue req: 390
- 2025-07-20 15:31:08,842 - sglang - INFO - [2025-07-20 15:31:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 389
- 2025-07-20 15:31:08,842 - __main__ - INFO - sglang running req: 9 queue req: 389
- 2025-07-20 15:31:10,201 - sglang - INFO - [2025-07-20 15:31:10 TP0] Decode batch. #running-req: 10, #token: 30795, token usage: 0.81, gen throughput (token/s): 224.15, #queue-req: 389
- 2025-07-20 15:31:10,201 - __main__ - INFO - sglang running req: 10 queue req: 389
- 2025-07-20 15:31:10,374 - sglang - INFO - [2025-07-20 15:31:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2601, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 388
- 2025-07-20 15:31:10,374 - __main__ - INFO - sglang running req: 9 queue req: 388
- 2025-07-20 15:31:11,462 - sglang - INFO - [2025-07-20 15:31:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1910, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 387
- 2025-07-20 15:31:11,463 - __main__ - INFO - sglang running req: 9 queue req: 387
- 2025-07-20 15:31:12,410 - sglang - INFO - [2025-07-20 15:31:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 386
- 2025-07-20 15:31:12,410 - __main__ - INFO - sglang running req: 9 queue req: 386
- 2025-07-20 15:31:13,359 - sglang - INFO - [2025-07-20 15:31:13 TP0] Decode batch. #running-req: 10, #token: 28006, token usage: 0.74, gen throughput (token/s): 125.70, #queue-req: 386
- 2025-07-20 15:31:13,359 - __main__ - INFO - sglang running req: 10 queue req: 386
- 2025-07-20 15:31:13,699 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:31:13,699 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 708.70 897.38
- sglang_output_tokens 203.46 257.63
- 2025-07-20 15:31:13,700 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 114 | 500
- 1 | 0 | 10
- 2025-07-20 15:31:14,343 - sglang - INFO - [2025-07-20 15:31:14 TP0] Decode batch. #running-req: 10, #token: 28406, token usage: 0.75, gen throughput (token/s): 406.66, #queue-req: 386
- 2025-07-20 15:31:14,343 - __main__ - INFO - sglang running req: 10 queue req: 386
- 2025-07-20 15:31:14,984 - sglang - INFO - [2025-07-20 15:31:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 385
- 2025-07-20 15:31:14,985 - __main__ - INFO - sglang running req: 9 queue req: 385
- 2025-07-20 15:31:16,121 - sglang - INFO - [2025-07-20 15:31:16 TP0] Decode batch. #running-req: 10, #token: 28182, token usage: 0.74, gen throughput (token/s): 224.38, #queue-req: 385
- 2025-07-20 15:31:16,121 - __main__ - INFO - sglang running req: 10 queue req: 385
- 2025-07-20 15:31:17,056 - sglang - INFO - [2025-07-20 15:31:17 TP0] Prefill batch. #new-seq: 1, #new-token: 2146, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 384
- 2025-07-20 15:31:17,056 - __main__ - INFO - sglang running req: 9 queue req: 384
- 2025-07-20 15:31:17,779 - sglang - INFO - [2025-07-20 15:31:17 TP0] Decode batch. #running-req: 10, #token: 26960, token usage: 0.71, gen throughput (token/s): 240.69, #queue-req: 384
- 2025-07-20 15:31:17,779 - __main__ - INFO - sglang running req: 10 queue req: 384
- 2025-07-20 15:31:18,758 - sglang - INFO - [2025-07-20 15:31:18 TP0] Decode batch. #running-req: 10, #token: 27360, token usage: 0.72, gen throughput (token/s): 408.35, #queue-req: 384
- 2025-07-20 15:31:18,758 - __main__ - INFO - sglang running req: 10 queue req: 384
- 2025-07-20 15:31:19,394 - sglang - INFO - [2025-07-20 15:31:19 TP0] Prefill batch. #new-seq: 2, #new-token: 4498, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 9, #queue-req: 382
- 2025-07-20 15:31:19,394 - __main__ - INFO - sglang running req: 9 queue req: 382
- 2025-07-20 15:31:21,152 - sglang - INFO - [2025-07-20 15:31:21 TP0] Decode batch. #running-req: 11, #token: 29038, token usage: 0.76, gen throughput (token/s): 172.55, #queue-req: 382
- 2025-07-20 15:31:21,152 - __main__ - INFO - sglang running req: 11 queue req: 382
- 2025-07-20 15:31:22,140 - sglang - INFO - [2025-07-20 15:31:22 TP0] Decode batch. #running-req: 11, #token: 29478, token usage: 0.78, gen throughput (token/s): 444.96, #queue-req: 382
- 2025-07-20 15:31:22,141 - __main__ - INFO - sglang running req: 11 queue req: 382
- 2025-07-20 15:31:23,130 - sglang - INFO - [2025-07-20 15:31:23 TP0] Decode batch. #running-req: 11, #token: 29918, token usage: 0.79, gen throughput (token/s): 444.63, #queue-req: 382
- 2025-07-20 15:31:23,130 - __main__ - INFO - sglang running req: 11 queue req: 382
- 2025-07-20 15:31:23,700 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:31:23,701 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 710.60 923.48
- sglang_output_tokens 204.57 265.86
- 2025-07-20 15:31:23,701 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 117 | 500
- 1 | 0 | 10
- 2025-07-20 15:31:24,120 - sglang - INFO - [2025-07-20 15:31:24 TP0] Decode batch. #running-req: 11, #token: 30358, token usage: 0.80, gen throughput (token/s): 444.37, #queue-req: 382
- 2025-07-20 15:31:24,120 - __main__ - INFO - sglang running req: 11 queue req: 382
- 2025-07-20 15:31:25,111 - sglang - INFO - [2025-07-20 15:31:25 TP0] Decode batch. #running-req: 11, #token: 30798, token usage: 0.81, gen throughput (token/s): 444.03, #queue-req: 382
- 2025-07-20 15:31:25,111 - __main__ - INFO - sglang running req: 11 queue req: 382
- 2025-07-20 15:31:25,930 - sglang - INFO - [2025-07-20 15:31:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2719, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 381
- 2025-07-20 15:31:25,930 - __main__ - INFO - sglang running req: 10 queue req: 381
- 2025-07-20 15:31:26,910 - sglang - INFO - [2025-07-20 15:31:26 TP0] Decode batch. #running-req: 11, #token: 30136, token usage: 0.79, gen throughput (token/s): 244.02, #queue-req: 381
- 2025-07-20 15:31:26,910 - __main__ - INFO - sglang running req: 11 queue req: 381
- 2025-07-20 15:31:27,897 - sglang - INFO - [2025-07-20 15:31:27 TP0] Decode batch. #running-req: 11, #token: 30576, token usage: 0.80, gen throughput (token/s): 445.84, #queue-req: 381
- 2025-07-20 15:31:27,897 - __main__ - INFO - sglang running req: 11 queue req: 381
- 2025-07-20 15:31:28,691 - sglang - INFO - [2025-07-20 15:31:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 380
- 2025-07-20 15:31:28,691 - __main__ - INFO - sglang running req: 10 queue req: 380
- 2025-07-20 15:31:29,710 - sglang - INFO - [2025-07-20 15:31:29 TP0] Decode batch. #running-req: 11, #token: 30919, token usage: 0.81, gen throughput (token/s): 242.16, #queue-req: 380
- 2025-07-20 15:31:29,710 - __main__ - INFO - sglang running req: 11 queue req: 380
- 2025-07-20 15:31:29,909 - sglang - INFO - [2025-07-20 15:31:29 TP0] Prefill batch. #new-seq: 1, #new-token: 1271, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 379
- 2025-07-20 15:31:29,909 - __main__ - INFO - sglang running req: 10 queue req: 379
- 2025-07-20 15:31:31,215 - sglang - INFO - [2025-07-20 15:31:31 TP0] Decode batch. #running-req: 11, #token: 29777, token usage: 0.78, gen throughput (token/s): 291.70, #queue-req: 379
- 2025-07-20 15:31:31,215 - __main__ - INFO - sglang running req: 11 queue req: 379
- 2025-07-20 15:31:31,870 - sglang - INFO - [2025-07-20 15:31:31 TP0] Prefill batch. #new-seq: 1, #new-token: 2701, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 378
- 2025-07-20 15:31:31,870 - __main__ - INFO - sglang running req: 10 queue req: 378
- 2025-07-20 15:31:33,031 - sglang - INFO - [2025-07-20 15:31:33 TP0] Decode batch. #running-req: 11, #token: 30798, token usage: 0.81, gen throughput (token/s): 241.75, #queue-req: 378
- 2025-07-20 15:31:33,031 - __main__ - INFO - sglang running req: 11 queue req: 378
- 2025-07-20 15:31:33,607 - sglang - INFO - [2025-07-20 15:31:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2203, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 377
- 2025-07-20 15:31:33,607 - __main__ - INFO - sglang running req: 10 queue req: 377
- 2025-07-20 15:31:33,702 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:31:33,703 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 720.43 960.27
- sglang_output_tokens 207.03 275.96
- 2025-07-20 15:31:33,703 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 122 | 500
- 1 | 0 | 10
- 2025-07-20 15:31:34,385 - sglang - INFO - [2025-07-20 15:31:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 376
- 2025-07-20 15:31:34,385 - __main__ - INFO - sglang running req: 10 queue req: 376
- 2025-07-20 15:31:35,572 - sglang - INFO - [2025-07-20 15:31:35 TP0] Decode batch. #running-req: 11, #token: 29844, token usage: 0.79, gen throughput (token/s): 172.33, #queue-req: 376
- 2025-07-20 15:31:35,573 - __main__ - INFO - sglang running req: 11 queue req: 376
- 2025-07-20 15:31:36,569 - sglang - INFO - [2025-07-20 15:31:36 TP0] Decode batch. #running-req: 11, #token: 30284, token usage: 0.80, gen throughput (token/s): 441.66, #queue-req: 376
- 2025-07-20 15:31:36,569 - __main__ - INFO - sglang running req: 11 queue req: 376
- 2025-07-20 15:31:36,718 - sglang - INFO - [2025-07-20 15:31:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2144, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 375
- 2025-07-20 15:31:36,718 - __main__ - INFO - sglang running req: 10 queue req: 375
- 2025-07-20 15:31:38,234 - sglang - INFO - [2025-07-20 15:31:38 TP0] Decode batch. #running-req: 11, #token: 31439, token usage: 0.83, gen throughput (token/s): 263.67, #queue-req: 375
- 2025-07-20 15:31:38,234 - __main__ - INFO - sglang running req: 11 queue req: 375
- 2025-07-20 15:31:39,226 - sglang - INFO - [2025-07-20 15:31:39 TP0] Decode batch. #running-req: 11, #token: 31879, token usage: 0.84, gen throughput (token/s): 443.31, #queue-req: 375
- 2025-07-20 15:31:39,227 - __main__ - INFO - sglang running req: 11 queue req: 375
- 2025-07-20 15:31:40,220 - sglang - INFO - [2025-07-20 15:31:40 TP0] Decode batch. #running-req: 11, #token: 32319, token usage: 0.85, gen throughput (token/s): 442.71, #queue-req: 375
- 2025-07-20 15:31:40,220 - __main__ - INFO - sglang running req: 11 queue req: 375
- 2025-07-20 15:31:40,444 - sglang - INFO - [2025-07-20 15:31:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2701, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 374
- 2025-07-20 15:31:40,444 - __main__ - INFO - sglang running req: 10 queue req: 374
- 2025-07-20 15:31:42,020 - sglang - INFO - [2025-07-20 15:31:42 TP0] Decode batch. #running-req: 11, #token: 32256, token usage: 0.85, gen throughput (token/s): 243.86, #queue-req: 374
- 2025-07-20 15:31:42,021 - __main__ - INFO - sglang running req: 11 queue req: 374
- 2025-07-20 15:31:42,864 - sglang - INFO - [2025-07-20 15:31:42 TP0] Prefill batch. #new-seq: 1, #new-token: 1804, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 373
- 2025-07-20 15:31:42,864 - __main__ - INFO - sglang running req: 10 queue req: 373
- 2025-07-20 15:31:43,662 - sglang - INFO - [2025-07-20 15:31:43 TP0] Decode batch. #running-req: 11, #token: 31638, token usage: 0.83, gen throughput (token/s): 267.48, #queue-req: 373
- 2025-07-20 15:31:43,662 - __main__ - INFO - sglang running req: 11 queue req: 373
- 2025-07-20 15:31:43,703 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:31:43,704 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 723.32 983.69
- sglang_output_tokens 207.65 283.21
- 2025-07-20 15:31:43,704 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 126 | 500
- 1 | 0 | 10
- 2025-07-20 15:31:44,655 - sglang - INFO - [2025-07-20 15:31:44 TP0] Decode batch. #running-req: 11, #token: 32078, token usage: 0.84, gen throughput (token/s): 443.07, #queue-req: 373
- 2025-07-20 15:31:44,655 - __main__ - INFO - sglang running req: 11 queue req: 373
- 2025-07-20 15:31:44,929 - sglang - INFO - [2025-07-20 15:31:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 372
- 2025-07-20 15:31:44,930 - __main__ - INFO - sglang running req: 10 queue req: 372
- 2025-07-20 15:31:46,440 - sglang - INFO - [2025-07-20 15:31:46 TP0] Decode batch. #running-req: 11, #token: 31630, token usage: 0.83, gen throughput (token/s): 245.87, #queue-req: 372
- 2025-07-20 15:31:46,440 - __main__ - INFO - sglang running req: 11 queue req: 372
- 2025-07-20 15:31:47,435 - sglang - INFO - [2025-07-20 15:31:47 TP0] Decode batch. #running-req: 11, #token: 32070, token usage: 0.84, gen throughput (token/s): 442.35, #queue-req: 372
- 2025-07-20 15:31:47,435 - __main__ - INFO - sglang running req: 11 queue req: 372
- 2025-07-20 15:31:48,432 - sglang - INFO - [2025-07-20 15:31:48 TP0] Decode batch. #running-req: 11, #token: 32510, token usage: 0.86, gen throughput (token/s): 441.35, #queue-req: 372
- 2025-07-20 15:31:48,432 - __main__ - INFO - sglang running req: 11 queue req: 372
- 2025-07-20 15:31:49,353 - sglang - INFO - [2025-07-20 15:31:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2077, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 371
- 2025-07-20 15:31:49,353 - __main__ - INFO - sglang running req: 10 queue req: 371
- 2025-07-20 15:31:50,112 - sglang - INFO - [2025-07-20 15:31:50 TP0] Decode batch. #running-req: 11, #token: 32156, token usage: 0.85, gen throughput (token/s): 261.26, #queue-req: 371
- 2025-07-20 15:31:50,112 - __main__ - INFO - sglang running req: 11 queue req: 371
- 2025-07-20 15:31:50,187 - sglang - INFO - [2025-07-20 15:31:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2512, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 370
- 2025-07-20 15:31:50,187 - __main__ - INFO - sglang running req: 10 queue req: 370
- 2025-07-20 15:31:51,863 - sglang - INFO - [2025-07-20 15:31:51 TP0] Decode batch. #running-req: 11, #token: 31513, token usage: 0.83, gen throughput (token/s): 250.68, #queue-req: 370
- 2025-07-20 15:31:51,863 - __main__ - INFO - sglang running req: 11 queue req: 370
- 2025-07-20 15:31:52,857 - sglang - INFO - [2025-07-20 15:31:52 TP0] Decode batch. #running-req: 11, #token: 31953, token usage: 0.84, gen throughput (token/s): 442.77, #queue-req: 370
- 2025-07-20 15:31:52,857 - __main__ - INFO - sglang running req: 11 queue req: 370
- 2025-07-20 15:31:53,706 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:31:53,706 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 724.46 996.42
- sglang_output_tokens 208.07 287.35
- 2025-07-20 15:31:53,706 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 129 | 500
- 1 | 0 | 10
- 2025-07-20 15:31:53,851 - sglang - INFO - [2025-07-20 15:31:53 TP0] Decode batch. #running-req: 11, #token: 32393, token usage: 0.85, gen throughput (token/s): 442.62, #queue-req: 370
- 2025-07-20 15:31:53,851 - __main__ - INFO - sglang running req: 11 queue req: 370
- 2025-07-20 15:31:54,846 - sglang - INFO - [2025-07-20 15:31:54 TP0] Decode batch. #running-req: 11, #token: 32833, token usage: 0.86, gen throughput (token/s): 442.12, #queue-req: 370
- 2025-07-20 15:31:54,847 - __main__ - INFO - sglang running req: 11 queue req: 370
- 2025-07-20 15:31:55,840 - sglang - INFO - [2025-07-20 15:31:55 TP0] Decode batch. #running-req: 10, #token: 30122, token usage: 0.79, gen throughput (token/s): 431.74, #queue-req: 370
- 2025-07-20 15:31:55,840 - __main__ - INFO - sglang running req: 10 queue req: 370
- 2025-07-20 15:31:56,826 - sglang - INFO - [2025-07-20 15:31:56 TP0] Decode batch. #running-req: 10, #token: 30522, token usage: 0.80, gen throughput (token/s): 405.77, #queue-req: 370
- 2025-07-20 15:31:56,826 - __main__ - INFO - sglang running req: 10 queue req: 370
- 2025-07-20 15:31:57,295 - sglang - INFO - [2025-07-20 15:31:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2714, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 369
- 2025-07-20 15:31:57,295 - __main__ - INFO - sglang running req: 9 queue req: 369
- 2025-07-20 15:31:58,126 - sglang - INFO - [2025-07-20 15:31:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2543, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 368
- 2025-07-20 15:31:58,127 - __main__ - INFO - sglang running req: 9 queue req: 368
- 2025-07-20 15:31:59,380 - sglang - INFO - [2025-07-20 15:31:59 TP0] Decode batch. #running-req: 10, #token: 30284, token usage: 0.80, gen throughput (token/s): 155.84, #queue-req: 368
- 2025-07-20 15:31:59,380 - __main__ - INFO - sglang running req: 10 queue req: 368
- 2025-07-20 15:32:00,146 - sglang - INFO - [2025-07-20 15:32:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2494, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 367
- 2025-07-20 15:32:00,146 - __main__ - INFO - sglang running req: 9 queue req: 367
- 2025-07-20 15:32:01,162 - sglang - INFO - [2025-07-20 15:32:01 TP0] Decode batch. #running-req: 9, #token: 27349, token usage: 0.72, gen throughput (token/s): 223.34, #queue-req: 367
- 2025-07-20 15:32:01,162 - __main__ - INFO - sglang running req: 9 queue req: 367
- 2025-07-20 15:32:01,162 - sglang - INFO - [2025-07-20 15:32:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2355, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 366
- 2025-07-20 15:32:01,162 - __main__ - INFO - sglang running req: 9 queue req: 366
- 2025-07-20 15:32:02,908 - sglang - INFO - [2025-07-20 15:32:02 TP0] Decode batch. #running-req: 10, #token: 30104, token usage: 0.79, gen throughput (token/s): 229.07, #queue-req: 366
- 2025-07-20 15:32:02,908 - __main__ - INFO - sglang running req: 10 queue req: 366
- 2025-07-20 15:32:03,353 - sglang - INFO - [2025-07-20 15:32:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2836, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 365
- 2025-07-20 15:32:03,354 - __main__ - INFO - sglang running req: 9 queue req: 365
- 2025-07-20 15:32:03,707 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:32:03,707 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 740.32 1012.26
- sglang_output_tokens 213.39 293.96
- 2025-07-20 15:32:03,707 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 135 | 500
- 1 | 0 | 10
- 2025-07-20 15:32:04,724 - sglang - INFO - [2025-07-20 15:32:04 TP0] Decode batch. #running-req: 10, #token: 29767, token usage: 0.78, gen throughput (token/s): 219.74, #queue-req: 365
- 2025-07-20 15:32:04,724 - __main__ - INFO - sglang running req: 10 queue req: 365
- 2025-07-20 15:32:05,705 - sglang - INFO - [2025-07-20 15:32:05 TP0] Decode batch. #running-req: 10, #token: 30167, token usage: 0.79, gen throughput (token/s): 407.66, #queue-req: 365
- 2025-07-20 15:32:05,705 - __main__ - INFO - sglang running req: 10 queue req: 365
- 2025-07-20 15:32:06,380 - __main__ - WARNING - JSON decode error on attempt 0 for scripts/data/11445200MB2C47380T4440125017008.pdf-12: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 15:32:06,395 - sglang - INFO - [2025-07-20 15:32:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2720, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 364
- 2025-07-20 15:32:06,395 - __main__ - INFO - sglang running req: 9 queue req: 364
- 2025-07-20 15:32:06,596 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-12
- 2025-07-20 15:32:07,503 - sglang - INFO - [2025-07-20 15:32:07 TP0] Decode batch. #running-req: 10, #token: 28784, token usage: 0.76, gen throughput (token/s): 221.89, #queue-req: 365
- 2025-07-20 15:32:07,503 - __main__ - INFO - sglang running req: 10 queue req: 365
- 2025-07-20 15:32:07,651 - sglang - INFO - [2025-07-20 15:32:07 TP0] Prefill batch. #new-seq: 2, #new-token: 3524, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 363
- 2025-07-20 15:32:07,651 - __main__ - INFO - sglang running req: 9 queue req: 363
- 2025-07-20 15:32:09,687 - sglang - INFO - [2025-07-20 15:32:09 TP0] Decode batch. #running-req: 11, #token: 29499, token usage: 0.78, gen throughput (token/s): 198.24, #queue-req: 363
- 2025-07-20 15:32:09,688 - __main__ - INFO - sglang running req: 11 queue req: 363
- 2025-07-20 15:32:10,676 - sglang - INFO - [2025-07-20 15:32:10 TP0] Decode batch. #running-req: 11, #token: 29939, token usage: 0.79, gen throughput (token/s): 445.05, #queue-req: 363
- 2025-07-20 15:32:10,676 - __main__ - INFO - sglang running req: 11 queue req: 363
- 2025-07-20 15:32:11,516 - sglang - INFO - [2025-07-20 15:32:11 TP0] Prefill batch. #new-seq: 1, #new-token: 2575, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 362
- 2025-07-20 15:32:11,516 - __main__ - INFO - sglang running req: 10 queue req: 362
- 2025-07-20 15:32:12,452 - sglang - INFO - [2025-07-20 15:32:12 TP0] Decode batch. #running-req: 11, #token: 29388, token usage: 0.77, gen throughput (token/s): 247.19, #queue-req: 362
- 2025-07-20 15:32:12,452 - __main__ - INFO - sglang running req: 11 queue req: 362
- 2025-07-20 15:32:12,772 - sglang - INFO - [2025-07-20 15:32:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2406, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 361
- 2025-07-20 15:32:12,772 - __main__ - INFO - sglang running req: 10 queue req: 361
- 2025-07-20 15:32:13,709 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:32:13,709 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 746.84 1005.86
- sglang_output_tokens 217.03 294.92
- 2025-07-20 15:32:13,709 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 138 | 500
- 1 | 0 | 10
- 2025-07-20 15:32:14,174 - sglang - INFO - [2025-07-20 15:32:14 TP0] Decode batch. #running-req: 11, #token: 29541, token usage: 0.78, gen throughput (token/s): 254.89, #queue-req: 361
- 2025-07-20 15:32:14,174 - __main__ - INFO - sglang running req: 11 queue req: 361
- 2025-07-20 15:32:15,163 - sglang - INFO - [2025-07-20 15:32:15 TP0] Decode batch. #running-req: 11, #token: 29981, token usage: 0.79, gen throughput (token/s): 444.79, #queue-req: 361
- 2025-07-20 15:32:15,164 - __main__ - INFO - sglang running req: 11 queue req: 361
- 2025-07-20 15:32:15,238 - sglang - INFO - [2025-07-20 15:32:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2100, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 360
- 2025-07-20 15:32:15,238 - __main__ - INFO - sglang running req: 10 queue req: 360
- 2025-07-20 15:32:16,842 - sglang - INFO - [2025-07-20 15:32:16 TP0] Decode batch. #running-req: 11, #token: 30966, token usage: 0.82, gen throughput (token/s): 261.56, #queue-req: 360
- 2025-07-20 15:32:16,842 - __main__ - INFO - sglang running req: 11 queue req: 360
- 2025-07-20 15:32:17,735 - sglang - INFO - [2025-07-20 15:32:17 TP0] Prefill batch. #new-seq: 1, #new-token: 2786, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 359
- 2025-07-20 15:32:17,735 - __main__ - INFO - sglang running req: 10 queue req: 359
- 2025-07-20 15:32:18,652 - sglang - INFO - [2025-07-20 15:32:18 TP0] Decode batch. #running-req: 11, #token: 30927, token usage: 0.81, gen throughput (token/s): 243.53, #queue-req: 359
- 2025-07-20 15:32:18,652 - __main__ - INFO - sglang running req: 11 queue req: 359
- 2025-07-20 15:32:19,681 - sglang - INFO - [2025-07-20 15:32:19 TP0] Decode batch. #running-req: 11, #token: 31367, token usage: 0.83, gen throughput (token/s): 424.66, #queue-req: 359
- 2025-07-20 15:32:19,681 - __main__ - INFO - sglang running req: 11 queue req: 359
- 2025-07-20 15:32:20,670 - sglang - INFO - [2025-07-20 15:32:20 TP0] Decode batch. #running-req: 11, #token: 31807, token usage: 0.84, gen throughput (token/s): 444.85, #queue-req: 359
- 2025-07-20 15:32:20,670 - __main__ - INFO - sglang running req: 11 queue req: 359
- 2025-07-20 15:32:21,659 - sglang - INFO - [2025-07-20 15:32:21 TP0] Decode batch. #running-req: 11, #token: 32247, token usage: 0.85, gen throughput (token/s): 444.94, #queue-req: 359
- 2025-07-20 15:32:21,659 - __main__ - INFO - sglang running req: 11 queue req: 359
- 2025-07-20 15:32:21,932 - sglang - INFO - [2025-07-20 15:32:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2768, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 358
- 2025-07-20 15:32:21,933 - __main__ - INFO - sglang running req: 10 queue req: 358
- 2025-07-20 15:32:23,467 - sglang - INFO - [2025-07-20 15:32:23 TP0] Decode batch. #running-req: 11, #token: 32281, token usage: 0.85, gen throughput (token/s): 242.81, #queue-req: 358
- 2025-07-20 15:32:23,467 - __main__ - INFO - sglang running req: 11 queue req: 358
- 2025-07-20 15:32:23,710 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:32:23,711 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 744.49 997.11
- sglang_output_tokens 215.72 290.57
- 2025-07-20 15:32:23,711 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 141 | 500
- 1 | 0 | 10
- 2025-07-20 15:32:24,463 - sglang - INFO - [2025-07-20 15:32:24 TP0] Decode batch. #running-req: 11, #token: 32721, token usage: 0.86, gen throughput (token/s): 441.49, #queue-req: 358
- 2025-07-20 15:32:24,463 - __main__ - INFO - sglang running req: 11 queue req: 358
- 2025-07-20 15:32:25,463 - sglang - INFO - [2025-07-20 15:32:25 TP0] Decode batch. #running-req: 11, #token: 33161, token usage: 0.87, gen throughput (token/s): 440.22, #queue-req: 358
- 2025-07-20 15:32:25,463 - __main__ - INFO - sglang running req: 11 queue req: 358
- 2025-07-20 15:32:25,762 - sglang - INFO - [2025-07-20 15:32:25 TP0] Prefill batch. #new-seq: 1, #new-token: 1751, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 10, #queue-req: 357
- 2025-07-20 15:32:25,763 - __main__ - INFO - sglang running req: 10 queue req: 357
- 2025-07-20 15:32:26,472 - sglang - INFO - [2025-07-20 15:32:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2252, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 356
- 2025-07-20 15:32:26,472 - __main__ - INFO - sglang running req: 10 queue req: 356
- 2025-07-20 15:32:27,795 - sglang - INFO - [2025-07-20 15:32:27 TP0] Decode batch. #running-req: 11, #token: 31748, token usage: 0.84, gen throughput (token/s): 187.79, #queue-req: 356
- 2025-07-20 15:32:27,795 - __main__ - INFO - sglang running req: 11 queue req: 356
- 2025-07-20 15:32:28,069 - sglang - INFO - [2025-07-20 15:32:28 TP0] Prefill batch. #new-seq: 1, #new-token: 1580, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 355
- 2025-07-20 15:32:28,069 - __main__ - INFO - sglang running req: 10 queue req: 355
- 2025-07-20 15:32:28,849 - sglang - INFO - [2025-07-20 15:32:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2805, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 354
- 2025-07-20 15:32:28,849 - __main__ - INFO - sglang running req: 10 queue req: 354
- 2025-07-20 15:32:30,181 - sglang - INFO - [2025-07-20 15:32:30 TP0] Decode batch. #running-req: 11, #token: 30600, token usage: 0.81, gen throughput (token/s): 183.56, #queue-req: 354
- 2025-07-20 15:32:30,181 - __main__ - INFO - sglang running req: 11 queue req: 354
- 2025-07-20 15:32:30,454 - sglang - INFO - [2025-07-20 15:32:30 TP0] Prefill batch. #new-seq: 1, #new-token: 1897, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 353
- 2025-07-20 15:32:30,455 - __main__ - INFO - sglang running req: 10 queue req: 353
- 2025-07-20 15:32:31,822 - sglang - INFO - [2025-07-20 15:32:31 TP0] Decode batch. #running-req: 11, #token: 29391, token usage: 0.77, gen throughput (token/s): 267.45, #queue-req: 353
- 2025-07-20 15:32:31,823 - __main__ - INFO - sglang running req: 11 queue req: 353
- 2025-07-20 15:32:32,811 - sglang - INFO - [2025-07-20 15:32:32 TP0] Decode batch. #running-req: 11, #token: 29831, token usage: 0.79, gen throughput (token/s): 445.18, #queue-req: 353
- 2025-07-20 15:32:32,811 - __main__ - INFO - sglang running req: 11 queue req: 353
- 2025-07-20 15:32:33,712 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:32:33,713 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 754.69 1024.19
- sglang_output_tokens 218.07 297.92
- 2025-07-20 15:32:33,713 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 146 | 500
- 1 | 0 | 10
- 2025-07-20 15:32:33,800 - sglang - INFO - [2025-07-20 15:32:33 TP0] Decode batch. #running-req: 11, #token: 30271, token usage: 0.80, gen throughput (token/s): 444.69, #queue-req: 353
- 2025-07-20 15:32:33,801 - __main__ - INFO - sglang running req: 11 queue req: 353
- 2025-07-20 15:32:34,444 - sglang - INFO - [2025-07-20 15:32:34 TP0] Prefill batch. #new-seq: 1, #new-token: 1937, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 352
- 2025-07-20 15:32:34,444 - __main__ - INFO - sglang running req: 10 queue req: 352
- 2025-07-20 15:32:35,198 - sglang - INFO - [2025-07-20 15:32:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2772, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 351
- 2025-07-20 15:32:35,198 - __main__ - INFO - sglang running req: 10 queue req: 351
- 2025-07-20 15:32:36,279 - sglang - INFO - [2025-07-20 15:32:36 TP0] Decode batch. #running-req: 11, #token: 28525, token usage: 0.75, gen throughput (token/s): 176.73, #queue-req: 351
- 2025-07-20 15:32:36,279 - __main__ - INFO - sglang running req: 11 queue req: 351
- 2025-07-20 15:32:37,265 - sglang - INFO - [2025-07-20 15:32:37 TP0] Decode batch. #running-req: 11, #token: 28965, token usage: 0.76, gen throughput (token/s): 446.33, #queue-req: 351
- 2025-07-20 15:32:37,265 - __main__ - INFO - sglang running req: 11 queue req: 351
- 2025-07-20 15:32:37,734 - sglang - INFO - [2025-07-20 15:32:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2762, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 350
- 2025-07-20 15:32:37,735 - __main__ - INFO - sglang running req: 10 queue req: 350
- 2025-07-20 15:32:39,063 - sglang - INFO - [2025-07-20 15:32:39 TP0] Decode batch. #running-req: 11, #token: 25879, token usage: 0.68, gen throughput (token/s): 244.02, #queue-req: 350
- 2025-07-20 15:32:39,064 - __main__ - INFO - sglang running req: 11 queue req: 350
- 2025-07-20 15:32:39,088 - sglang - INFO - [2025-07-20 15:32:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2746, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 349
- 2025-07-20 15:32:39,088 - __main__ - INFO - sglang running req: 10 queue req: 349
- 2025-07-20 15:32:40,095 - sglang - INFO - [2025-07-20 15:32:40 TP0] Prefill batch. #new-seq: 1, #new-token: 1892, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 348
- 2025-07-20 15:32:40,096 - __main__ - INFO - sglang running req: 10 queue req: 348
- 2025-07-20 15:32:41,511 - sglang - INFO - [2025-07-20 15:32:41 TP0] Decode batch. #running-req: 11, #token: 29098, token usage: 0.77, gen throughput (token/s): 178.99, #queue-req: 348
- 2025-07-20 15:32:41,511 - __main__ - INFO - sglang running req: 11 queue req: 348
- 2025-07-20 15:32:42,497 - sglang - INFO - [2025-07-20 15:32:42 TP0] Decode batch. #running-req: 11, #token: 29538, token usage: 0.78, gen throughput (token/s): 445.91, #queue-req: 348
- 2025-07-20 15:32:42,498 - __main__ - INFO - sglang running req: 11 queue req: 348
- 2025-07-20 15:32:43,481 - sglang - INFO - [2025-07-20 15:32:43 TP0] Decode batch. #running-req: 11, #token: 29978, token usage: 0.79, gen throughput (token/s): 447.04, #queue-req: 348
- 2025-07-20 15:32:43,482 - __main__ - INFO - sglang running req: 11 queue req: 348
- 2025-07-20 15:32:43,714 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:32:43,715 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 763.76 1017.35
- sglang_output_tokens 220.31 294.41
- 2025-07-20 15:32:43,715 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 151 | 500
- 1 | 0 | 10
- 2025-07-20 15:32:44,073 - sglang - INFO - [2025-07-20 15:32:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2225, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 347
- 2025-07-20 15:32:44,073 - __main__ - INFO - sglang running req: 10 queue req: 347
- 2025-07-20 15:32:45,172 - sglang - INFO - [2025-07-20 15:32:45 TP0] Prefill batch. #new-seq: 1, #new-token: 2902, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 346
- 2025-07-20 15:32:45,172 - __main__ - INFO - sglang running req: 10 queue req: 346
- 2025-07-20 15:32:46,033 - sglang - INFO - [2025-07-20 15:32:46 TP0] Decode batch. #running-req: 11, #token: 30627, token usage: 0.81, gen throughput (token/s): 171.70, #queue-req: 346
- 2025-07-20 15:32:46,033 - __main__ - INFO - sglang running req: 11 queue req: 346
- 2025-07-20 15:32:47,024 - sglang - INFO - [2025-07-20 15:32:47 TP0] Decode batch. #running-req: 11, #token: 31067, token usage: 0.82, gen throughput (token/s): 443.73, #queue-req: 346
- 2025-07-20 15:32:47,024 - __main__ - INFO - sglang running req: 11 queue req: 346
- 2025-07-20 15:32:48,016 - sglang - INFO - [2025-07-20 15:32:48 TP0] Decode batch. #running-req: 11, #token: 31507, token usage: 0.83, gen throughput (token/s): 443.81, #queue-req: 346
- 2025-07-20 15:32:48,016 - __main__ - INFO - sglang running req: 11 queue req: 346
- 2025-07-20 15:32:49,009 - sglang - INFO - [2025-07-20 15:32:49 TP0] Decode batch. #running-req: 11, #token: 31947, token usage: 0.84, gen throughput (token/s): 442.97, #queue-req: 346
- 2025-07-20 15:32:49,009 - __main__ - INFO - sglang running req: 11 queue req: 346
- 2025-07-20 15:32:49,183 - sglang - INFO - [2025-07-20 15:32:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2257, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 345
- 2025-07-20 15:32:49,183 - __main__ - INFO - sglang running req: 10 queue req: 345
- 2025-07-20 15:32:50,727 - sglang - INFO - [2025-07-20 15:32:50 TP0] Decode batch. #running-req: 11, #token: 31007, token usage: 0.82, gen throughput (token/s): 255.52, #queue-req: 345
- 2025-07-20 15:32:50,727 - __main__ - INFO - sglang running req: 11 queue req: 345
- 2025-07-20 15:32:51,122 - sglang - INFO - [2025-07-20 15:32:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2519, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 344
- 2025-07-20 15:32:51,123 - __main__ - INFO - sglang running req: 10 queue req: 344
- 2025-07-20 15:32:52,475 - sglang - INFO - [2025-07-20 15:32:52 TP0] Decode batch. #running-req: 11, #token: 31504, token usage: 0.83, gen throughput (token/s): 251.16, #queue-req: 344
- 2025-07-20 15:32:52,475 - __main__ - INFO - sglang running req: 11 queue req: 344
- 2025-07-20 15:32:52,574 - sglang - INFO - [2025-07-20 15:32:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2084, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 343
- 2025-07-20 15:32:52,575 - __main__ - INFO - sglang running req: 10 queue req: 343
- 2025-07-20 15:32:53,717 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:32:53,718 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 769.97 1011.21
- sglang_output_tokens 221.53 292.11
- 2025-07-20 15:32:53,718 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 156 | 500
- 1 | 0 | 10
- 2025-07-20 15:32:54,155 - sglang - INFO - [2025-07-20 15:32:54 TP0] Decode batch. #running-req: 11, #token: 31633, token usage: 0.83, gen throughput (token/s): 261.31, #queue-req: 343
- 2025-07-20 15:32:54,155 - __main__ - INFO - sglang running req: 11 queue req: 343
- 2025-07-20 15:32:54,924 - sglang - INFO - [2025-07-20 15:32:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2398, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 342
- 2025-07-20 15:32:54,924 - __main__ - INFO - sglang running req: 10 queue req: 342
- 2025-07-20 15:32:55,897 - sglang - INFO - [2025-07-20 15:32:55 TP0] Decode batch. #running-req: 11, #token: 30843, token usage: 0.81, gen throughput (token/s): 251.94, #queue-req: 342
- 2025-07-20 15:32:55,898 - __main__ - INFO - sglang running req: 11 queue req: 342
- 2025-07-20 15:32:56,890 - sglang - INFO - [2025-07-20 15:32:56 TP0] Decode batch. #running-req: 11, #token: 31283, token usage: 0.82, gen throughput (token/s): 443.09, #queue-req: 342
- 2025-07-20 15:32:56,891 - __main__ - INFO - sglang running req: 11 queue req: 342
- 2025-07-20 15:32:57,710 - sglang - INFO - [2025-07-20 15:32:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2052, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 341
- 2025-07-20 15:32:57,710 - __main__ - INFO - sglang running req: 10 queue req: 341
- 2025-07-20 15:32:58,567 - sglang - INFO - [2025-07-20 15:32:58 TP0] Decode batch. #running-req: 11, #token: 31379, token usage: 0.83, gen throughput (token/s): 261.78, #queue-req: 341
- 2025-07-20 15:32:58,568 - __main__ - INFO - sglang running req: 11 queue req: 341
- 2025-07-20 15:32:59,484 - sglang - INFO - [2025-07-20 15:32:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2838, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 340
- 2025-07-20 15:32:59,484 - __main__ - INFO - sglang running req: 10 queue req: 340
- 2025-07-20 15:33:00,391 - sglang - INFO - [2025-07-20 15:33:00 TP0] Decode batch. #running-req: 11, #token: 32019, token usage: 0.84, gen throughput (token/s): 240.77, #queue-req: 340
- 2025-07-20 15:33:00,391 - __main__ - INFO - sglang running req: 11 queue req: 340
- 2025-07-20 15:33:01,238 - sglang - INFO - [2025-07-20 15:33:01 TP0] Prefill batch. #new-seq: 1, #new-token: 1881, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 339
- 2025-07-20 15:33:01,239 - __main__ - INFO - sglang running req: 10 queue req: 339
- 2025-07-20 15:33:02,039 - sglang - INFO - [2025-07-20 15:33:02 TP0] Decode batch. #running-req: 11, #token: 30680, token usage: 0.81, gen throughput (token/s): 266.30, #queue-req: 339
- 2025-07-20 15:33:02,039 - __main__ - INFO - sglang running req: 11 queue req: 339
- 2025-07-20 15:33:03,032 - sglang - INFO - [2025-07-20 15:33:03 TP0] Decode batch. #running-req: 11, #token: 31120, token usage: 0.82, gen throughput (token/s): 443.43, #queue-req: 339
- 2025-07-20 15:33:03,032 - __main__ - INFO - sglang running req: 11 queue req: 339
- 2025-07-20 15:33:03,719 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:33:03,720 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 774.03 1016.18
- sglang_output_tokens 222.38 293.57
- 2025-07-20 15:33:03,720 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 160 | 500
- 1 | 0 | 10
- 2025-07-20 15:33:04,025 - sglang - INFO - [2025-07-20 15:33:04 TP0] Decode batch. #running-req: 11, #token: 31560, token usage: 0.83, gen throughput (token/s): 443.02, #queue-req: 339
- 2025-07-20 15:33:04,025 - __main__ - INFO - sglang running req: 11 queue req: 339
- 2025-07-20 15:33:05,018 - sglang - INFO - [2025-07-20 15:33:05 TP0] Decode batch. #running-req: 11, #token: 32000, token usage: 0.84, gen throughput (token/s): 442.84, #queue-req: 339
- 2025-07-20 15:33:05,019 - __main__ - INFO - sglang running req: 11 queue req: 339
- 2025-07-20 15:33:06,082 - sglang - INFO - [2025-07-20 15:33:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2685, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 338
- 2025-07-20 15:33:06,082 - __main__ - INFO - sglang running req: 10 queue req: 338
- 2025-07-20 15:33:06,970 - sglang - INFO - [2025-07-20 15:33:06 TP0] Decode batch. #running-req: 11, #token: 31505, token usage: 0.83, gen throughput (token/s): 224.95, #queue-req: 338
- 2025-07-20 15:33:06,970 - __main__ - INFO - sglang running req: 11 queue req: 338
- 2025-07-20 15:33:07,959 - sglang - INFO - [2025-07-20 15:33:07 TP0] Decode batch. #running-req: 11, #token: 31945, token usage: 0.84, gen throughput (token/s): 444.80, #queue-req: 338
- 2025-07-20 15:33:07,959 - __main__ - INFO - sglang running req: 11 queue req: 338
- 2025-07-20 15:33:08,953 - sglang - INFO - [2025-07-20 15:33:08 TP0] Decode batch. #running-req: 11, #token: 32385, token usage: 0.85, gen throughput (token/s): 442.61, #queue-req: 338
- 2025-07-20 15:33:08,954 - __main__ - INFO - sglang running req: 11 queue req: 338
- 2025-07-20 15:33:09,939 - sglang - INFO - [2025-07-20 15:33:09 TP0] Decode batch. #running-req: 10, #token: 30406, token usage: 0.80, gen throughput (token/s): 408.62, #queue-req: 338
- 2025-07-20 15:33:09,940 - __main__ - INFO - sglang running req: 10 queue req: 338
- 2025-07-20 15:33:10,113 - sglang - INFO - [2025-07-20 15:33:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2891, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 337
- 2025-07-20 15:33:10,113 - __main__ - INFO - sglang running req: 9 queue req: 337
- 2025-07-20 15:33:11,753 - sglang - INFO - [2025-07-20 15:33:11 TP0] Decode batch. #running-req: 9, #token: 26362, token usage: 0.69, gen throughput (token/s): 219.50, #queue-req: 337
- 2025-07-20 15:33:11,753 - __main__ - INFO - sglang running req: 9 queue req: 337
- 2025-07-20 15:33:11,753 - sglang - INFO - [2025-07-20 15:33:11 TP0] Prefill batch. #new-seq: 1, #new-token: 2898, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 336
- 2025-07-20 15:33:11,753 - __main__ - INFO - sglang running req: 9 queue req: 336
- 2025-07-20 15:33:13,565 - sglang - INFO - [2025-07-20 15:33:13 TP0] Decode batch. #running-req: 10, #token: 29660, token usage: 0.78, gen throughput (token/s): 220.70, #queue-req: 336
- 2025-07-20 15:33:13,565 - __main__ - INFO - sglang running req: 10 queue req: 336
- 2025-07-20 15:33:13,713 - sglang - INFO - [2025-07-20 15:33:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2739, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 335
- 2025-07-20 15:33:13,713 - __main__ - INFO - sglang running req: 9 queue req: 335
- 2025-07-20 15:33:13,720 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:33:13,721 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 783.38 1016.88
- sglang_output_tokens 225.15 293.53
- 2025-07-20 15:33:13,721 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 165 | 500
- 1 | 0 | 10
- 2025-07-20 15:33:15,349 - sglang - INFO - [2025-07-20 15:33:15 TP0] Decode batch. #running-req: 10, #token: 30113, token usage: 0.79, gen throughput (token/s): 223.63, #queue-req: 335
- 2025-07-20 15:33:15,350 - __main__ - INFO - sglang running req: 10 queue req: 335
- 2025-07-20 15:33:15,769 - sglang - INFO - [2025-07-20 15:33:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2978, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 334
- 2025-07-20 15:33:15,769 - __main__ - INFO - sglang running req: 9 queue req: 334
- 2025-07-20 15:33:16,856 - sglang - INFO - [2025-07-20 15:33:16 TP0] Prefill batch. #new-seq: 1, #new-token: 1462, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 333
- 2025-07-20 15:33:16,856 - __main__ - INFO - sglang running req: 9 queue req: 333
- 2025-07-20 15:33:17,712 - sglang - INFO - [2025-07-20 15:33:17 TP0] Prefill batch. #new-seq: 2, #new-token: 5319, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.60, #running-req: 9, #queue-req: 331
- 2025-07-20 15:33:17,712 - __main__ - INFO - sglang running req: 9 queue req: 331
- 2025-07-20 15:33:19,285 - sglang - INFO - [2025-07-20 15:33:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2732, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 330
- 2025-07-20 15:33:19,285 - __main__ - INFO - sglang running req: 10 queue req: 330
- 2025-07-20 15:33:20,115 - sglang - INFO - [2025-07-20 15:33:20 TP0] Decode batch. #running-req: 11, #token: 30703, token usage: 0.81, gen throughput (token/s): 83.51, #queue-req: 330
- 2025-07-20 15:33:20,116 - __main__ - INFO - sglang running req: 11 queue req: 330
- 2025-07-20 15:33:21,101 - sglang - INFO - [2025-07-20 15:33:21 TP0] Decode batch. #running-req: 11, #token: 31143, token usage: 0.82, gen throughput (token/s): 446.54, #queue-req: 330
- 2025-07-20 15:33:21,101 - __main__ - INFO - sglang running req: 11 queue req: 330
- 2025-07-20 15:33:22,087 - sglang - INFO - [2025-07-20 15:33:22 TP0] Decode batch. #running-req: 11, #token: 31583, token usage: 0.83, gen throughput (token/s): 446.12, #queue-req: 330
- 2025-07-20 15:33:22,087 - __main__ - INFO - sglang running req: 11 queue req: 330
- 2025-07-20 15:33:23,079 - sglang - INFO - [2025-07-20 15:33:23 TP0] Decode batch. #running-req: 11, #token: 32023, token usage: 0.84, gen throughput (token/s): 443.43, #queue-req: 330
- 2025-07-20 15:33:23,080 - __main__ - INFO - sglang running req: 11 queue req: 330
- 2025-07-20 15:33:23,722 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:33:23,722 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 786.77 1030.07
- sglang_output_tokens 226.11 296.90
- 2025-07-20 15:33:23,722 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 169 | 500
- 1 | 0 | 10
- 2025-07-20 15:33:24,073 - sglang - INFO - [2025-07-20 15:33:24 TP0] Decode batch. #running-req: 10, #token: 29296, token usage: 0.77, gen throughput (token/s): 441.63, #queue-req: 330
- 2025-07-20 15:33:24,073 - __main__ - INFO - sglang running req: 10 queue req: 330
- 2025-07-20 15:33:24,074 - sglang - INFO - [2025-07-20 15:33:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2829, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 329
- 2025-07-20 15:33:24,074 - __main__ - INFO - sglang running req: 10 queue req: 329
- 2025-07-20 15:33:25,894 - sglang - INFO - [2025-07-20 15:33:25 TP0] Decode batch. #running-req: 11, #token: 32565, token usage: 0.86, gen throughput (token/s): 241.66, #queue-req: 329
- 2025-07-20 15:33:25,894 - __main__ - INFO - sglang running req: 11 queue req: 329
- 2025-07-20 15:33:26,896 - sglang - INFO - [2025-07-20 15:33:26 TP0] Decode batch. #running-req: 11, #token: 33005, token usage: 0.87, gen throughput (token/s): 442.15, #queue-req: 329
- 2025-07-20 15:33:26,897 - __main__ - INFO - sglang running req: 11 queue req: 329
- 2025-07-20 15:33:27,884 - sglang - INFO - [2025-07-20 15:33:27 TP0] Decode batch. #running-req: 10, #token: 31689, token usage: 0.83, gen throughput (token/s): 425.29, #queue-req: 329
- 2025-07-20 15:33:27,884 - __main__ - INFO - sglang running req: 10 queue req: 329
- 2025-07-20 15:33:28,872 - sglang - INFO - [2025-07-20 15:33:28 TP0] Decode batch. #running-req: 10, #token: 32089, token usage: 0.84, gen throughput (token/s): 404.87, #queue-req: 329
- 2025-07-20 15:33:28,872 - __main__ - INFO - sglang running req: 10 queue req: 329
- 2025-07-20 15:33:29,861 - sglang - INFO - [2025-07-20 15:33:29 TP0] Decode batch. #running-req: 10, #token: 32489, token usage: 0.86, gen throughput (token/s): 404.47, #queue-req: 329
- 2025-07-20 15:33:29,861 - __main__ - INFO - sglang running req: 10 queue req: 329
- 2025-07-20 15:33:30,754 - sglang - INFO - [2025-07-20 15:33:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2573, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 328
- 2025-07-20 15:33:30,755 - __main__ - INFO - sglang running req: 9 queue req: 328
- 2025-07-20 15:33:31,638 - sglang - INFO - [2025-07-20 15:33:31 TP0] Decode batch. #running-req: 10, #token: 31745, token usage: 0.84, gen throughput (token/s): 224.54, #queue-req: 328
- 2025-07-20 15:33:31,638 - __main__ - INFO - sglang running req: 10 queue req: 328
- 2025-07-20 15:33:32,662 - sglang - INFO - [2025-07-20 15:33:32 TP0] Decode batch. #running-req: 10, #token: 32145, token usage: 0.85, gen throughput (token/s): 390.48, #queue-req: 328
- 2025-07-20 15:33:32,662 - __main__ - INFO - sglang running req: 10 queue req: 328
- 2025-07-20 15:33:33,654 - sglang - INFO - [2025-07-20 15:33:33 TP0] Decode batch. #running-req: 10, #token: 32545, token usage: 0.86, gen throughput (token/s): 403.28, #queue-req: 328
- 2025-07-20 15:33:33,654 - __main__ - INFO - sglang running req: 10 queue req: 328
- 2025-07-20 15:33:33,724 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:33:33,724 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 784.51 999.24
- sglang_output_tokens 225.47 288.94
- 2025-07-20 15:33:33,724 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 172 | 500
- 1 | 0 | 10
- 2025-07-20 15:33:34,643 - sglang - INFO - [2025-07-20 15:33:34 TP0] Decode batch. #running-req: 10, #token: 32945, token usage: 0.87, gen throughput (token/s): 404.34, #queue-req: 328
- 2025-07-20 15:33:34,643 - __main__ - INFO - sglang running req: 10 queue req: 328
- 2025-07-20 15:33:35,064 - sglang - INFO - [2025-07-20 15:33:35 TP0] Prefill batch. #new-seq: 1, #new-token: 1640, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 9, #queue-req: 327
- 2025-07-20 15:33:35,064 - __main__ - INFO - sglang running req: 9 queue req: 327
- 2025-07-20 15:33:36,210 - sglang - INFO - [2025-07-20 15:33:36 TP0] Decode batch. #running-req: 10, #token: 31481, token usage: 0.83, gen throughput (token/s): 254.60, #queue-req: 327
- 2025-07-20 15:33:36,211 - __main__ - INFO - sglang running req: 10 queue req: 327
- 2025-07-20 15:33:37,196 - sglang - INFO - [2025-07-20 15:33:37 TP0] Decode batch. #running-req: 10, #token: 31881, token usage: 0.84, gen throughput (token/s): 405.65, #queue-req: 327
- 2025-07-20 15:33:37,197 - __main__ - INFO - sglang running req: 10 queue req: 327
- 2025-07-20 15:33:38,186 - sglang - INFO - [2025-07-20 15:33:38 TP0] Decode batch. #running-req: 10, #token: 32281, token usage: 0.85, gen throughput (token/s): 404.24, #queue-req: 327
- 2025-07-20 15:33:38,186 - __main__ - INFO - sglang running req: 10 queue req: 327
- 2025-07-20 15:33:39,178 - sglang - INFO - [2025-07-20 15:33:39 TP0] Decode batch. #running-req: 10, #token: 32681, token usage: 0.86, gen throughput (token/s): 403.35, #queue-req: 327
- 2025-07-20 15:33:39,178 - __main__ - INFO - sglang running req: 10 queue req: 327
- 2025-07-20 15:33:40,170 - sglang - INFO - [2025-07-20 15:33:40 TP0] Decode batch. #running-req: 10, #token: 33081, token usage: 0.87, gen throughput (token/s): 403.31, #queue-req: 327
- 2025-07-20 15:33:40,170 - __main__ - INFO - sglang running req: 10 queue req: 327
- 2025-07-20 15:33:40,666 - sglang - INFO - [2025-07-20 15:33:40 TP0] Prefill batch. #new-seq: 1, #new-token: 1618, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 9, #queue-req: 326
- 2025-07-20 15:33:40,666 - __main__ - INFO - sglang running req: 9 queue req: 326
- 2025-07-20 15:33:41,369 - sglang - INFO - [2025-07-20 15:33:41 TP0] Prefill batch. #new-seq: 1, #new-token: 1855, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 325
- 2025-07-20 15:33:41,369 - __main__ - INFO - sglang running req: 9 queue req: 325
- 2025-07-20 15:33:42,387 - sglang - INFO - [2025-07-20 15:33:42 TP0] Decode batch. #running-req: 10, #token: 29612, token usage: 0.78, gen throughput (token/s): 179.46, #queue-req: 325
- 2025-07-20 15:33:42,387 - __main__ - INFO - sglang running req: 10 queue req: 325
- 2025-07-20 15:33:43,321 - sglang - INFO - [2025-07-20 15:33:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2625, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 324
- 2025-07-20 15:33:43,321 - __main__ - INFO - sglang running req: 9 queue req: 324
- 2025-07-20 15:33:43,725 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:33:43,726 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 790.38 1004.70
- sglang_output_tokens 227.50 292.34
- 2025-07-20 15:33:43,726 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 176 | 500
- 1 | 0 | 10
- 2025-07-20 15:33:44,155 - sglang - INFO - [2025-07-20 15:33:44 TP0] Decode batch. #running-req: 10, #token: 29198, token usage: 0.77, gen throughput (token/s): 225.75, #queue-req: 324
- 2025-07-20 15:33:44,155 - __main__ - INFO - sglang running req: 10 queue req: 324
- 2025-07-20 15:33:44,769 - sglang - INFO - [2025-07-20 15:33:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2697, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 323
- 2025-07-20 15:33:44,769 - __main__ - INFO - sglang running req: 9 queue req: 323
- 2025-07-20 15:33:45,645 - sglang - INFO - [2025-07-20 15:33:45 TP0] Prefill batch. #new-seq: 1, #new-token: 2785, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 322
- 2025-07-20 15:33:45,646 - __main__ - INFO - sglang running req: 9 queue req: 322
- 2025-07-20 15:33:46,747 - sglang - INFO - [2025-07-20 15:33:46 TP0] Decode batch. #running-req: 10, #token: 26069, token usage: 0.69, gen throughput (token/s): 153.55, #queue-req: 322
- 2025-07-20 15:33:46,747 - __main__ - INFO - sglang running req: 10 queue req: 322
- 2025-07-20 15:33:46,771 - sglang - INFO - [2025-07-20 15:33:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2779, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 321
- 2025-07-20 15:33:46,771 - __main__ - INFO - sglang running req: 9 queue req: 321
- 2025-07-20 15:33:47,897 - sglang - INFO - [2025-07-20 15:33:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2202, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 320
- 2025-07-20 15:33:47,897 - __main__ - INFO - sglang running req: 9 queue req: 320
- 2025-07-20 15:33:49,254 - sglang - INFO - [2025-07-20 15:33:49 TP0] Decode batch. #running-req: 10, #token: 27594, token usage: 0.73, gen throughput (token/s): 158.71, #queue-req: 320
- 2025-07-20 15:33:49,255 - __main__ - INFO - sglang running req: 10 queue req: 320
- 2025-07-20 15:33:50,285 - sglang - INFO - [2025-07-20 15:33:50 TP0] Decode batch. #running-req: 10, #token: 27994, token usage: 0.74, gen throughput (token/s): 388.19, #queue-req: 320
- 2025-07-20 15:33:50,285 - __main__ - INFO - sglang running req: 10 queue req: 320
- 2025-07-20 15:33:51,261 - sglang - INFO - [2025-07-20 15:33:51 TP0] Decode batch. #running-req: 10, #token: 28394, token usage: 0.75, gen throughput (token/s): 409.60, #queue-req: 320
- 2025-07-20 15:33:51,262 - __main__ - INFO - sglang running req: 10 queue req: 320
- 2025-07-20 15:33:52,241 - sglang - INFO - [2025-07-20 15:33:52 TP0] Decode batch. #running-req: 10, #token: 28794, token usage: 0.76, gen throughput (token/s): 408.30, #queue-req: 320
- 2025-07-20 15:33:52,241 - __main__ - INFO - sglang running req: 10 queue req: 320
- 2025-07-20 15:33:52,683 - sglang - INFO - [2025-07-20 15:33:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 319
- 2025-07-20 15:33:52,683 - __main__ - INFO - sglang running req: 9 queue req: 319
- 2025-07-20 15:33:53,728 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:33:53,728 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 802.07 1020.27
- sglang_output_tokens 230.23 294.95
- 2025-07-20 15:33:53,728 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 182 | 500
- 1 | 0 | 10
- 2025-07-20 15:33:53,730 - sglang - INFO - [2025-07-20 15:33:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2854, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 318
- 2025-07-20 15:33:53,731 - __main__ - INFO - sglang running req: 9 queue req: 318
- 2025-07-20 15:33:54,803 - sglang - INFO - [2025-07-20 15:33:54 TP0] Decode batch. #running-req: 10, #token: 29421, token usage: 0.77, gen throughput (token/s): 155.34, #queue-req: 318
- 2025-07-20 15:33:54,803 - __main__ - INFO - sglang running req: 10 queue req: 318
- 2025-07-20 15:33:55,785 - sglang - INFO - [2025-07-20 15:33:55 TP0] Decode batch. #running-req: 10, #token: 29821, token usage: 0.79, gen throughput (token/s): 407.34, #queue-req: 318
- 2025-07-20 15:33:55,786 - __main__ - INFO - sglang running req: 10 queue req: 318
- 2025-07-20 15:33:56,671 - sglang - INFO - [2025-07-20 15:33:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 317
- 2025-07-20 15:33:56,671 - __main__ - INFO - sglang running req: 9 queue req: 317
- 2025-07-20 15:33:57,594 - sglang - INFO - [2025-07-20 15:33:57 TP0] Decode batch. #running-req: 10, #token: 29257, token usage: 0.77, gen throughput (token/s): 220.61, #queue-req: 317
- 2025-07-20 15:33:57,594 - __main__ - INFO - sglang running req: 10 queue req: 317
- 2025-07-20 15:33:58,530 - sglang - INFO - [2025-07-20 15:33:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2161, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 316
- 2025-07-20 15:33:58,530 - __main__ - INFO - sglang running req: 9 queue req: 316
- 2025-07-20 15:33:59,248 - sglang - INFO - [2025-07-20 15:33:59 TP0] Decode batch. #running-req: 10, #token: 29550, token usage: 0.78, gen throughput (token/s): 241.24, #queue-req: 316
- 2025-07-20 15:33:59,248 - __main__ - INFO - sglang running req: 10 queue req: 316
- 2025-07-20 15:34:00,230 - sglang - INFO - [2025-07-20 15:34:00 TP0] Decode batch. #running-req: 10, #token: 29950, token usage: 0.79, gen throughput (token/s): 407.32, #queue-req: 316
- 2025-07-20 15:34:00,230 - __main__ - INFO - sglang running req: 10 queue req: 316
- 2025-07-20 15:34:01,216 - sglang - INFO - [2025-07-20 15:34:01 TP0] Decode batch. #running-req: 10, #token: 30350, token usage: 0.80, gen throughput (token/s): 405.78, #queue-req: 316
- 2025-07-20 15:34:01,216 - __main__ - INFO - sglang running req: 10 queue req: 316
- 2025-07-20 15:34:02,202 - sglang - INFO - [2025-07-20 15:34:02 TP0] Decode batch. #running-req: 10, #token: 30750, token usage: 0.81, gen throughput (token/s): 405.73, #queue-req: 316
- 2025-07-20 15:34:02,202 - __main__ - INFO - sglang running req: 10 queue req: 316
- 2025-07-20 15:34:03,188 - sglang - INFO - [2025-07-20 15:34:03 TP0] Decode batch. #running-req: 10, #token: 31150, token usage: 0.82, gen throughput (token/s): 405.54, #queue-req: 316
- 2025-07-20 15:34:03,188 - __main__ - INFO - sglang running req: 10 queue req: 316
- 2025-07-20 15:34:03,729 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:34:03,730 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 796.00 1017.58
- sglang_output_tokens 228.53 293.85
- 2025-07-20 15:34:03,730 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 184 | 500
- 1 | 0 | 10
- 2025-07-20 15:34:04,174 - sglang - INFO - [2025-07-20 15:34:04 TP0] Decode batch. #running-req: 10, #token: 31550, token usage: 0.83, gen throughput (token/s): 405.49, #queue-req: 316
- 2025-07-20 15:34:04,175 - __main__ - INFO - sglang running req: 10 queue req: 316
- 2025-07-20 15:34:05,091 - sglang - INFO - [2025-07-20 15:34:05 TP0] Prefill batch. #new-seq: 1, #new-token: 2511, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 315
- 2025-07-20 15:34:05,091 - __main__ - INFO - sglang running req: 9 queue req: 315
- 2025-07-20 15:34:05,919 - sglang - INFO - [2025-07-20 15:34:05 TP0] Decode batch. #running-req: 10, #token: 31237, token usage: 0.82, gen throughput (token/s): 228.72, #queue-req: 315
- 2025-07-20 15:34:05,919 - __main__ - INFO - sglang running req: 10 queue req: 315
- 2025-07-20 15:34:06,903 - sglang - INFO - [2025-07-20 15:34:06 TP0] Decode batch. #running-req: 10, #token: 31637, token usage: 0.83, gen throughput (token/s): 406.47, #queue-req: 315
- 2025-07-20 15:34:06,903 - __main__ - INFO - sglang running req: 10 queue req: 315
- 2025-07-20 15:34:07,076 - sglang - INFO - [2025-07-20 15:34:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2648, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 314
- 2025-07-20 15:34:07,076 - __main__ - INFO - sglang running req: 9 queue req: 314
- 2025-07-20 15:34:08,675 - sglang - INFO - [2025-07-20 15:34:08 TP0] Decode batch. #running-req: 10, #token: 31890, token usage: 0.84, gen throughput (token/s): 225.17, #queue-req: 314
- 2025-07-20 15:34:08,675 - __main__ - INFO - sglang running req: 10 queue req: 314
- 2025-07-20 15:34:09,667 - sglang - INFO - [2025-07-20 15:34:09 TP0] Decode batch. #running-req: 10, #token: 32290, token usage: 0.85, gen throughput (token/s): 403.09, #queue-req: 314
- 2025-07-20 15:34:09,668 - __main__ - INFO - sglang running req: 10 queue req: 314
- 2025-07-20 15:34:10,670 - sglang - INFO - [2025-07-20 15:34:10 TP0] Decode batch. #running-req: 10, #token: 32690, token usage: 0.86, gen throughput (token/s): 398.84, #queue-req: 314
- 2025-07-20 15:34:10,671 - __main__ - INFO - sglang running req: 10 queue req: 314
- 2025-07-20 15:34:11,665 - sglang - INFO - [2025-07-20 15:34:11 TP0] Decode batch. #running-req: 10, #token: 33090, token usage: 0.87, gen throughput (token/s): 402.02, #queue-req: 314
- 2025-07-20 15:34:11,665 - __main__ - INFO - sglang running req: 10 queue req: 314
- 2025-07-20 15:34:12,663 - sglang - INFO - [2025-07-20 15:34:12 TP0] Decode batch. #running-req: 10, #token: 33490, token usage: 0.88, gen throughput (token/s): 400.87, #queue-req: 314
- 2025-07-20 15:34:12,663 - __main__ - INFO - sglang running req: 10 queue req: 314
- 2025-07-20 15:34:13,310 - sglang - INFO - [2025-07-20 15:34:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2513, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 9, #queue-req: 313
- 2025-07-20 15:34:13,310 - __main__ - INFO - sglang running req: 9 queue req: 313
- 2025-07-20 15:34:13,731 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:34:13,732 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 795.22 987.39
- sglang_output_tokens 228.08 286.21
- 2025-07-20 15:34:13,732 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 187 | 500
- 1 | 0 | 10
- 2025-07-20 15:34:14,409 - sglang - INFO - [2025-07-20 15:34:14 TP0] Decode batch. #running-req: 10, #token: 32865, token usage: 0.87, gen throughput (token/s): 228.52, #queue-req: 313
- 2025-07-20 15:34:14,409 - __main__ - INFO - sglang running req: 10 queue req: 313
- 2025-07-20 15:34:14,483 - sglang - INFO - [2025-07-20 15:34:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2013, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 312
- 2025-07-20 15:34:14,483 - __main__ - INFO - sglang running req: 9 queue req: 312
- 2025-07-20 15:34:15,828 - sglang - INFO - [2025-07-20 15:34:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1530, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 311
- 2025-07-20 15:34:15,828 - __main__ - INFO - sglang running req: 9 queue req: 311
- 2025-07-20 15:34:16,613 - sglang - INFO - [2025-07-20 15:34:16 TP0] Decode batch. #running-req: 10, #token: 29519, token usage: 0.78, gen throughput (token/s): 180.59, #queue-req: 311
- 2025-07-20 15:34:16,613 - __main__ - INFO - sglang running req: 10 queue req: 311
- 2025-07-20 15:34:16,809 - sglang - INFO - [2025-07-20 15:34:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2211, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 310
- 2025-07-20 15:34:16,809 - __main__ - INFO - sglang running req: 9 queue req: 310
- 2025-07-20 15:34:18,318 - sglang - INFO - [2025-07-20 15:34:18 TP0] Decode batch. #running-req: 10, #token: 29399, token usage: 0.77, gen throughput (token/s): 233.96, #queue-req: 310
- 2025-07-20 15:34:18,319 - __main__ - INFO - sglang running req: 10 queue req: 310
- 2025-07-20 15:34:19,355 - sglang - INFO - [2025-07-20 15:34:19 TP0] Decode batch. #running-req: 10, #token: 29799, token usage: 0.78, gen throughput (token/s): 385.98, #queue-req: 310
- 2025-07-20 15:34:19,355 - __main__ - INFO - sglang running req: 10 queue req: 310
- 2025-07-20 15:34:20,351 - sglang - INFO - [2025-07-20 15:34:20 TP0] Decode batch. #running-req: 10, #token: 30199, token usage: 0.79, gen throughput (token/s): 401.40, #queue-req: 310
- 2025-07-20 15:34:20,352 - __main__ - INFO - sglang running req: 10 queue req: 310
- 2025-07-20 15:34:21,370 - sglang - INFO - [2025-07-20 15:34:21 TP0] Decode batch. #running-req: 10, #token: 30599, token usage: 0.81, gen throughput (token/s): 392.54, #queue-req: 310
- 2025-07-20 15:34:21,370 - __main__ - INFO - sglang running req: 10 queue req: 310
- 2025-07-20 15:34:21,617 - sglang - INFO - [2025-07-20 15:34:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2581, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 309
- 2025-07-20 15:34:21,617 - __main__ - INFO - sglang running req: 9 queue req: 309
- 2025-07-20 15:34:22,989 - sglang - INFO - [2025-07-20 15:34:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2861, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 308
- 2025-07-20 15:34:22,989 - __main__ - INFO - sglang running req: 9 queue req: 308
- 2025-07-20 15:34:23,733 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:34:23,734 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 804.17 1004.08
- sglang_output_tokens 231.18 292.01
- 2025-07-20 15:34:23,734 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 192 | 500
- 1 | 0 | 10
- 2025-07-20 15:34:23,964 - sglang - INFO - [2025-07-20 15:34:23 TP0] Decode batch. #running-req: 10, #token: 29355, token usage: 0.77, gen throughput (token/s): 153.42, #queue-req: 308
- 2025-07-20 15:34:23,964 - __main__ - INFO - sglang running req: 10 queue req: 308
- 2025-07-20 15:34:24,038 - sglang - INFO - [2025-07-20 15:34:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2070, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 307
- 2025-07-20 15:34:24,038 - __main__ - INFO - sglang running req: 9 queue req: 307
- 2025-07-20 15:34:25,623 - sglang - INFO - [2025-07-20 15:34:25 TP0] Decode batch. #running-req: 10, #token: 28197, token usage: 0.74, gen throughput (token/s): 240.53, #queue-req: 307
- 2025-07-20 15:34:25,623 - __main__ - INFO - sglang running req: 10 queue req: 307
- 2025-07-20 15:34:26,334 - sglang - INFO - [2025-07-20 15:34:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2258, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 306
- 2025-07-20 15:34:26,334 - __main__ - INFO - sglang running req: 9 queue req: 306
- 2025-07-20 15:34:27,329 - sglang - INFO - [2025-07-20 15:34:27 TP0] Decode batch. #running-req: 10, #token: 29047, token usage: 0.76, gen throughput (token/s): 233.88, #queue-req: 306
- 2025-07-20 15:34:27,329 - __main__ - INFO - sglang running req: 10 queue req: 306
- 2025-07-20 15:34:28,315 - sglang - INFO - [2025-07-20 15:34:28 TP0] Decode batch. #running-req: 10, #token: 29447, token usage: 0.78, gen throughput (token/s): 405.65, #queue-req: 306
- 2025-07-20 15:34:28,316 - __main__ - INFO - sglang running req: 10 queue req: 306
- 2025-07-20 15:34:29,296 - sglang - INFO - [2025-07-20 15:34:29 TP0] Decode batch. #running-req: 10, #token: 29847, token usage: 0.79, gen throughput (token/s): 407.73, #queue-req: 306
- 2025-07-20 15:34:29,297 - __main__ - INFO - sglang running req: 10 queue req: 306
- 2025-07-20 15:34:29,370 - sglang - INFO - [2025-07-20 15:34:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2209, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 305
- 2025-07-20 15:34:29,370 - __main__ - INFO - sglang running req: 9 queue req: 305
- 2025-07-20 15:34:30,289 - sglang - INFO - [2025-07-20 15:34:30 TP0] Prefill batch. #new-seq: 2, #new-token: 4820, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.63, #running-req: 9, #queue-req: 303
- 2025-07-20 15:34:30,290 - __main__ - INFO - sglang running req: 9 queue req: 303
- 2025-07-20 15:34:32,436 - sglang - INFO - [2025-07-20 15:34:32 TP0] Decode batch. #running-req: 11, #token: 28999, token usage: 0.76, gen throughput (token/s): 135.99, #queue-req: 303
- 2025-07-20 15:34:32,436 - __main__ - INFO - sglang running req: 11 queue req: 303
- 2025-07-20 15:34:33,487 - sglang - INFO - [2025-07-20 15:34:33 TP0] Decode batch. #running-req: 11, #token: 29439, token usage: 0.77, gen throughput (token/s): 418.63, #queue-req: 303
- 2025-07-20 15:34:33,488 - __main__ - INFO - sglang running req: 11 queue req: 303
- 2025-07-20 15:34:33,734 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:34:33,735 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 807.14 1018.64
- sglang_output_tokens 234.06 299.89
- 2025-07-20 15:34:33,735 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 196 | 500
- 1 | 0 | 10
- 2025-07-20 15:34:34,579 - sglang - INFO - [2025-07-20 15:34:34 TP0] Decode batch. #running-req: 11, #token: 29879, token usage: 0.79, gen throughput (token/s): 402.92, #queue-req: 303
- 2025-07-20 15:34:34,580 - __main__ - INFO - sglang running req: 11 queue req: 303
- 2025-07-20 15:34:35,567 - sglang - INFO - [2025-07-20 15:34:35 TP0] Decode batch. #running-req: 11, #token: 30319, token usage: 0.80, gen throughput (token/s): 445.39, #queue-req: 303
- 2025-07-20 15:34:35,568 - __main__ - INFO - sglang running req: 11 queue req: 303
- 2025-07-20 15:34:35,938 - sglang - INFO - [2025-07-20 15:34:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2439, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 302
- 2025-07-20 15:34:35,938 - __main__ - INFO - sglang running req: 10 queue req: 302
- 2025-07-20 15:34:37,306 - sglang - INFO - [2025-07-20 15:34:37 TP0] Decode batch. #running-req: 11, #token: 30096, token usage: 0.79, gen throughput (token/s): 252.55, #queue-req: 302
- 2025-07-20 15:34:37,306 - __main__ - INFO - sglang running req: 11 queue req: 302
- 2025-07-20 15:34:37,454 - sglang - INFO - [2025-07-20 15:34:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2664, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 301
- 2025-07-20 15:34:37,454 - __main__ - INFO - sglang running req: 10 queue req: 301
- 2025-07-20 15:34:38,874 - sglang - INFO - [2025-07-20 15:34:38 TP0] Prefill batch. #new-seq: 1, #new-token: 1794, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 300
- 2025-07-20 15:34:38,874 - __main__ - INFO - sglang running req: 10 queue req: 300
- 2025-07-20 15:34:39,571 - sglang - INFO - [2025-07-20 15:34:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2775, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 299
- 2025-07-20 15:34:39,571 - __main__ - INFO - sglang running req: 10 queue req: 299
- 2025-07-20 15:34:40,551 - sglang - INFO - [2025-07-20 15:34:40 TP0] Decode batch. #running-req: 11, #token: 28845, token usage: 0.76, gen throughput (token/s): 134.66, #queue-req: 299
- 2025-07-20 15:34:40,551 - __main__ - INFO - sglang running req: 11 queue req: 299
- 2025-07-20 15:34:41,535 - sglang - INFO - [2025-07-20 15:34:41 TP0] Decode batch. #running-req: 11, #token: 29285, token usage: 0.77, gen throughput (token/s): 446.92, #queue-req: 299
- 2025-07-20 15:34:41,535 - __main__ - INFO - sglang running req: 11 queue req: 299
- 2025-07-20 15:34:42,522 - sglang - INFO - [2025-07-20 15:34:42 TP0] Decode batch. #running-req: 11, #token: 29725, token usage: 0.78, gen throughput (token/s): 445.72, #queue-req: 299
- 2025-07-20 15:34:42,523 - __main__ - INFO - sglang running req: 11 queue req: 299
- 2025-07-20 15:34:43,510 - sglang - INFO - [2025-07-20 15:34:43 TP0] Decode batch. #running-req: 11, #token: 30165, token usage: 0.79, gen throughput (token/s): 445.44, #queue-req: 299
- 2025-07-20 15:34:43,511 - __main__ - INFO - sglang running req: 11 queue req: 299
- 2025-07-20 15:34:43,737 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:34:43,738 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 809.36 1014.52
- sglang_output_tokens 234.57 297.32
- 2025-07-20 15:34:43,738 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 200 | 500
- 1 | 0 | 10
- 2025-07-20 15:34:44,127 - sglang - INFO - [2025-07-20 15:34:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1674, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 298
- 2025-07-20 15:34:44,127 - __main__ - INFO - sglang running req: 10 queue req: 298
- 2025-07-20 15:34:45,097 - sglang - INFO - [2025-07-20 15:34:45 TP0] Decode batch. #running-req: 11, #token: 29667, token usage: 0.78, gen throughput (token/s): 276.64, #queue-req: 298
- 2025-07-20 15:34:45,097 - __main__ - INFO - sglang running req: 11 queue req: 298
- 2025-07-20 15:34:46,083 - sglang - INFO - [2025-07-20 15:34:46 TP0] Decode batch. #running-req: 11, #token: 30107, token usage: 0.79, gen throughput (token/s): 446.18, #queue-req: 298
- 2025-07-20 15:34:46,084 - __main__ - INFO - sglang running req: 11 queue req: 298
- 2025-07-20 15:34:46,998 - sglang - INFO - [2025-07-20 15:34:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2773, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 297
- 2025-07-20 15:34:46,998 - __main__ - INFO - sglang running req: 10 queue req: 297
- 2025-07-20 15:34:47,877 - sglang - INFO - [2025-07-20 15:34:47 TP0] Decode batch. #running-req: 11, #token: 30494, token usage: 0.80, gen throughput (token/s): 244.73, #queue-req: 297
- 2025-07-20 15:34:47,877 - __main__ - INFO - sglang running req: 11 queue req: 297
- 2025-07-20 15:34:48,866 - sglang - INFO - [2025-07-20 15:34:48 TP0] Decode batch. #running-req: 11, #token: 30934, token usage: 0.81, gen throughput (token/s): 445.05, #queue-req: 297
- 2025-07-20 15:34:48,866 - __main__ - INFO - sglang running req: 11 queue req: 297
- 2025-07-20 15:34:49,411 - sglang - INFO - [2025-07-20 15:34:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2649, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 296
- 2025-07-20 15:34:49,411 - __main__ - INFO - sglang running req: 10 queue req: 296
- 2025-07-20 15:34:50,643 - sglang - INFO - [2025-07-20 15:34:50 TP0] Decode batch. #running-req: 11, #token: 30710, token usage: 0.81, gen throughput (token/s): 247.03, #queue-req: 296
- 2025-07-20 15:34:50,643 - __main__ - INFO - sglang running req: 11 queue req: 296
- 2025-07-20 15:34:51,607 - sglang - INFO - [2025-07-20 15:34:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2762, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 295
- 2025-07-20 15:34:51,607 - __main__ - INFO - sglang running req: 10 queue req: 295
- 2025-07-20 15:34:52,437 - sglang - INFO - [2025-07-20 15:34:52 TP0] Decode batch. #running-req: 11, #token: 31107, token usage: 0.82, gen throughput (token/s): 244.69, #queue-req: 295
- 2025-07-20 15:34:52,437 - __main__ - INFO - sglang running req: 11 queue req: 295
- 2025-07-20 15:34:53,379 - sglang - INFO - [2025-07-20 15:34:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2448, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 294
- 2025-07-20 15:34:53,379 - __main__ - INFO - sglang running req: 10 queue req: 294
- 2025-07-20 15:34:53,739 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:34:53,739 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 814.05 1004.03
- sglang_output_tokens 235.40 292.72
- 2025-07-20 15:34:53,740 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 205 | 500
- 1 | 0 | 10
- 2025-07-20 15:34:54,181 - sglang - INFO - [2025-07-20 15:34:54 TP0] Decode batch. #running-req: 11, #token: 31794, token usage: 0.84, gen throughput (token/s): 251.75, #queue-req: 294
- 2025-07-20 15:34:54,181 - __main__ - INFO - sglang running req: 11 queue req: 294
- 2025-07-20 15:34:55,174 - sglang - INFO - [2025-07-20 15:34:55 TP0] Decode batch. #running-req: 11, #token: 28527, token usage: 0.75, gen throughput (token/s): 442.99, #queue-req: 294
- 2025-07-20 15:34:55,174 - __main__ - INFO - sglang running req: 11 queue req: 294
- 2025-07-20 15:34:55,199 - sglang - INFO - [2025-07-20 15:34:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 293
- 2025-07-20 15:34:55,199 - __main__ - INFO - sglang running req: 10 queue req: 293
- 2025-07-20 15:34:56,497 - sglang - INFO - [2025-07-20 15:34:56 TP0] Prefill batch. #new-seq: 1, #new-token: 1271, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 292
- 2025-07-20 15:34:56,497 - __main__ - INFO - sglang running req: 10 queue req: 292
- 2025-07-20 15:34:57,423 - sglang - INFO - [2025-07-20 15:34:57 TP0] Decode batch. #running-req: 11, #token: 30721, token usage: 0.81, gen throughput (token/s): 194.72, #queue-req: 292
- 2025-07-20 15:34:57,424 - __main__ - INFO - sglang running req: 11 queue req: 292
- 2025-07-20 15:34:58,414 - sglang - INFO - [2025-07-20 15:34:58 TP0] Decode batch. #running-req: 11, #token: 31161, token usage: 0.82, gen throughput (token/s): 444.31, #queue-req: 292
- 2025-07-20 15:34:58,414 - __main__ - INFO - sglang running req: 11 queue req: 292
- 2025-07-20 15:34:59,403 - sglang - INFO - [2025-07-20 15:34:59 TP0] Decode batch. #running-req: 11, #token: 31601, token usage: 0.83, gen throughput (token/s): 444.59, #queue-req: 292
- 2025-07-20 15:34:59,404 - __main__ - INFO - sglang running req: 11 queue req: 292
- 2025-07-20 15:35:00,410 - sglang - INFO - [2025-07-20 15:35:00 TP0] Decode batch. #running-req: 11, #token: 32041, token usage: 0.84, gen throughput (token/s): 436.93, #queue-req: 292
- 2025-07-20 15:35:00,410 - __main__ - INFO - sglang running req: 11 queue req: 292
- 2025-07-20 15:35:00,500 - sglang - INFO - [2025-07-20 15:35:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 291
- 2025-07-20 15:35:00,500 - __main__ - INFO - sglang running req: 10 queue req: 291
- 2025-07-20 15:35:01,576 - sglang - INFO - [2025-07-20 15:35:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 290
- 2025-07-20 15:35:01,576 - __main__ - INFO - sglang running req: 10 queue req: 290
- 2025-07-20 15:35:02,944 - sglang - INFO - [2025-07-20 15:35:02 TP0] Decode batch. #running-req: 11, #token: 32668, token usage: 0.86, gen throughput (token/s): 172.90, #queue-req: 290
- 2025-07-20 15:35:02,944 - __main__ - INFO - sglang running req: 11 queue req: 290
- 2025-07-20 15:35:03,740 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:35:03,741 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 814.11 1017.64
- sglang_output_tokens 235.06 296.45
- 2025-07-20 15:35:03,741 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 209 | 500
- 1 | 0 | 10
- 2025-07-20 15:35:03,939 - sglang - INFO - [2025-07-20 15:35:03 TP0] Decode batch. #running-req: 11, #token: 33108, token usage: 0.87, gen throughput (token/s): 441.85, #queue-req: 290
- 2025-07-20 15:35:03,940 - __main__ - INFO - sglang running req: 11 queue req: 290
- 2025-07-20 15:35:04,937 - sglang - INFO - [2025-07-20 15:35:04 TP0] Decode batch. #running-req: 11, #token: 33548, token usage: 0.88, gen throughput (token/s): 441.12, #queue-req: 290
- 2025-07-20 15:35:04,937 - __main__ - INFO - sglang running req: 11 queue req: 290
- 2025-07-20 15:35:05,934 - sglang - INFO - [2025-07-20 15:35:05 TP0] Decode batch. #running-req: 11, #token: 33988, token usage: 0.89, gen throughput (token/s): 441.40, #queue-req: 290
- 2025-07-20 15:35:05,934 - __main__ - INFO - sglang running req: 11 queue req: 290
- 2025-07-20 15:35:06,406 - sglang - INFO - [2025-07-20 15:35:06 TP0] Prefill batch. #new-seq: 1, #new-token: 1377, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.81, #running-req: 10, #queue-req: 289
- 2025-07-20 15:35:06,407 - __main__ - INFO - sglang running req: 10 queue req: 289
- 2025-07-20 15:35:07,422 - sglang - INFO - [2025-07-20 15:35:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2245, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 288
- 2025-07-20 15:35:07,422 - __main__ - INFO - sglang running req: 10 queue req: 288
- 2025-07-20 15:35:08,170 - sglang - INFO - [2025-07-20 15:35:08 TP0] Decode batch. #running-req: 11, #token: 30992, token usage: 0.82, gen throughput (token/s): 195.83, #queue-req: 288
- 2025-07-20 15:35:08,171 - __main__ - INFO - sglang running req: 11 queue req: 288
- 2025-07-20 15:35:08,716 - sglang - INFO - [2025-07-20 15:35:08 TP0] Prefill batch. #new-seq: 1, #new-token: 1945, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 287
- 2025-07-20 15:35:08,716 - __main__ - INFO - sglang running req: 10 queue req: 287
- 2025-07-20 15:35:09,814 - sglang - INFO - [2025-07-20 15:35:09 TP0] Decode batch. #running-req: 11, #token: 30051, token usage: 0.79, gen throughput (token/s): 267.10, #queue-req: 287
- 2025-07-20 15:35:09,814 - __main__ - INFO - sglang running req: 11 queue req: 287
- 2025-07-20 15:35:10,358 - sglang - INFO - [2025-07-20 15:35:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2913, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 286
- 2025-07-20 15:35:10,358 - __main__ - INFO - sglang running req: 10 queue req: 286
- 2025-07-20 15:35:11,634 - sglang - INFO - [2025-07-20 15:35:11 TP0] Decode batch. #running-req: 11, #token: 29865, token usage: 0.79, gen throughput (token/s): 241.14, #queue-req: 286
- 2025-07-20 15:35:11,635 - __main__ - INFO - sglang running req: 11 queue req: 286
- 2025-07-20 15:35:12,475 - sglang - INFO - [2025-07-20 15:35:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2254, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 285
- 2025-07-20 15:35:12,475 - __main__ - INFO - sglang running req: 10 queue req: 285
- 2025-07-20 15:35:13,348 - sglang - INFO - [2025-07-20 15:35:13 TP0] Decode batch. #running-req: 11, #token: 31047, token usage: 0.82, gen throughput (token/s): 256.15, #queue-req: 285
- 2025-07-20 15:35:13,348 - __main__ - INFO - sglang running req: 11 queue req: 285
- 2025-07-20 15:35:13,742 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:35:13,743 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 819.86 1014.03
- sglang_output_tokens 237.29 295.29
- 2025-07-20 15:35:13,743 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 214 | 500
- 1 | 0 | 10
- 2025-07-20 15:35:14,338 - sglang - INFO - [2025-07-20 15:35:14 TP0] Decode batch. #running-req: 11, #token: 31487, token usage: 0.83, gen throughput (token/s): 444.65, #queue-req: 285
- 2025-07-20 15:35:14,338 - __main__ - INFO - sglang running req: 11 queue req: 285
- 2025-07-20 15:35:15,327 - sglang - INFO - [2025-07-20 15:35:15 TP0] Decode batch. #running-req: 11, #token: 31927, token usage: 0.84, gen throughput (token/s): 444.73, #queue-req: 285
- 2025-07-20 15:35:15,327 - __main__ - INFO - sglang running req: 11 queue req: 285
- 2025-07-20 15:35:16,321 - sglang - INFO - [2025-07-20 15:35:16 TP0] Decode batch. #running-req: 11, #token: 32367, token usage: 0.85, gen throughput (token/s): 442.81, #queue-req: 285
- 2025-07-20 15:35:16,321 - __main__ - INFO - sglang running req: 11 queue req: 285
- 2025-07-20 15:35:17,335 - sglang - INFO - [2025-07-20 15:35:17 TP0] Decode batch. #running-req: 11, #token: 32807, token usage: 0.86, gen throughput (token/s): 433.69, #queue-req: 285
- 2025-07-20 15:35:17,336 - __main__ - INFO - sglang running req: 11 queue req: 285
- 2025-07-20 15:35:18,083 - sglang - INFO - [2025-07-20 15:35:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2520, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 284
- 2025-07-20 15:35:18,083 - __main__ - INFO - sglang running req: 10 queue req: 284
- 2025-07-20 15:35:19,089 - sglang - INFO - [2025-07-20 15:35:19 TP0] Decode batch. #running-req: 11, #token: 32310, token usage: 0.85, gen throughput (token/s): 250.33, #queue-req: 284
- 2025-07-20 15:35:19,089 - __main__ - INFO - sglang running req: 11 queue req: 284
- 2025-07-20 15:35:19,585 - sglang - INFO - [2025-07-20 15:35:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2443, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 283
- 2025-07-20 15:35:19,585 - __main__ - INFO - sglang running req: 10 queue req: 283
- 2025-07-20 15:35:20,439 - sglang - INFO - [2025-07-20 15:35:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2675, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 282
- 2025-07-20 15:35:20,439 - __main__ - INFO - sglang running req: 10 queue req: 282
- 2025-07-20 15:35:21,622 - sglang - INFO - [2025-07-20 15:35:21 TP0] Decode batch. #running-req: 11, #token: 30645, token usage: 0.81, gen throughput (token/s): 172.92, #queue-req: 282
- 2025-07-20 15:35:21,622 - __main__ - INFO - sglang running req: 11 queue req: 282
- 2025-07-20 15:35:22,537 - sglang - INFO - [2025-07-20 15:35:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2776, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 281
- 2025-07-20 15:35:22,537 - __main__ - INFO - sglang running req: 10 queue req: 281
- 2025-07-20 15:35:23,417 - sglang - INFO - [2025-07-20 15:35:23 TP0] Decode batch. #running-req: 11, #token: 30614, token usage: 0.81, gen throughput (token/s): 244.55, #queue-req: 281
- 2025-07-20 15:35:23,418 - __main__ - INFO - sglang running req: 11 queue req: 281
- 2025-07-20 15:35:23,744 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:35:23,745 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 823.72 1000.82
- sglang_output_tokens 238.76 291.81
- 2025-07-20 15:35:23,745 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 218 | 500
- 1 | 0 | 10
- 2025-07-20 15:35:24,407 - sglang - INFO - [2025-07-20 15:35:24 TP0] Decode batch. #running-req: 11, #token: 31054, token usage: 0.82, gen throughput (token/s): 444.43, #queue-req: 281
- 2025-07-20 15:35:24,407 - __main__ - INFO - sglang running req: 11 queue req: 281
- 2025-07-20 15:35:25,398 - sglang - INFO - [2025-07-20 15:35:25 TP0] Decode batch. #running-req: 11, #token: 31494, token usage: 0.83, gen throughput (token/s): 444.01, #queue-req: 281
- 2025-07-20 15:35:25,398 - __main__ - INFO - sglang running req: 11 queue req: 281
- 2025-07-20 15:35:26,217 - sglang - INFO - [2025-07-20 15:35:26 TP0] Prefill batch. #new-seq: 1, #new-token: 1999, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 280
- 2025-07-20 15:35:26,217 - __main__ - INFO - sglang running req: 10 queue req: 280
- 2025-07-20 15:35:27,046 - sglang - INFO - [2025-07-20 15:35:27 TP0] Decode batch. #running-req: 11, #token: 30585, token usage: 0.81, gen throughput (token/s): 266.46, #queue-req: 280
- 2025-07-20 15:35:27,046 - __main__ - INFO - sglang running req: 11 queue req: 280
- 2025-07-20 15:35:28,035 - sglang - INFO - [2025-07-20 15:35:28 TP0] Decode batch. #running-req: 11, #token: 31025, token usage: 0.82, gen throughput (token/s): 444.72, #queue-req: 280
- 2025-07-20 15:35:28,035 - __main__ - INFO - sglang running req: 11 queue req: 280
- 2025-07-20 15:35:28,579 - sglang - INFO - [2025-07-20 15:35:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2120, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 279
- 2025-07-20 15:35:28,579 - __main__ - INFO - sglang running req: 10 queue req: 279
- 2025-07-20 15:35:29,495 - sglang - INFO - [2025-07-20 15:35:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 278
- 2025-07-20 15:35:29,495 - __main__ - INFO - sglang running req: 10 queue req: 278
- 2025-07-20 15:35:30,481 - sglang - INFO - [2025-07-20 15:35:30 TP0] Decode batch. #running-req: 11, #token: 30303, token usage: 0.80, gen throughput (token/s): 179.09, #queue-req: 278
- 2025-07-20 15:35:30,481 - __main__ - INFO - sglang running req: 11 queue req: 278
- 2025-07-20 15:35:31,184 - sglang - INFO - [2025-07-20 15:35:31 TP0] Prefill batch. #new-seq: 1, #new-token: 2368, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 277
- 2025-07-20 15:35:31,184 - __main__ - INFO - sglang running req: 10 queue req: 277
- 2025-07-20 15:35:32,252 - sglang - INFO - [2025-07-20 15:35:32 TP0] Decode batch. #running-req: 11, #token: 30238, token usage: 0.80, gen throughput (token/s): 247.89, #queue-req: 277
- 2025-07-20 15:35:32,252 - __main__ - INFO - sglang running req: 11 queue req: 277
- 2025-07-20 15:35:32,425 - sglang - INFO - [2025-07-20 15:35:32 TP0] Prefill batch. #new-seq: 2, #new-token: 4893, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 275
- 2025-07-20 15:35:32,425 - __main__ - INFO - sglang running req: 9 queue req: 275
- 2025-07-20 15:35:33,746 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:35:33,747 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 832.30 1035.52
- sglang_output_tokens 241.61 304.06
- 2025-07-20 15:35:33,747 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 224 | 500
- 1 | 0 | 10
- 2025-07-20 15:35:34,711 - sglang - INFO - [2025-07-20 15:35:34 TP0] Decode batch. #running-req: 11, #token: 29797, token usage: 0.78, gen throughput (token/s): 178.13, #queue-req: 275
- 2025-07-20 15:35:34,711 - __main__ - INFO - sglang running req: 11 queue req: 275
- 2025-07-20 15:35:35,700 - sglang - INFO - [2025-07-20 15:35:35 TP0] Decode batch. #running-req: 11, #token: 30237, token usage: 0.80, gen throughput (token/s): 444.72, #queue-req: 275
- 2025-07-20 15:35:35,700 - __main__ - INFO - sglang running req: 11 queue req: 275
- 2025-07-20 15:35:36,690 - sglang - INFO - [2025-07-20 15:35:36 TP0] Decode batch. #running-req: 11, #token: 30677, token usage: 0.81, gen throughput (token/s): 444.43, #queue-req: 275
- 2025-07-20 15:35:36,690 - __main__ - INFO - sglang running req: 11 queue req: 275
- 2025-07-20 15:35:37,682 - sglang - INFO - [2025-07-20 15:35:37 TP0] Decode batch. #running-req: 11, #token: 31117, token usage: 0.82, gen throughput (token/s): 443.73, #queue-req: 275
- 2025-07-20 15:35:37,682 - __main__ - INFO - sglang running req: 11 queue req: 275
- 2025-07-20 15:35:38,677 - sglang - INFO - [2025-07-20 15:35:38 TP0] Decode batch. #running-req: 11, #token: 31557, token usage: 0.83, gen throughput (token/s): 441.90, #queue-req: 275
- 2025-07-20 15:35:38,678 - __main__ - INFO - sglang running req: 11 queue req: 275
- 2025-07-20 15:35:39,672 - sglang - INFO - [2025-07-20 15:35:39 TP0] Decode batch. #running-req: 11, #token: 31997, token usage: 0.84, gen throughput (token/s): 442.33, #queue-req: 275
- 2025-07-20 15:35:39,672 - __main__ - INFO - sglang running req: 11 queue req: 275
- 2025-07-20 15:35:40,667 - sglang - INFO - [2025-07-20 15:35:40 TP0] Decode batch. #running-req: 11, #token: 32437, token usage: 0.85, gen throughput (token/s): 442.26, #queue-req: 275
- 2025-07-20 15:35:40,667 - __main__ - INFO - sglang running req: 11 queue req: 275
- 2025-07-20 15:35:41,680 - sglang - INFO - [2025-07-20 15:35:41 TP0] Decode batch. #running-req: 11, #token: 32877, token usage: 0.87, gen throughput (token/s): 434.35, #queue-req: 275
- 2025-07-20 15:35:41,680 - __main__ - INFO - sglang running req: 11 queue req: 275
- 2025-07-20 15:35:42,684 - sglang - INFO - [2025-07-20 15:35:42 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 274
- 2025-07-20 15:35:42,684 - __main__ - INFO - sglang running req: 10 queue req: 274
- 2025-07-20 15:35:43,563 - sglang - INFO - [2025-07-20 15:35:43 TP0] Decode batch. #running-req: 11, #token: 31753, token usage: 0.84, gen throughput (token/s): 233.12, #queue-req: 274
- 2025-07-20 15:35:43,563 - __main__ - INFO - sglang running req: 11 queue req: 274
- 2025-07-20 15:35:43,687 - sglang - INFO - [2025-07-20 15:35:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2353, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 273
- 2025-07-20 15:35:43,688 - __main__ - INFO - sglang running req: 10 queue req: 273
- 2025-07-20 15:35:43,748 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:35:43,748 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 827.86 1002.29
- sglang_output_tokens 240.40 292.39
- 2025-07-20 15:35:43,749 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 226 | 500
- 1 | 0 | 10
- 2025-07-20 15:35:45,303 - sglang - INFO - [2025-07-20 15:35:45 TP0] Decode batch. #running-req: 11, #token: 31331, token usage: 0.82, gen throughput (token/s): 252.27, #queue-req: 273
- 2025-07-20 15:35:45,304 - __main__ - INFO - sglang running req: 11 queue req: 273
- 2025-07-20 15:35:46,024 - sglang - INFO - [2025-07-20 15:35:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 272
- 2025-07-20 15:35:46,024 - __main__ - INFO - sglang running req: 10 queue req: 272
- 2025-07-20 15:35:47,128 - sglang - INFO - [2025-07-20 15:35:47 TP0] Decode batch. #running-req: 11, #token: 32076, token usage: 0.84, gen throughput (token/s): 240.53, #queue-req: 272
- 2025-07-20 15:35:47,129 - __main__ - INFO - sglang running req: 11 queue req: 272
- 2025-07-20 15:35:48,124 - sglang - INFO - [2025-07-20 15:35:48 TP0] Decode batch. #running-req: 11, #token: 32516, token usage: 0.86, gen throughput (token/s): 442.15, #queue-req: 272
- 2025-07-20 15:35:48,124 - __main__ - INFO - sglang running req: 11 queue req: 272
- 2025-07-20 15:35:48,448 - sglang - INFO - [2025-07-20 15:35:48 TP0] Prefill batch. #new-seq: 1, #new-token: 2494, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 271
- 2025-07-20 15:35:48,448 - __main__ - INFO - sglang running req: 10 queue req: 271
- 2025-07-20 15:35:49,804 - sglang - INFO - [2025-07-20 15:35:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 270
- 2025-07-20 15:35:49,804 - __main__ - INFO - sglang running req: 10 queue req: 270
- 2025-07-20 15:35:50,731 - sglang - INFO - [2025-07-20 15:35:50 TP0] Decode batch. #running-req: 11, #token: 32027, token usage: 0.84, gen throughput (token/s): 167.96, #queue-req: 270
- 2025-07-20 15:35:50,732 - __main__ - INFO - sglang running req: 11 queue req: 270
- 2025-07-20 15:35:51,724 - sglang - INFO - [2025-07-20 15:35:51 TP0] Decode batch. #running-req: 11, #token: 32467, token usage: 0.85, gen throughput (token/s): 443.35, #queue-req: 270
- 2025-07-20 15:35:51,724 - __main__ - INFO - sglang running req: 11 queue req: 270
- 2025-07-20 15:35:52,296 - sglang - INFO - [2025-07-20 15:35:52 TP0] Prefill batch. #new-seq: 1, #new-token: 1462, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 269
- 2025-07-20 15:35:52,296 - __main__ - INFO - sglang running req: 10 queue req: 269
- 2025-07-20 15:35:53,282 - sglang - INFO - [2025-07-20 15:35:53 TP0] Decode batch. #running-req: 11, #token: 31608, token usage: 0.83, gen throughput (token/s): 281.84, #queue-req: 269
- 2025-07-20 15:35:53,282 - __main__ - INFO - sglang running req: 11 queue req: 269
- 2025-07-20 15:35:53,381 - sglang - INFO - [2025-07-20 15:35:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 268
- 2025-07-20 15:35:53,382 - __main__ - INFO - sglang running req: 10 queue req: 268
- 2025-07-20 15:35:53,750 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:35:53,750 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 833.40 1003.15
- sglang_output_tokens 241.85 291.81
- 2025-07-20 15:35:53,750 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 231 | 500
- 1 | 0 | 10
- 2025-07-20 15:35:54,903 - sglang - INFO - [2025-07-20 15:35:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 267
- 2025-07-20 15:35:54,903 - __main__ - INFO - sglang running req: 10 queue req: 267
- 2025-07-20 15:35:55,756 - sglang - INFO - [2025-07-20 15:35:55 TP0] Decode batch. #running-req: 11, #token: 29744, token usage: 0.78, gen throughput (token/s): 177.01, #queue-req: 267
- 2025-07-20 15:35:55,757 - __main__ - INFO - sglang running req: 11 queue req: 267
- 2025-07-20 15:35:56,747 - sglang - INFO - [2025-07-20 15:35:56 TP0] Decode batch. #running-req: 11, #token: 30184, token usage: 0.79, gen throughput (token/s): 443.98, #queue-req: 267
- 2025-07-20 15:35:56,747 - __main__ - INFO - sglang running req: 11 queue req: 267
- 2025-07-20 15:35:57,069 - sglang - INFO - [2025-07-20 15:35:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2411, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 266
- 2025-07-20 15:35:57,069 - __main__ - INFO - sglang running req: 10 queue req: 266
- 2025-07-20 15:35:58,475 - sglang - INFO - [2025-07-20 15:35:58 TP0] Decode batch. #running-req: 11, #token: 30202, token usage: 0.80, gen throughput (token/s): 254.09, #queue-req: 266
- 2025-07-20 15:35:58,475 - __main__ - INFO - sglang running req: 11 queue req: 266
- 2025-07-20 15:35:59,464 - sglang - INFO - [2025-07-20 15:35:59 TP0] Decode batch. #running-req: 11, #token: 30642, token usage: 0.81, gen throughput (token/s): 444.68, #queue-req: 266
- 2025-07-20 15:35:59,464 - __main__ - INFO - sglang running req: 11 queue req: 266
- 2025-07-20 15:35:59,637 - sglang - INFO - [2025-07-20 15:35:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2764, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 265
- 2025-07-20 15:35:59,638 - __main__ - INFO - sglang running req: 10 queue req: 265
- 2025-07-20 15:36:01,265 - sglang - INFO - [2025-07-20 15:36:01 TP0] Decode batch. #running-req: 11, #token: 30360, token usage: 0.80, gen throughput (token/s): 243.71, #queue-req: 265
- 2025-07-20 15:36:01,266 - __main__ - INFO - sglang running req: 11 queue req: 265
- 2025-07-20 15:36:02,259 - sglang - INFO - [2025-07-20 15:36:02 TP0] Decode batch. #running-req: 11, #token: 30800, token usage: 0.81, gen throughput (token/s): 442.91, #queue-req: 265
- 2025-07-20 15:36:02,259 - __main__ - INFO - sglang running req: 11 queue req: 265
- 2025-07-20 15:36:02,781 - sglang - INFO - [2025-07-20 15:36:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2684, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 264
- 2025-07-20 15:36:02,781 - __main__ - INFO - sglang running req: 10 queue req: 264
- 2025-07-20 15:36:03,597 - sglang - INFO - [2025-07-20 15:36:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1977, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 263
- 2025-07-20 15:36:03,598 - __main__ - INFO - sglang running req: 10 queue req: 263
- 2025-07-20 15:36:03,751 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:36:03,752 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 838.08 1032.06
- sglang_output_tokens 243.57 302.53
- 2025-07-20 15:36:03,752 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 236 | 500
- 1 | 0 | 10
- 2025-07-20 15:36:04,702 - sglang - INFO - [2025-07-20 15:36:04 TP0] Decode batch. #running-req: 11, #token: 30560, token usage: 0.80, gen throughput (token/s): 179.28, #queue-req: 263
- 2025-07-20 15:36:04,702 - __main__ - INFO - sglang running req: 11 queue req: 263
- 2025-07-20 15:36:04,826 - sglang - INFO - [2025-07-20 15:36:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 262
- 2025-07-20 15:36:04,827 - __main__ - INFO - sglang running req: 10 queue req: 262
- 2025-07-20 15:36:06,531 - sglang - INFO - [2025-07-20 15:36:06 TP0] Decode batch. #running-req: 11, #token: 31257, token usage: 0.82, gen throughput (token/s): 239.99, #queue-req: 262
- 2025-07-20 15:36:06,531 - __main__ - INFO - sglang running req: 11 queue req: 262
- 2025-07-20 15:36:07,225 - sglang - INFO - [2025-07-20 15:36:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2387, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 261
- 2025-07-20 15:36:07,226 - __main__ - INFO - sglang running req: 10 queue req: 261
- 2025-07-20 15:36:08,275 - sglang - INFO - [2025-07-20 15:36:08 TP0] Decode batch. #running-req: 11, #token: 31209, token usage: 0.82, gen throughput (token/s): 251.81, #queue-req: 261
- 2025-07-20 15:36:08,275 - __main__ - INFO - sglang running req: 11 queue req: 261
- 2025-07-20 15:36:09,269 - sglang - INFO - [2025-07-20 15:36:09 TP0] Decode batch. #running-req: 11, #token: 31649, token usage: 0.83, gen throughput (token/s): 442.74, #queue-req: 261
- 2025-07-20 15:36:09,269 - __main__ - INFO - sglang running req: 11 queue req: 261
- 2025-07-20 15:36:10,264 - sglang - INFO - [2025-07-20 15:36:10 TP0] Decode batch. #running-req: 11, #token: 32089, token usage: 0.84, gen throughput (token/s): 441.86, #queue-req: 261
- 2025-07-20 15:36:10,265 - __main__ - INFO - sglang running req: 11 queue req: 261
- 2025-07-20 15:36:11,260 - sglang - INFO - [2025-07-20 15:36:11 TP0] Decode batch. #running-req: 11, #token: 32529, token usage: 0.86, gen throughput (token/s): 442.02, #queue-req: 261
- 2025-07-20 15:36:11,260 - __main__ - INFO - sglang running req: 11 queue req: 261
- 2025-07-20 15:36:12,131 - sglang - INFO - [2025-07-20 15:36:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2136, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 260
- 2025-07-20 15:36:12,132 - __main__ - INFO - sglang running req: 10 queue req: 260
- 2025-07-20 15:36:12,928 - sglang - INFO - [2025-07-20 15:36:12 TP0] Decode batch. #running-req: 11, #token: 32002, token usage: 0.84, gen throughput (token/s): 263.03, #queue-req: 260
- 2025-07-20 15:36:12,929 - __main__ - INFO - sglang running req: 11 queue req: 260
- 2025-07-20 15:36:13,753 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:36:13,753 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 836.11 1006.76
- sglang_output_tokens 242.24 294.29
- 2025-07-20 15:36:13,753 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 239 | 500
- 1 | 0 | 10
- 2025-07-20 15:36:13,924 - sglang - INFO - [2025-07-20 15:36:13 TP0] Decode batch. #running-req: 11, #token: 32442, token usage: 0.85, gen throughput (token/s): 442.08, #queue-req: 260
- 2025-07-20 15:36:13,924 - __main__ - INFO - sglang running req: 11 queue req: 260
- 2025-07-20 15:36:14,918 - sglang - INFO - [2025-07-20 15:36:14 TP0] Decode batch. #running-req: 11, #token: 32882, token usage: 0.87, gen throughput (token/s): 442.48, #queue-req: 260
- 2025-07-20 15:36:14,919 - __main__ - INFO - sglang running req: 11 queue req: 260
- 2025-07-20 15:36:14,993 - sglang - INFO - [2025-07-20 15:36:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2197, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 259
- 2025-07-20 15:36:14,994 - __main__ - INFO - sglang running req: 10 queue req: 259
- 2025-07-20 15:36:16,466 - sglang - INFO - [2025-07-20 15:36:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2720, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 258
- 2025-07-20 15:36:16,466 - __main__ - INFO - sglang running req: 10 queue req: 258
- 2025-07-20 15:36:17,449 - sglang - INFO - [2025-07-20 15:36:17 TP0] Decode batch. #running-req: 11, #token: 32134, token usage: 0.85, gen throughput (token/s): 173.09, #queue-req: 258
- 2025-07-20 15:36:17,449 - __main__ - INFO - sglang running req: 11 queue req: 258
- 2025-07-20 15:36:18,444 - sglang - INFO - [2025-07-20 15:36:18 TP0] Decode batch. #running-req: 11, #token: 32574, token usage: 0.86, gen throughput (token/s): 442.36, #queue-req: 258
- 2025-07-20 15:36:18,444 - __main__ - INFO - sglang running req: 11 queue req: 258
- 2025-07-20 15:36:19,440 - sglang - INFO - [2025-07-20 15:36:19 TP0] Decode batch. #running-req: 11, #token: 33014, token usage: 0.87, gen throughput (token/s): 441.51, #queue-req: 258
- 2025-07-20 15:36:19,440 - __main__ - INFO - sglang running req: 11 queue req: 258
- 2025-07-20 15:36:19,515 - sglang - INFO - [2025-07-20 15:36:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 257
- 2025-07-20 15:36:19,516 - __main__ - INFO - sglang running req: 10 queue req: 257
- 2025-07-20 15:36:21,272 - sglang - INFO - [2025-07-20 15:36:21 TP0] Decode batch. #running-req: 11, #token: 32639, token usage: 0.86, gen throughput (token/s): 239.63, #queue-req: 257
- 2025-07-20 15:36:21,272 - __main__ - INFO - sglang running req: 11 queue req: 257
- 2025-07-20 15:36:22,274 - sglang - INFO - [2025-07-20 15:36:22 TP0] Decode batch. #running-req: 10, #token: 30564, token usage: 0.80, gen throughput (token/s): 429.16, #queue-req: 257
- 2025-07-20 15:36:22,274 - __main__ - INFO - sglang running req: 10 queue req: 257
- 2025-07-20 15:36:22,595 - sglang - INFO - [2025-07-20 15:36:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 256
- 2025-07-20 15:36:22,595 - __main__ - INFO - sglang running req: 9 queue req: 256
- 2025-07-20 15:36:23,756 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:36:23,756 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 842.21 1013.41
- sglang_output_tokens 243.98 295.25
- 2025-07-20 15:36:23,756 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 244 | 500
- 1 | 0 | 10
- 2025-07-20 15:36:23,988 - sglang - INFO - [2025-07-20 15:36:23 TP0] Decode batch. #running-req: 10, #token: 29411, token usage: 0.77, gen throughput (token/s): 232.84, #queue-req: 256
- 2025-07-20 15:36:23,988 - __main__ - INFO - sglang running req: 10 queue req: 256
- 2025-07-20 15:36:24,727 - sglang - INFO - [2025-07-20 15:36:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 255
- 2025-07-20 15:36:24,727 - __main__ - INFO - sglang running req: 9 queue req: 255
- 2025-07-20 15:36:25,725 - sglang - INFO - [2025-07-20 15:36:25 TP0] Decode batch. #running-req: 10, #token: 28923, token usage: 0.76, gen throughput (token/s): 229.69, #queue-req: 255
- 2025-07-20 15:36:25,725 - __main__ - INFO - sglang running req: 10 queue req: 255
- 2025-07-20 15:36:25,971 - sglang - INFO - [2025-07-20 15:36:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2787, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 254
- 2025-07-20 15:36:25,972 - __main__ - INFO - sglang running req: 9 queue req: 254
- 2025-07-20 15:36:27,520 - sglang - INFO - [2025-07-20 15:36:27 TP0] Decode batch. #running-req: 10, #token: 29181, token usage: 0.77, gen throughput (token/s): 222.22, #queue-req: 254
- 2025-07-20 15:36:27,521 - __main__ - INFO - sglang running req: 10 queue req: 254
- 2025-07-20 15:36:28,508 - sglang - INFO - [2025-07-20 15:36:28 TP0] Decode batch. #running-req: 10, #token: 29581, token usage: 0.78, gen throughput (token/s): 405.02, #queue-req: 254
- 2025-07-20 15:36:28,508 - __main__ - INFO - sglang running req: 10 queue req: 254
- 2025-07-20 15:36:29,492 - sglang - INFO - [2025-07-20 15:36:29 TP0] Decode batch. #running-req: 10, #token: 29981, token usage: 0.79, gen throughput (token/s): 406.45, #queue-req: 254
- 2025-07-20 15:36:29,492 - __main__ - INFO - sglang running req: 10 queue req: 254
- 2025-07-20 15:36:30,479 - sglang - INFO - [2025-07-20 15:36:30 TP0] Decode batch. #running-req: 10, #token: 30381, token usage: 0.80, gen throughput (token/s): 405.15, #queue-req: 254
- 2025-07-20 15:36:30,479 - __main__ - INFO - sglang running req: 10 queue req: 254
- 2025-07-20 15:36:30,627 - sglang - INFO - [2025-07-20 15:36:30 TP0] Prefill batch. #new-seq: 2, #new-token: 4872, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 8, #queue-req: 252
- 2025-07-20 15:36:30,627 - __main__ - INFO - sglang running req: 8 queue req: 252
- 2025-07-20 15:36:32,938 - sglang - INFO - [2025-07-20 15:36:32 TP0] Decode batch. #running-req: 10, #token: 28541, token usage: 0.75, gen throughput (token/s): 161.86, #queue-req: 252
- 2025-07-20 15:36:32,938 - __main__ - INFO - sglang running req: 10 queue req: 252
- 2025-07-20 15:36:33,750 - sglang - INFO - [2025-07-20 15:36:33 TP0] Prefill batch. #new-seq: 2, #new-token: 4028, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 250
- 2025-07-20 15:36:33,750 - __main__ - INFO - sglang running req: 9 queue req: 250
- 2025-07-20 15:36:33,757 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:36:33,757 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 848.88 1020.25
- sglang_output_tokens 246.07 298.15
- 2025-07-20 15:36:33,757 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 249 | 500
- 1 | 0 | 10
- 2025-07-20 15:36:35,256 - sglang - INFO - [2025-07-20 15:36:35 TP0] Decode batch. #running-req: 11, #token: 29288, token usage: 0.77, gen throughput (token/s): 175.14, #queue-req: 250
- 2025-07-20 15:36:35,257 - __main__ - INFO - sglang running req: 11 queue req: 250
- 2025-07-20 15:36:36,084 - sglang - INFO - [2025-07-20 15:36:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2557, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 249
- 2025-07-20 15:36:36,084 - __main__ - INFO - sglang running req: 10 queue req: 249
- 2025-07-20 15:36:37,116 - sglang - INFO - [2025-07-20 15:36:37 TP0] Decode batch. #running-req: 11, #token: 29515, token usage: 0.78, gen throughput (token/s): 236.05, #queue-req: 249
- 2025-07-20 15:36:37,116 - __main__ - INFO - sglang running req: 11 queue req: 249
- 2025-07-20 15:36:38,106 - sglang - INFO - [2025-07-20 15:36:38 TP0] Decode batch. #running-req: 11, #token: 29955, token usage: 0.79, gen throughput (token/s): 444.40, #queue-req: 249
- 2025-07-20 15:36:38,106 - __main__ - INFO - sglang running req: 11 queue req: 249
- 2025-07-20 15:36:39,097 - sglang - INFO - [2025-07-20 15:36:39 TP0] Decode batch. #running-req: 11, #token: 30395, token usage: 0.80, gen throughput (token/s): 443.87, #queue-req: 249
- 2025-07-20 15:36:39,097 - __main__ - INFO - sglang running req: 11 queue req: 249
- 2025-07-20 15:36:39,917 - sglang - INFO - [2025-07-20 15:36:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2723, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 248
- 2025-07-20 15:36:39,917 - __main__ - INFO - sglang running req: 10 queue req: 248
- 2025-07-20 15:36:40,898 - sglang - INFO - [2025-07-20 15:36:40 TP0] Decode batch. #running-req: 11, #token: 30691, token usage: 0.81, gen throughput (token/s): 243.79, #queue-req: 248
- 2025-07-20 15:36:40,898 - __main__ - INFO - sglang running req: 11 queue req: 248
- 2025-07-20 15:36:41,890 - sglang - INFO - [2025-07-20 15:36:41 TP0] Decode batch. #running-req: 11, #token: 31131, token usage: 0.82, gen throughput (token/s): 443.71, #queue-req: 248
- 2025-07-20 15:36:41,890 - __main__ - INFO - sglang running req: 11 queue req: 248
- 2025-07-20 15:36:42,883 - sglang - INFO - [2025-07-20 15:36:42 TP0] Decode batch. #running-req: 10, #token: 29855, token usage: 0.79, gen throughput (token/s): 442.03, #queue-req: 248
- 2025-07-20 15:36:42,883 - __main__ - INFO - sglang running req: 10 queue req: 248
- 2025-07-20 15:36:43,758 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:36:43,758 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 845.09 1011.62
- sglang_output_tokens 244.79 295.57
- 2025-07-20 15:36:43,758 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 252 | 500
- 1 | 0 | 10
- 2025-07-20 15:36:43,870 - sglang - INFO - [2025-07-20 15:36:43 TP0] Decode batch. #running-req: 10, #token: 30255, token usage: 0.80, gen throughput (token/s): 405.17, #queue-req: 248
- 2025-07-20 15:36:43,870 - __main__ - INFO - sglang running req: 10 queue req: 248
- 2025-07-20 15:36:43,994 - sglang - INFO - [2025-07-20 15:36:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2746, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 247
- 2025-07-20 15:36:43,994 - __main__ - INFO - sglang running req: 9 queue req: 247
- 2025-07-20 15:36:45,692 - sglang - INFO - [2025-07-20 15:36:45 TP0] Decode batch. #running-req: 10, #token: 30527, token usage: 0.80, gen throughput (token/s): 219.01, #queue-req: 247
- 2025-07-20 15:36:45,692 - __main__ - INFO - sglang running req: 10 queue req: 247
- 2025-07-20 15:36:46,461 - sglang - INFO - [2025-07-20 15:36:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1102, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 246
- 2025-07-20 15:36:46,461 - __main__ - INFO - sglang running req: 9 queue req: 246
- 2025-07-20 15:36:47,168 - sglang - INFO - [2025-07-20 15:36:47 TP0] Decode batch. #running-req: 10, #token: 28471, token usage: 0.75, gen throughput (token/s): 270.24, #queue-req: 246
- 2025-07-20 15:36:47,168 - __main__ - INFO - sglang running req: 10 queue req: 246
- 2025-07-20 15:36:47,783 - sglang - INFO - [2025-07-20 15:36:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2765, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 245
- 2025-07-20 15:36:47,784 - __main__ - INFO - sglang running req: 9 queue req: 245
- 2025-07-20 15:36:48,964 - sglang - INFO - [2025-07-20 15:36:48 TP0] Decode batch. #running-req: 10, #token: 30500, token usage: 0.80, gen throughput (token/s): 222.25, #queue-req: 245
- 2025-07-20 15:36:48,964 - __main__ - INFO - sglang running req: 10 queue req: 245
- 2025-07-20 15:36:49,707 - sglang - INFO - [2025-07-20 15:36:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2462, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 244
- 2025-07-20 15:36:49,707 - __main__ - INFO - sglang running req: 9 queue req: 244
- 2025-07-20 15:36:50,710 - sglang - INFO - [2025-07-20 15:36:50 TP0] Decode batch. #running-req: 10, #token: 30784, token usage: 0.81, gen throughput (token/s): 228.51, #queue-req: 244
- 2025-07-20 15:36:50,710 - __main__ - INFO - sglang running req: 10 queue req: 244
- 2025-07-20 15:36:51,698 - sglang - INFO - [2025-07-20 15:36:51 TP0] Decode batch. #running-req: 10, #token: 31184, token usage: 0.82, gen throughput (token/s): 404.94, #queue-req: 244
- 2025-07-20 15:36:51,698 - __main__ - INFO - sglang running req: 10 queue req: 244
- 2025-07-20 15:36:51,896 - sglang - INFO - [2025-07-20 15:36:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2658, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 243
- 2025-07-20 15:36:51,896 - __main__ - INFO - sglang running req: 9 queue req: 243
- 2025-07-20 15:36:53,477 - sglang - INFO - [2025-07-20 15:36:53 TP0] Decode batch. #running-req: 10, #token: 30418, token usage: 0.80, gen throughput (token/s): 224.22, #queue-req: 243
- 2025-07-20 15:36:53,478 - __main__ - INFO - sglang running req: 10 queue req: 243
- 2025-07-20 15:36:53,759 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:36:53,760 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 848.79 1022.97
- sglang_output_tokens 245.35 297.58
- 2025-07-20 15:36:53,760 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 257 | 500
- 1 | 0 | 10
- 2025-07-20 15:36:54,272 - sglang - INFO - [2025-07-20 15:36:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 242
- 2025-07-20 15:36:54,273 - __main__ - INFO - sglang running req: 9 queue req: 242
- 2025-07-20 15:36:55,220 - sglang - INFO - [2025-07-20 15:36:55 TP0] Decode batch. #running-req: 10, #token: 30016, token usage: 0.79, gen throughput (token/s): 228.92, #queue-req: 242
- 2025-07-20 15:36:55,220 - __main__ - INFO - sglang running req: 10 queue req: 242
- 2025-07-20 15:36:56,208 - sglang - INFO - [2025-07-20 15:36:56 TP0] Decode batch. #running-req: 10, #token: 30416, token usage: 0.80, gen throughput (token/s): 404.97, #queue-req: 242
- 2025-07-20 15:36:56,208 - __main__ - INFO - sglang running req: 10 queue req: 242
- 2025-07-20 15:36:56,703 - sglang - INFO - [2025-07-20 15:36:56 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 241
- 2025-07-20 15:36:56,703 - __main__ - INFO - sglang running req: 9 queue req: 241
- 2025-07-20 15:36:57,844 - sglang - INFO - [2025-07-20 15:36:57 TP0] Decode batch. #running-req: 10, #token: 29439, token usage: 0.77, gen throughput (token/s): 243.87, #queue-req: 241
- 2025-07-20 15:36:57,844 - __main__ - INFO - sglang running req: 10 queue req: 241
- 2025-07-20 15:36:58,216 - sglang - INFO - [2025-07-20 15:36:58 TP0] Prefill batch. #new-seq: 1, #new-token: 1278, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 240
- 2025-07-20 15:36:58,216 - __main__ - INFO - sglang running req: 9 queue req: 240
- 2025-07-20 15:36:59,338 - sglang - INFO - [2025-07-20 15:36:59 TP0] Decode batch. #running-req: 10, #token: 27934, token usage: 0.74, gen throughput (token/s): 267.14, #queue-req: 240
- 2025-07-20 15:36:59,338 - __main__ - INFO - sglang running req: 10 queue req: 240
- 2025-07-20 15:37:00,319 - sglang - INFO - [2025-07-20 15:37:00 TP0] Decode batch. #running-req: 10, #token: 28334, token usage: 0.75, gen throughput (token/s): 407.60, #queue-req: 240
- 2025-07-20 15:37:00,319 - __main__ - INFO - sglang running req: 10 queue req: 240
- 2025-07-20 15:37:01,304 - sglang - INFO - [2025-07-20 15:37:01 TP0] Decode batch. #running-req: 10, #token: 28734, token usage: 0.76, gen throughput (token/s): 405.99, #queue-req: 240
- 2025-07-20 15:37:01,304 - __main__ - INFO - sglang running req: 10 queue req: 240
- 2025-07-20 15:37:01,822 - sglang - INFO - [2025-07-20 15:37:01 TP0] Prefill batch. #new-seq: 2, #new-token: 4188, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 238
- 2025-07-20 15:37:01,822 - __main__ - INFO - sglang running req: 9 queue req: 238
- 2025-07-20 15:37:03,613 - sglang - INFO - [2025-07-20 15:37:03 TP0] Decode batch. #running-req: 11, #token: 29688, token usage: 0.78, gen throughput (token/s): 181.06, #queue-req: 238
- 2025-07-20 15:37:03,613 - __main__ - INFO - sglang running req: 11 queue req: 238
- 2025-07-20 15:37:03,761 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:37:03,761 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 851.26 1019.37
- sglang_output_tokens 245.98 295.62
- 2025-07-20 15:37:03,761 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 261 | 500
- 1 | 0 | 10
- 2025-07-20 15:37:03,809 - sglang - INFO - [2025-07-20 15:37:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 237
- 2025-07-20 15:37:03,810 - __main__ - INFO - sglang running req: 10 queue req: 237
- 2025-07-20 15:37:05,400 - sglang - INFO - [2025-07-20 15:37:05 TP0] Decode batch. #running-req: 11, #token: 31174, token usage: 0.82, gen throughput (token/s): 245.59, #queue-req: 237
- 2025-07-20 15:37:05,401 - __main__ - INFO - sglang running req: 11 queue req: 237
- 2025-07-20 15:37:06,392 - sglang - INFO - [2025-07-20 15:37:06 TP0] Decode batch. #running-req: 11, #token: 31614, token usage: 0.83, gen throughput (token/s): 443.83, #queue-req: 237
- 2025-07-20 15:37:06,392 - __main__ - INFO - sglang running req: 11 queue req: 237
- 2025-07-20 15:37:06,516 - sglang - INFO - [2025-07-20 15:37:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2906, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 236
- 2025-07-20 15:37:06,516 - __main__ - INFO - sglang running req: 10 queue req: 236
- 2025-07-20 15:37:07,576 - sglang - INFO - [2025-07-20 15:37:07 TP0] Prefill batch. #new-seq: 1, #new-token: 1757, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 235
- 2025-07-20 15:37:07,580 - __main__ - INFO - sglang running req: 10 queue req: 235
- 2025-07-20 15:37:08,935 - sglang - INFO - [2025-07-20 15:37:08 TP0] Decode batch. #running-req: 11, #token: 31180, token usage: 0.82, gen throughput (token/s): 172.22, #queue-req: 235
- 2025-07-20 15:37:08,935 - __main__ - INFO - sglang running req: 11 queue req: 235
- 2025-07-20 15:37:09,537 - __main__ - WARNING - JSON decode error on attempt 0 for scripts/data/11445200MB2D6222364440125017008.pdf-13: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 15:37:09,556 - sglang - INFO - [2025-07-20 15:37:09 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 234
- 2025-07-20 15:37:09,557 - __main__ - INFO - sglang running req: 10 queue req: 234
- 2025-07-20 15:37:09,786 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-13
- 2025-07-20 15:37:10,581 - sglang - INFO - [2025-07-20 15:37:10 TP0] Decode batch. #running-req: 10, #token: 27946, token usage: 0.74, gen throughput (token/s): 266.07, #queue-req: 235
- 2025-07-20 15:37:10,581 - __main__ - INFO - sglang running req: 10 queue req: 235
- 2025-07-20 15:37:10,582 - sglang - INFO - [2025-07-20 15:37:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2772, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 234
- 2025-07-20 15:37:10,582 - __main__ - INFO - sglang running req: 10 queue req: 234
- 2025-07-20 15:37:12,383 - sglang - INFO - [2025-07-20 15:37:12 TP0] Decode batch. #running-req: 11, #token: 31158, token usage: 0.82, gen throughput (token/s): 244.25, #queue-req: 234
- 2025-07-20 15:37:12,383 - __main__ - INFO - sglang running req: 11 queue req: 234
- 2025-07-20 15:37:13,378 - sglang - INFO - [2025-07-20 15:37:13 TP0] Decode batch. #running-req: 11, #token: 31598, token usage: 0.83, gen throughput (token/s): 442.14, #queue-req: 234
- 2025-07-20 15:37:13,378 - __main__ - INFO - sglang running req: 11 queue req: 234
- 2025-07-20 15:37:13,762 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:37:13,763 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 853.20 1025.23
- sglang_output_tokens 246.04 293.57
- 2025-07-20 15:37:13,763 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 265 | 500
- 1 | 0 | 10
- 2025-07-20 15:37:14,373 - sglang - INFO - [2025-07-20 15:37:14 TP0] Decode batch. #running-req: 11, #token: 32038, token usage: 0.84, gen throughput (token/s): 442.14, #queue-req: 234
- 2025-07-20 15:37:14,373 - __main__ - INFO - sglang running req: 11 queue req: 234
- 2025-07-20 15:37:15,366 - sglang - INFO - [2025-07-20 15:37:15 TP0] Decode batch. #running-req: 11, #token: 32478, token usage: 0.85, gen throughput (token/s): 443.00, #queue-req: 234
- 2025-07-20 15:37:15,366 - __main__ - INFO - sglang running req: 11 queue req: 234
- 2025-07-20 15:37:15,939 - sglang - INFO - [2025-07-20 15:37:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2188, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 233
- 2025-07-20 15:37:15,939 - __main__ - INFO - sglang running req: 10 queue req: 233
- 2025-07-20 15:37:17,087 - sglang - INFO - [2025-07-20 15:37:17 TP0] Decode batch. #running-req: 11, #token: 31502, token usage: 0.83, gen throughput (token/s): 255.04, #queue-req: 233
- 2025-07-20 15:37:17,088 - __main__ - INFO - sglang running req: 11 queue req: 233
- 2025-07-20 15:37:18,081 - sglang - INFO - [2025-07-20 15:37:18 TP0] Decode batch. #running-req: 11, #token: 31942, token usage: 0.84, gen throughput (token/s): 442.94, #queue-req: 233
- 2025-07-20 15:37:18,081 - __main__ - INFO - sglang running req: 11 queue req: 233
- 2025-07-20 15:37:18,579 - sglang - INFO - [2025-07-20 15:37:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1908, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 232
- 2025-07-20 15:37:18,579 - __main__ - INFO - sglang running req: 10 queue req: 232
- 2025-07-20 15:37:19,730 - sglang - INFO - [2025-07-20 15:37:19 TP0] Decode batch. #running-req: 11, #token: 32194, token usage: 0.85, gen throughput (token/s): 266.20, #queue-req: 232
- 2025-07-20 15:37:19,730 - __main__ - INFO - sglang running req: 11 queue req: 232
- 2025-07-20 15:37:20,325 - sglang - INFO - [2025-07-20 15:37:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2606, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 231
- 2025-07-20 15:37:20,325 - __main__ - INFO - sglang running req: 9 queue req: 231
- 2025-07-20 15:37:21,506 - sglang - INFO - [2025-07-20 15:37:21 TP0] Decode batch. #running-req: 10, #token: 29452, token usage: 0.78, gen throughput (token/s): 229.14, #queue-req: 231
- 2025-07-20 15:37:21,506 - __main__ - INFO - sglang running req: 10 queue req: 231
- 2025-07-20 15:37:21,556 - sglang - INFO - [2025-07-20 15:37:21 TP0] Prefill batch. #new-seq: 1, #new-token: 1939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 230
- 2025-07-20 15:37:21,556 - __main__ - INFO - sglang running req: 9 queue req: 230
- 2025-07-20 15:37:23,141 - sglang - INFO - [2025-07-20 15:37:23 TP0] Decode batch. #running-req: 10, #token: 28339, token usage: 0.75, gen throughput (token/s): 244.03, #queue-req: 230
- 2025-07-20 15:37:23,142 - __main__ - INFO - sglang running req: 10 queue req: 230
- 2025-07-20 15:37:23,485 - sglang - INFO - [2025-07-20 15:37:23 TP0] Prefill batch. #new-seq: 1, #new-token: 2593, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 229
- 2025-07-20 15:37:23,485 - __main__ - INFO - sglang running req: 9 queue req: 229
- 2025-07-20 15:37:23,764 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:37:23,764 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 860.77 1035.29
- sglang_output_tokens 248.19 296.91
- 2025-07-20 15:37:23,764 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 271 | 500
- 1 | 0 | 10
- 2025-07-20 15:37:24,911 - sglang - INFO - [2025-07-20 15:37:24 TP0] Decode batch. #running-req: 10, #token: 27966, token usage: 0.74, gen throughput (token/s): 225.48, #queue-req: 229
- 2025-07-20 15:37:24,911 - __main__ - INFO - sglang running req: 10 queue req: 229
- 2025-07-20 15:37:25,500 - sglang - INFO - [2025-07-20 15:37:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2299, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 228
- 2025-07-20 15:37:25,500 - __main__ - INFO - sglang running req: 9 queue req: 228
- 2025-07-20 15:37:26,620 - sglang - INFO - [2025-07-20 15:37:26 TP0] Decode batch. #running-req: 10, #token: 27461, token usage: 0.72, gen throughput (token/s): 233.44, #queue-req: 228
- 2025-07-20 15:37:26,620 - __main__ - INFO - sglang running req: 10 queue req: 228
- 2025-07-20 15:37:27,601 - sglang - INFO - [2025-07-20 15:37:27 TP0] Decode batch. #running-req: 10, #token: 27861, token usage: 0.73, gen throughput (token/s): 407.62, #queue-req: 228
- 2025-07-20 15:37:27,602 - __main__ - INFO - sglang running req: 10 queue req: 228
- 2025-07-20 15:37:28,584 - sglang - INFO - [2025-07-20 15:37:28 TP0] Decode batch. #running-req: 10, #token: 28261, token usage: 0.74, gen throughput (token/s): 407.23, #queue-req: 228
- 2025-07-20 15:37:28,584 - __main__ - INFO - sglang running req: 10 queue req: 228
- 2025-07-20 15:37:28,976 - sglang - INFO - [2025-07-20 15:37:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 227
- 2025-07-20 15:37:28,976 - __main__ - INFO - sglang running req: 9 queue req: 227
- 2025-07-20 15:37:30,321 - sglang - INFO - [2025-07-20 15:37:30 TP0] Decode batch. #running-req: 10, #token: 29004, token usage: 0.76, gen throughput (token/s): 229.66, #queue-req: 227
- 2025-07-20 15:37:30,321 - __main__ - INFO - sglang running req: 10 queue req: 227
- 2025-07-20 15:37:31,305 - sglang - INFO - [2025-07-20 15:37:31 TP0] Decode batch. #running-req: 10, #token: 29404, token usage: 0.77, gen throughput (token/s): 406.34, #queue-req: 227
- 2025-07-20 15:37:31,305 - __main__ - INFO - sglang running req: 10 queue req: 227
- 2025-07-20 15:37:32,298 - sglang - INFO - [2025-07-20 15:37:32 TP0] Decode batch. #running-req: 10, #token: 26151, token usage: 0.69, gen throughput (token/s): 403.10, #queue-req: 227
- 2025-07-20 15:37:32,298 - __main__ - INFO - sglang running req: 10 queue req: 227
- 2025-07-20 15:37:32,322 - sglang - INFO - [2025-07-20 15:37:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2281, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 226
- 2025-07-20 15:37:32,322 - __main__ - INFO - sglang running req: 9 queue req: 226
- 2025-07-20 15:37:33,766 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:37:33,767 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 858.84 1018.66
- sglang_output_tokens 247.37 292.33
- 2025-07-20 15:37:33,767 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 274 | 500
- 1 | 0 | 10
- 2025-07-20 15:37:34,009 - sglang - INFO - [2025-07-20 15:37:34 TP0] Decode batch. #running-req: 10, #token: 28831, token usage: 0.76, gen throughput (token/s): 233.15, #queue-req: 226
- 2025-07-20 15:37:34,009 - __main__ - INFO - sglang running req: 10 queue req: 226
- 2025-07-20 15:37:34,625 - sglang - INFO - [2025-07-20 15:37:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2165, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 225
- 2025-07-20 15:37:34,625 - __main__ - INFO - sglang running req: 9 queue req: 225
- 2025-07-20 15:37:35,666 - sglang - INFO - [2025-07-20 15:37:35 TP0] Decode batch. #running-req: 10, #token: 28062, token usage: 0.74, gen throughput (token/s): 240.84, #queue-req: 225
- 2025-07-20 15:37:35,666 - __main__ - INFO - sglang running req: 10 queue req: 225
- 2025-07-20 15:37:36,475 - sglang - INFO - [2025-07-20 15:37:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2909, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 224
- 2025-07-20 15:37:36,475 - __main__ - INFO - sglang running req: 9 queue req: 224
- 2025-07-20 15:37:37,481 - sglang - INFO - [2025-07-20 15:37:37 TP0] Decode batch. #running-req: 10, #token: 28613, token usage: 0.75, gen throughput (token/s): 219.76, #queue-req: 224
- 2025-07-20 15:37:37,482 - __main__ - INFO - sglang running req: 10 queue req: 224
- 2025-07-20 15:37:37,702 - sglang - INFO - [2025-07-20 15:37:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1745, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 223
- 2025-07-20 15:37:37,702 - __main__ - INFO - sglang running req: 9 queue req: 223
- 2025-07-20 15:37:38,876 - sglang - INFO - [2025-07-20 15:37:38 TP0] Prefill batch. #new-seq: 2, #new-token: 4534, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 9, #queue-req: 221
- 2025-07-20 15:37:38,876 - __main__ - INFO - sglang running req: 9 queue req: 221
- 2025-07-20 15:37:40,491 - sglang - INFO - [2025-07-20 15:37:40 TP0] Decode batch. #running-req: 11, #token: 29110, token usage: 0.77, gen throughput (token/s): 134.89, #queue-req: 221
- 2025-07-20 15:37:40,492 - __main__ - INFO - sglang running req: 11 queue req: 221
- 2025-07-20 15:37:41,480 - sglang - INFO - [2025-07-20 15:37:41 TP0] Decode batch. #running-req: 11, #token: 29550, token usage: 0.78, gen throughput (token/s): 445.12, #queue-req: 221
- 2025-07-20 15:37:41,480 - __main__ - INFO - sglang running req: 11 queue req: 221
- 2025-07-20 15:37:41,678 - sglang - INFO - [2025-07-20 15:37:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2180, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 220
- 2025-07-20 15:37:41,678 - __main__ - INFO - sglang running req: 10 queue req: 220
- 2025-07-20 15:37:43,283 - sglang - INFO - [2025-07-20 15:37:43 TP0] Decode batch. #running-req: 11, #token: 28550, token usage: 0.75, gen throughput (token/s): 243.43, #queue-req: 220
- 2025-07-20 15:37:43,283 - __main__ - INFO - sglang running req: 11 queue req: 220
- 2025-07-20 15:37:43,768 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:37:43,769 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 863.59 1020.12
- sglang_output_tokens 248.90 293.74
- 2025-07-20 15:37:43,769 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 279 | 500
- 1 | 0 | 10
- 2025-07-20 15:37:44,183 - sglang - INFO - [2025-07-20 15:37:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2348, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 219
- 2025-07-20 15:37:44,183 - __main__ - INFO - sglang running req: 10 queue req: 219
- 2025-07-20 15:37:45,077 - sglang - INFO - [2025-07-20 15:37:45 TP0] Decode batch. #running-req: 11, #token: 28548, token usage: 0.75, gen throughput (token/s): 244.78, #queue-req: 219
- 2025-07-20 15:37:45,077 - __main__ - INFO - sglang running req: 11 queue req: 219
- 2025-07-20 15:37:46,060 - sglang - INFO - [2025-07-20 15:37:46 TP0] Decode batch. #running-req: 11, #token: 28988, token usage: 0.76, gen throughput (token/s): 447.24, #queue-req: 219
- 2025-07-20 15:37:46,061 - __main__ - INFO - sglang running req: 11 queue req: 219
- 2025-07-20 15:37:46,506 - sglang - INFO - [2025-07-20 15:37:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2574, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 218
- 2025-07-20 15:37:46,506 - __main__ - INFO - sglang running req: 10 queue req: 218
- 2025-07-20 15:37:47,836 - sglang - INFO - [2025-07-20 15:37:47 TP0] Decode batch. #running-req: 11, #token: 28805, token usage: 0.76, gen throughput (token/s): 247.18, #queue-req: 218
- 2025-07-20 15:37:47,837 - __main__ - INFO - sglang running req: 11 queue req: 218
- 2025-07-20 15:37:48,824 - sglang - INFO - [2025-07-20 15:37:48 TP0] Decode batch. #running-req: 11, #token: 29245, token usage: 0.77, gen throughput (token/s): 445.53, #queue-req: 218
- 2025-07-20 15:37:48,824 - __main__ - INFO - sglang running req: 11 queue req: 218
- 2025-07-20 15:37:49,815 - sglang - INFO - [2025-07-20 15:37:49 TP0] Decode batch. #running-req: 11, #token: 29685, token usage: 0.78, gen throughput (token/s): 444.16, #queue-req: 218
- 2025-07-20 15:37:49,815 - __main__ - INFO - sglang running req: 11 queue req: 218
- 2025-07-20 15:37:50,806 - sglang - INFO - [2025-07-20 15:37:50 TP0] Decode batch. #running-req: 11, #token: 30125, token usage: 0.79, gen throughput (token/s): 443.94, #queue-req: 218
- 2025-07-20 15:37:50,806 - __main__ - INFO - sglang running req: 11 queue req: 218
- 2025-07-20 15:37:51,079 - sglang - INFO - [2025-07-20 15:37:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2971, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 217
- 2025-07-20 15:37:51,079 - __main__ - INFO - sglang running req: 10 queue req: 217
- 2025-07-20 15:37:52,666 - sglang - INFO - [2025-07-20 15:37:52 TP0] Decode batch. #running-req: 11, #token: 30103, token usage: 0.79, gen throughput (token/s): 235.98, #queue-req: 217
- 2025-07-20 15:37:52,666 - __main__ - INFO - sglang running req: 11 queue req: 217
- 2025-07-20 15:37:53,560 - sglang - INFO - [2025-07-20 15:37:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2317, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 216
- 2025-07-20 15:37:53,560 - __main__ - INFO - sglang running req: 10 queue req: 216
- 2025-07-20 15:37:53,770 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:37:53,770 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 864.37 1015.52
- sglang_output_tokens 248.72 292.27
- 2025-07-20 15:37:53,770 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 283 | 500
- 1 | 0 | 10
- 2025-07-20 15:37:54,409 - sglang - INFO - [2025-07-20 15:37:54 TP0] Decode batch. #running-req: 11, #token: 30688, token usage: 0.81, gen throughput (token/s): 251.84, #queue-req: 216
- 2025-07-20 15:37:54,410 - __main__ - INFO - sglang running req: 11 queue req: 216
- 2025-07-20 15:37:55,257 - sglang - INFO - [2025-07-20 15:37:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2014, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 215
- 2025-07-20 15:37:55,257 - __main__ - INFO - sglang running req: 10 queue req: 215
- 2025-07-20 15:37:56,065 - sglang - INFO - [2025-07-20 15:37:56 TP0] Decode batch. #running-req: 11, #token: 30268, token usage: 0.80, gen throughput (token/s): 265.13, #queue-req: 215
- 2025-07-20 15:37:56,065 - __main__ - INFO - sglang running req: 11 queue req: 215
- 2025-07-20 15:37:57,059 - sglang - INFO - [2025-07-20 15:37:57 TP0] Decode batch. #running-req: 11, #token: 30708, token usage: 0.81, gen throughput (token/s): 442.99, #queue-req: 215
- 2025-07-20 15:37:57,059 - __main__ - INFO - sglang running req: 11 queue req: 215
- 2025-07-20 15:37:58,053 - sglang - INFO - [2025-07-20 15:37:58 TP0] Decode batch. #running-req: 11, #token: 31148, token usage: 0.82, gen throughput (token/s): 442.27, #queue-req: 215
- 2025-07-20 15:37:58,053 - __main__ - INFO - sglang running req: 11 queue req: 215
- 2025-07-20 15:37:58,674 - sglang - INFO - [2025-07-20 15:37:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2720, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 214
- 2025-07-20 15:37:58,674 - __main__ - INFO - sglang running req: 10 queue req: 214
- 2025-07-20 15:37:59,853 - sglang - INFO - [2025-07-20 15:37:59 TP0] Decode batch. #running-req: 11, #token: 31503, token usage: 0.83, gen throughput (token/s): 243.96, #queue-req: 214
- 2025-07-20 15:37:59,853 - __main__ - INFO - sglang running req: 11 queue req: 214
- 2025-07-20 15:38:00,547 - sglang - INFO - [2025-07-20 15:38:00 TP0] Prefill batch. #new-seq: 1, #new-token: 1856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 213
- 2025-07-20 15:38:00,548 - __main__ - INFO - sglang running req: 10 queue req: 213
- 2025-07-20 15:38:01,495 - sglang - INFO - [2025-07-20 15:38:01 TP0] Decode batch. #running-req: 11, #token: 31070, token usage: 0.82, gen throughput (token/s): 267.33, #queue-req: 213
- 2025-07-20 15:38:01,495 - __main__ - INFO - sglang running req: 11 queue req: 213
- 2025-07-20 15:38:01,943 - sglang - INFO - [2025-07-20 15:38:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2701, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 212
- 2025-07-20 15:38:01,943 - __main__ - INFO - sglang running req: 10 queue req: 212
- 2025-07-20 15:38:02,800 - sglang - INFO - [2025-07-20 15:38:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2495, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 211
- 2025-07-20 15:38:02,800 - __main__ - INFO - sglang running req: 10 queue req: 211
- 2025-07-20 15:38:03,772 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:38:03,773 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 867.89 1021.30
- sglang_output_tokens 249.65 294.22
- 2025-07-20 15:38:03,773 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 288 | 500
- 1 | 0 | 10
- 2025-07-20 15:38:04,053 - sglang - INFO - [2025-07-20 15:38:04 TP0] Decode batch. #running-req: 11, #token: 30475, token usage: 0.80, gen throughput (token/s): 171.23, #queue-req: 211
- 2025-07-20 15:38:04,053 - __main__ - INFO - sglang running req: 11 queue req: 211
- 2025-07-20 15:38:05,045 - sglang - INFO - [2025-07-20 15:38:05 TP0] Decode batch. #running-req: 11, #token: 30915, token usage: 0.81, gen throughput (token/s): 443.71, #queue-req: 211
- 2025-07-20 15:38:05,045 - __main__ - INFO - sglang running req: 11 queue req: 211
- 2025-07-20 15:38:06,037 - sglang - INFO - [2025-07-20 15:38:06 TP0] Decode batch. #running-req: 11, #token: 31355, token usage: 0.83, gen throughput (token/s): 443.09, #queue-req: 211
- 2025-07-20 15:38:06,038 - __main__ - INFO - sglang running req: 11 queue req: 211
- 2025-07-20 15:38:06,658 - sglang - INFO - [2025-07-20 15:38:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2737, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 210
- 2025-07-20 15:38:06,658 - __main__ - INFO - sglang running req: 10 queue req: 210
- 2025-07-20 15:38:07,836 - sglang - INFO - [2025-07-20 15:38:07 TP0] Decode batch. #running-req: 11, #token: 31655, token usage: 0.83, gen throughput (token/s): 244.10, #queue-req: 210
- 2025-07-20 15:38:07,836 - __main__ - INFO - sglang running req: 11 queue req: 210
- 2025-07-20 15:38:08,828 - sglang - INFO - [2025-07-20 15:38:08 TP0] Decode batch. #running-req: 11, #token: 32095, token usage: 0.84, gen throughput (token/s): 443.25, #queue-req: 210
- 2025-07-20 15:38:08,829 - __main__ - INFO - sglang running req: 11 queue req: 210
- 2025-07-20 15:38:09,277 - sglang - INFO - [2025-07-20 15:38:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2705, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 209
- 2025-07-20 15:38:09,277 - __main__ - INFO - sglang running req: 10 queue req: 209
- 2025-07-20 15:38:10,646 - sglang - INFO - [2025-07-20 15:38:10 TP0] Decode batch. #running-req: 11, #token: 32026, token usage: 0.84, gen throughput (token/s): 241.51, #queue-req: 209
- 2025-07-20 15:38:10,647 - __main__ - INFO - sglang running req: 11 queue req: 209
- 2025-07-20 15:38:11,294 - sglang - INFO - [2025-07-20 15:38:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1818, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 208
- 2025-07-20 15:38:11,294 - __main__ - INFO - sglang running req: 10 queue req: 208
- 2025-07-20 15:38:12,289 - sglang - INFO - [2025-07-20 15:38:12 TP0] Decode batch. #running-req: 11, #token: 31497, token usage: 0.83, gen throughput (token/s): 267.21, #queue-req: 208
- 2025-07-20 15:38:12,290 - __main__ - INFO - sglang running req: 11 queue req: 208
- 2025-07-20 15:38:12,364 - sglang - INFO - [2025-07-20 15:38:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2538, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 207
- 2025-07-20 15:38:12,365 - __main__ - INFO - sglang running req: 10 queue req: 207
- 2025-07-20 15:38:13,774 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:38:13,775 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 868.87 1018.43
- sglang_output_tokens 249.90 293.19
- 2025-07-20 15:38:13,775 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 292 | 500
- 1 | 0 | 10
- 2025-07-20 15:38:14,040 - sglang - INFO - [2025-07-20 15:38:14 TP0] Decode batch. #running-req: 11, #token: 31183, token usage: 0.82, gen throughput (token/s): 250.83, #queue-req: 207
- 2025-07-20 15:38:14,040 - __main__ - INFO - sglang running req: 11 queue req: 207
- 2025-07-20 15:38:14,586 - sglang - INFO - [2025-07-20 15:38:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2867, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 206
- 2025-07-20 15:38:14,586 - __main__ - INFO - sglang running req: 10 queue req: 206
- 2025-07-20 15:38:15,860 - sglang - INFO - [2025-07-20 15:38:15 TP0] Decode batch. #running-req: 11, #token: 30592, token usage: 0.81, gen throughput (token/s): 241.10, #queue-req: 206
- 2025-07-20 15:38:15,860 - __main__ - INFO - sglang running req: 11 queue req: 206
- 2025-07-20 15:38:16,917 - sglang - INFO - [2025-07-20 15:38:16 TP0] Decode batch. #running-req: 11, #token: 31032, token usage: 0.82, gen throughput (token/s): 416.30, #queue-req: 206
- 2025-07-20 15:38:16,917 - __main__ - INFO - sglang running req: 11 queue req: 206
- 2025-07-20 15:38:18,045 - sglang - INFO - [2025-07-20 15:38:18 TP0] Decode batch. #running-req: 11, #token: 31472, token usage: 0.83, gen throughput (token/s): 390.16, #queue-req: 206
- 2025-07-20 15:38:18,045 - __main__ - INFO - sglang running req: 11 queue req: 206
- 2025-07-20 15:38:19,079 - sglang - INFO - [2025-07-20 15:38:19 TP0] Decode batch. #running-req: 11, #token: 31912, token usage: 0.84, gen throughput (token/s): 425.35, #queue-req: 206
- 2025-07-20 15:38:19,080 - __main__ - INFO - sglang running req: 11 queue req: 206
- 2025-07-20 15:38:19,873 - sglang - INFO - [2025-07-20 15:38:19 TP0] Prefill batch. #new-seq: 1, #new-token: 3019, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 205
- 2025-07-20 15:38:19,873 - __main__ - INFO - sglang running req: 9 queue req: 205
- 2025-07-20 15:38:20,945 - sglang - INFO - [2025-07-20 15:38:20 TP0] Decode batch. #running-req: 10, #token: 30291, token usage: 0.80, gen throughput (token/s): 222.49, #queue-req: 205
- 2025-07-20 15:38:20,945 - __main__ - INFO - sglang running req: 10 queue req: 205
- 2025-07-20 15:38:21,931 - sglang - INFO - [2025-07-20 15:38:21 TP0] Decode batch. #running-req: 10, #token: 30691, token usage: 0.81, gen throughput (token/s): 405.56, #queue-req: 205
- 2025-07-20 15:38:21,931 - __main__ - INFO - sglang running req: 10 queue req: 205
- 2025-07-20 15:38:22,916 - sglang - INFO - [2025-07-20 15:38:22 TP0] Decode batch. #running-req: 10, #token: 31091, token usage: 0.82, gen throughput (token/s): 406.01, #queue-req: 205
- 2025-07-20 15:38:22,917 - __main__ - INFO - sglang running req: 10 queue req: 205
- 2025-07-20 15:38:23,777 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:38:23,777 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 866.51 1002.22
- sglang_output_tokens 249.50 289.29
- 2025-07-20 15:38:23,777 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 295 | 500
- 1 | 0 | 10
- 2025-07-20 15:38:23,903 - sglang - INFO - [2025-07-20 15:38:23 TP0] Decode batch. #running-req: 10, #token: 31491, token usage: 0.83, gen throughput (token/s): 405.22, #queue-req: 205
- 2025-07-20 15:38:23,904 - __main__ - INFO - sglang running req: 10 queue req: 205
- 2025-07-20 15:38:24,933 - sglang - INFO - [2025-07-20 15:38:24 TP0] Decode batch. #running-req: 10, #token: 31891, token usage: 0.84, gen throughput (token/s): 388.48, #queue-req: 205
- 2025-07-20 15:38:24,933 - __main__ - INFO - sglang running req: 10 queue req: 205
- 2025-07-20 15:38:25,230 - sglang - INFO - [2025-07-20 15:38:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 9, #queue-req: 204
- 2025-07-20 15:38:25,231 - __main__ - INFO - sglang running req: 9 queue req: 204
- 2025-07-20 15:38:26,758 - sglang - INFO - [2025-07-20 15:38:26 TP0] Decode batch. #running-req: 10, #token: 32718, token usage: 0.86, gen throughput (token/s): 218.67, #queue-req: 204
- 2025-07-20 15:38:26,758 - __main__ - INFO - sglang running req: 10 queue req: 204
- 2025-07-20 15:38:27,753 - sglang - INFO - [2025-07-20 15:38:27 TP0] Decode batch. #running-req: 10, #token: 33118, token usage: 0.87, gen throughput (token/s): 402.00, #queue-req: 204
- 2025-07-20 15:38:27,753 - __main__ - INFO - sglang running req: 10 queue req: 204
- 2025-07-20 15:38:28,026 - sglang - INFO - [2025-07-20 15:38:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2362, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 9, #queue-req: 203
- 2025-07-20 15:38:28,027 - __main__ - INFO - sglang running req: 9 queue req: 203
- 2025-07-20 15:38:28,925 - sglang - INFO - [2025-07-20 15:38:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2766, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 202
- 2025-07-20 15:38:28,925 - __main__ - INFO - sglang running req: 9 queue req: 202
- 2025-07-20 15:38:30,302 - sglang - INFO - [2025-07-20 15:38:30 TP0] Decode batch. #running-req: 10, #token: 31848, token usage: 0.84, gen throughput (token/s): 156.11, #queue-req: 202
- 2025-07-20 15:38:30,302 - __main__ - INFO - sglang running req: 10 queue req: 202
- 2025-07-20 15:38:31,292 - sglang - INFO - [2025-07-20 15:38:31 TP0] Decode batch. #running-req: 10, #token: 32248, token usage: 0.85, gen throughput (token/s): 404.10, #queue-req: 202
- 2025-07-20 15:38:31,292 - __main__ - INFO - sglang running req: 10 queue req: 202
- 2025-07-20 15:38:31,417 - sglang - INFO - [2025-07-20 15:38:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1575, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 201
- 2025-07-20 15:38:31,417 - __main__ - INFO - sglang running req: 9 queue req: 201
- 2025-07-20 15:38:32,865 - sglang - INFO - [2025-07-20 15:38:32 TP0] Decode batch. #running-req: 10, #token: 30695, token usage: 0.81, gen throughput (token/s): 253.57, #queue-req: 201
- 2025-07-20 15:38:32,866 - __main__ - INFO - sglang running req: 10 queue req: 201
- 2025-07-20 15:38:33,779 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:38:33,779 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 867.82 1012.34
- sglang_output_tokens 249.91 292.32
- 2025-07-20 15:38:33,779 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 299 | 500
- 1 | 0 | 10
- 2025-07-20 15:38:33,853 - sglang - INFO - [2025-07-20 15:38:33 TP0] Decode batch. #running-req: 10, #token: 31095, token usage: 0.82, gen throughput (token/s): 404.92, #queue-req: 201
- 2025-07-20 15:38:33,853 - __main__ - INFO - sglang running req: 10 queue req: 201
- 2025-07-20 15:38:34,274 - sglang - INFO - [2025-07-20 15:38:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 200
- 2025-07-20 15:38:34,274 - __main__ - INFO - sglang running req: 9 queue req: 200
- 2025-07-20 15:38:35,596 - sglang - INFO - [2025-07-20 15:38:35 TP0] Decode batch. #running-req: 10, #token: 29794, token usage: 0.78, gen throughput (token/s): 228.90, #queue-req: 200
- 2025-07-20 15:38:35,597 - __main__ - INFO - sglang running req: 10 queue req: 200
- 2025-07-20 15:38:35,843 - sglang - INFO - [2025-07-20 15:38:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2576, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 199
- 2025-07-20 15:38:35,843 - __main__ - INFO - sglang running req: 9 queue req: 199
- 2025-07-20 15:38:37,098 - sglang - INFO - [2025-07-20 15:38:37 TP0] Prefill batch. #new-seq: 2, #new-token: 3615, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 197
- 2025-07-20 15:38:37,098 - __main__ - INFO - sglang running req: 9 queue req: 197
- 2025-07-20 15:38:38,582 - sglang - INFO - [2025-07-20 15:38:38 TP0] Decode batch. #running-req: 11, #token: 29695, token usage: 0.78, gen throughput (token/s): 137.01, #queue-req: 197
- 2025-07-20 15:38:38,582 - __main__ - INFO - sglang running req: 11 queue req: 197
- 2025-07-20 15:38:38,879 - sglang - INFO - [2025-07-20 15:38:38 TP0] Prefill batch. #new-seq: 1, #new-token: 2375, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 196
- 2025-07-20 15:38:38,879 - __main__ - INFO - sglang running req: 10 queue req: 196
- 2025-07-20 15:38:40,318 - sglang - INFO - [2025-07-20 15:38:40 TP0] Decode batch. #running-req: 11, #token: 28906, token usage: 0.76, gen throughput (token/s): 252.78, #queue-req: 196
- 2025-07-20 15:38:40,319 - __main__ - INFO - sglang running req: 11 queue req: 196
- 2025-07-20 15:38:41,256 - sglang - INFO - [2025-07-20 15:38:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2003, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 195
- 2025-07-20 15:38:41,256 - __main__ - INFO - sglang running req: 10 queue req: 195
- 2025-07-20 15:38:41,963 - sglang - INFO - [2025-07-20 15:38:41 TP0] Decode batch. #running-req: 11, #token: 29541, token usage: 0.78, gen throughput (token/s): 266.91, #queue-req: 195
- 2025-07-20 15:38:41,963 - __main__ - INFO - sglang running req: 11 queue req: 195
- 2025-07-20 15:38:42,582 - sglang - INFO - [2025-07-20 15:38:42 TP0] Prefill batch. #new-seq: 1, #new-token: 2867, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 194
- 2025-07-20 15:38:42,582 - __main__ - INFO - sglang running req: 10 queue req: 194
- 2025-07-20 15:38:43,782 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:38:43,782 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 874.00 1030.66
- sglang_output_tokens 251.58 296.82
- 2025-07-20 15:38:43,782 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 305 | 500
- 1 | 0 | 10
- 2025-07-20 15:38:43,786 - sglang - INFO - [2025-07-20 15:38:43 TP0] Decode batch. #running-req: 11, #token: 31446, token usage: 0.83, gen throughput (token/s): 240.80, #queue-req: 194
- 2025-07-20 15:38:43,786 - __main__ - INFO - sglang running req: 11 queue req: 194
- 2025-07-20 15:38:44,779 - sglang - INFO - [2025-07-20 15:38:44 TP0] Decode batch. #running-req: 11, #token: 31886, token usage: 0.84, gen throughput (token/s): 443.23, #queue-req: 194
- 2025-07-20 15:38:44,779 - __main__ - INFO - sglang running req: 11 queue req: 194
- 2025-07-20 15:38:45,770 - sglang - INFO - [2025-07-20 15:38:45 TP0] Decode batch. #running-req: 11, #token: 32326, token usage: 0.85, gen throughput (token/s): 444.20, #queue-req: 194
- 2025-07-20 15:38:45,770 - __main__ - INFO - sglang running req: 11 queue req: 194
- 2025-07-20 15:38:46,366 - sglang - INFO - [2025-07-20 15:38:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2471, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 193
- 2025-07-20 15:38:46,366 - __main__ - INFO - sglang running req: 10 queue req: 193
- 2025-07-20 15:38:47,524 - sglang - INFO - [2025-07-20 15:38:47 TP0] Decode batch. #running-req: 11, #token: 31488, token usage: 0.83, gen throughput (token/s): 250.24, #queue-req: 193
- 2025-07-20 15:38:47,524 - __main__ - INFO - sglang running req: 11 queue req: 193
- 2025-07-20 15:38:48,553 - sglang - INFO - [2025-07-20 15:38:48 TP0] Decode batch. #running-req: 11, #token: 31928, token usage: 0.84, gen throughput (token/s): 427.74, #queue-req: 193
- 2025-07-20 15:38:48,553 - __main__ - INFO - sglang running req: 11 queue req: 193
- 2025-07-20 15:38:49,592 - sglang - INFO - [2025-07-20 15:38:49 TP0] Decode batch. #running-req: 11, #token: 32368, token usage: 0.85, gen throughput (token/s): 423.31, #queue-req: 193
- 2025-07-20 15:38:49,592 - __main__ - INFO - sglang running req: 11 queue req: 193
- 2025-07-20 15:38:50,607 - sglang - INFO - [2025-07-20 15:38:50 TP0] Decode batch. #running-req: 11, #token: 32808, token usage: 0.86, gen throughput (token/s): 433.67, #queue-req: 193
- 2025-07-20 15:38:50,607 - __main__ - INFO - sglang running req: 11 queue req: 193
- 2025-07-20 15:38:51,605 - sglang - INFO - [2025-07-20 15:38:51 TP0] Decode batch. #running-req: 11, #token: 33248, token usage: 0.88, gen throughput (token/s): 440.49, #queue-req: 193
- 2025-07-20 15:38:51,605 - __main__ - INFO - sglang running req: 11 queue req: 193
- 2025-07-20 15:38:52,686 - sglang - INFO - [2025-07-20 15:38:52 TP0] Decode batch. #running-req: 11, #token: 33688, token usage: 0.89, gen throughput (token/s): 407.31, #queue-req: 193
- 2025-07-20 15:38:52,686 - __main__ - INFO - sglang running req: 11 queue req: 193
- 2025-07-20 15:38:53,677 - sglang - INFO - [2025-07-20 15:38:53 TP0] Decode batch. #running-req: 10, #token: 31057, token usage: 0.82, gen throughput (token/s): 416.50, #queue-req: 193
- 2025-07-20 15:38:53,678 - __main__ - INFO - sglang running req: 10 queue req: 193
- 2025-07-20 15:38:53,783 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:38:53,783 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 869.82 1000.47
- sglang_output_tokens 250.44 288.99
- 2025-07-20 15:38:53,784 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 307 | 500
- 1 | 0 | 10
- 2025-07-20 15:38:54,496 - sglang - INFO - [2025-07-20 15:38:54 TP0] Prefill batch. #new-seq: 1, #new-token: 1980, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 192
- 2025-07-20 15:38:54,496 - __main__ - INFO - sglang running req: 9 queue req: 192
- 2025-07-20 15:38:55,252 - sglang - INFO - [2025-07-20 15:38:55 TP0] Prefill batch. #new-seq: 1, #new-token: 1626, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 191
- 2025-07-20 15:38:55,252 - __main__ - INFO - sglang running req: 9 queue req: 191
- 2025-07-20 15:38:55,911 - sglang - INFO - [2025-07-20 15:38:55 TP0] Decode batch. #running-req: 10, #token: 29129, token usage: 0.77, gen throughput (token/s): 178.16, #queue-req: 191
- 2025-07-20 15:38:55,911 - __main__ - INFO - sglang running req: 10 queue req: 191
- 2025-07-20 15:38:56,900 - sglang - INFO - [2025-07-20 15:38:56 TP0] Decode batch. #running-req: 10, #token: 29529, token usage: 0.78, gen throughput (token/s): 404.59, #queue-req: 191
- 2025-07-20 15:38:56,900 - __main__ - INFO - sglang running req: 10 queue req: 191
- 2025-07-20 15:38:57,890 - sglang - INFO - [2025-07-20 15:38:57 TP0] Decode batch. #running-req: 10, #token: 29929, token usage: 0.79, gen throughput (token/s): 404.07, #queue-req: 191
- 2025-07-20 15:38:57,890 - __main__ - INFO - sglang running req: 10 queue req: 191
- 2025-07-20 15:38:58,236 - sglang - INFO - [2025-07-20 15:38:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2667, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 190
- 2025-07-20 15:38:58,236 - __main__ - INFO - sglang running req: 9 queue req: 190
- 2025-07-20 15:38:59,543 - sglang - INFO - [2025-07-20 15:38:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2786, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 189
- 2025-07-20 15:38:59,543 - __main__ - INFO - sglang running req: 9 queue req: 189
- 2025-07-20 15:39:00,474 - sglang - INFO - [2025-07-20 15:39:00 TP0] Decode batch. #running-req: 10, #token: 28938, token usage: 0.76, gen throughput (token/s): 153.98, #queue-req: 189
- 2025-07-20 15:39:00,475 - __main__ - INFO - sglang running req: 10 queue req: 189
- 2025-07-20 15:39:01,457 - sglang - INFO - [2025-07-20 15:39:01 TP0] Decode batch. #running-req: 10, #token: 29338, token usage: 0.77, gen throughput (token/s): 407.00, #queue-req: 189
- 2025-07-20 15:39:01,459 - __main__ - INFO - sglang running req: 10 queue req: 189
- 2025-07-20 15:39:02,443 - sglang - INFO - [2025-07-20 15:39:02 TP0] Decode batch. #running-req: 10, #token: 29738, token usage: 0.78, gen throughput (token/s): 405.60, #queue-req: 189
- 2025-07-20 15:39:02,444 - __main__ - INFO - sglang running req: 10 queue req: 189
- 2025-07-20 15:39:03,036 - sglang - INFO - [2025-07-20 15:39:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 188
- 2025-07-20 15:39:03,037 - __main__ - INFO - sglang running req: 9 queue req: 188
- 2025-07-20 15:39:03,786 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:39:03,786 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 874.28 1017.92
- sglang_output_tokens 251.69 294.21
- 2025-07-20 15:39:03,786 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 312 | 500
- 1 | 0 | 10
- 2025-07-20 15:39:04,158 - sglang - INFO - [2025-07-20 15:39:04 TP0] Decode batch. #running-req: 10, #token: 29137, token usage: 0.77, gen throughput (token/s): 232.71, #queue-req: 188
- 2025-07-20 15:39:04,158 - __main__ - INFO - sglang running req: 10 queue req: 188
- 2025-07-20 15:39:04,946 - sglang - INFO - [2025-07-20 15:39:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2447, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 187
- 2025-07-20 15:39:04,946 - __main__ - INFO - sglang running req: 9 queue req: 187
- 2025-07-20 15:39:05,898 - sglang - INFO - [2025-07-20 15:39:05 TP0] Decode batch. #running-req: 10, #token: 28852, token usage: 0.76, gen throughput (token/s): 229.37, #queue-req: 187
- 2025-07-20 15:39:05,898 - __main__ - INFO - sglang running req: 10 queue req: 187
- 2025-07-20 15:39:06,193 - sglang - INFO - [2025-07-20 15:39:06 TP0] Prefill batch. #new-seq: 1, #new-token: 1862, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 186
- 2025-07-20 15:39:06,194 - __main__ - INFO - sglang running req: 9 queue req: 186
- 2025-07-20 15:39:07,528 - sglang - INFO - [2025-07-20 15:39:07 TP0] Decode batch. #running-req: 10, #token: 27939, token usage: 0.74, gen throughput (token/s): 244.82, #queue-req: 186
- 2025-07-20 15:39:07,528 - __main__ - INFO - sglang running req: 10 queue req: 186
- 2025-07-20 15:39:08,213 - sglang - INFO - [2025-07-20 15:39:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2257, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 185
- 2025-07-20 15:39:08,213 - __main__ - INFO - sglang running req: 9 queue req: 185
- 2025-07-20 15:39:09,234 - sglang - INFO - [2025-07-20 15:39:09 TP0] Decode batch. #running-req: 10, #token: 28261, token usage: 0.74, gen throughput (token/s): 233.75, #queue-req: 185
- 2025-07-20 15:39:09,235 - __main__ - INFO - sglang running req: 10 queue req: 185
- 2025-07-20 15:39:09,408 - sglang - INFO - [2025-07-20 15:39:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2187, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 184
- 2025-07-20 15:39:09,408 - __main__ - INFO - sglang running req: 9 queue req: 184
- 2025-07-20 15:39:10,951 - sglang - INFO - [2025-07-20 15:39:10 TP0] Decode batch. #running-req: 10, #token: 28852, token usage: 0.76, gen throughput (token/s): 232.40, #queue-req: 184
- 2025-07-20 15:39:10,951 - __main__ - INFO - sglang running req: 10 queue req: 184
- 2025-07-20 15:39:11,621 - sglang - INFO - [2025-07-20 15:39:11 TP0] Prefill batch. #new-seq: 1, #new-token: 2684, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 183
- 2025-07-20 15:39:11,621 - __main__ - INFO - sglang running req: 9 queue req: 183
- 2025-07-20 15:39:12,742 - sglang - INFO - [2025-07-20 15:39:12 TP0] Decode batch. #running-req: 10, #token: 28227, token usage: 0.74, gen throughput (token/s): 222.82, #queue-req: 183
- 2025-07-20 15:39:12,742 - __main__ - INFO - sglang running req: 10 queue req: 183
- 2025-07-20 15:39:13,788 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:39:13,788 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 877.11 1039.08
- sglang_output_tokens 252.45 300.80
- 2025-07-20 15:39:13,789 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 317 | 500
- 1 | 0 | 10
- 2025-07-20 15:39:13,895 - sglang - INFO - [2025-07-20 15:39:13 TP0] Decode batch. #running-req: 10, #token: 28627, token usage: 0.75, gen throughput (token/s): 346.91, #queue-req: 183
- 2025-07-20 15:39:13,895 - __main__ - INFO - sglang running req: 10 queue req: 183
- 2025-07-20 15:39:14,967 - sglang - INFO - [2025-07-20 15:39:14 TP0] Decode batch. #running-req: 10, #token: 29027, token usage: 0.76, gen throughput (token/s): 373.23, #queue-req: 183
- 2025-07-20 15:39:14,967 - __main__ - INFO - sglang running req: 10 queue req: 183
- 2025-07-20 15:39:15,949 - sglang - INFO - [2025-07-20 15:39:15 TP0] Decode batch. #running-req: 10, #token: 29427, token usage: 0.77, gen throughput (token/s): 407.03, #queue-req: 183
- 2025-07-20 15:39:15,950 - __main__ - INFO - sglang running req: 10 queue req: 183
- 2025-07-20 15:39:16,269 - sglang - INFO - [2025-07-20 15:39:16 TP0] Prefill batch. #new-seq: 2, #new-token: 3532, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 181
- 2025-07-20 15:39:16,269 - __main__ - INFO - sglang running req: 9 queue req: 181
- 2025-07-20 15:39:18,140 - sglang - INFO - [2025-07-20 15:39:18 TP0] Decode batch. #running-req: 11, #token: 28786, token usage: 0.76, gen throughput (token/s): 194.43, #queue-req: 181
- 2025-07-20 15:39:18,141 - __main__ - INFO - sglang running req: 11 queue req: 181
- 2025-07-20 15:39:18,560 - sglang - INFO - [2025-07-20 15:39:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2730, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 180
- 2025-07-20 15:39:18,561 - __main__ - INFO - sglang running req: 10 queue req: 180
- 2025-07-20 15:39:19,467 - sglang - INFO - [2025-07-20 15:39:19 TP0] Prefill batch. #new-seq: 1, #new-token: 1709, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 179
- 2025-07-20 15:39:19,468 - __main__ - INFO - sglang running req: 10 queue req: 179
- 2025-07-20 15:39:20,554 - sglang - INFO - [2025-07-20 15:39:20 TP0] Decode batch. #running-req: 11, #token: 29097, token usage: 0.77, gen throughput (token/s): 181.47, #queue-req: 179
- 2025-07-20 15:39:20,554 - __main__ - INFO - sglang running req: 11 queue req: 179
- 2025-07-20 15:39:21,542 - sglang - INFO - [2025-07-20 15:39:21 TP0] Decode batch. #running-req: 11, #token: 29537, token usage: 0.78, gen throughput (token/s): 445.12, #queue-req: 179
- 2025-07-20 15:39:21,543 - __main__ - INFO - sglang running req: 11 queue req: 179
- 2025-07-20 15:39:22,062 - sglang - INFO - [2025-07-20 15:39:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1451, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 178
- 2025-07-20 15:39:22,062 - __main__ - INFO - sglang running req: 10 queue req: 178
- 2025-07-20 15:39:23,082 - sglang - INFO - [2025-07-20 15:39:23 TP0] Decode batch. #running-req: 11, #token: 29157, token usage: 0.77, gen throughput (token/s): 285.16, #queue-req: 178
- 2025-07-20 15:39:23,082 - __main__ - INFO - sglang running req: 11 queue req: 178
- 2025-07-20 15:39:23,790 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:39:23,791 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 876.78 1024.38
- sglang_output_tokens 252.94 297.28
- 2025-07-20 15:39:23,791 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 321 | 500
- 1 | 0 | 10
- 2025-07-20 15:39:24,070 - sglang - INFO - [2025-07-20 15:39:24 TP0] Decode batch. #running-req: 11, #token: 29597, token usage: 0.78, gen throughput (token/s): 445.60, #queue-req: 178
- 2025-07-20 15:39:24,070 - __main__ - INFO - sglang running req: 11 queue req: 178
- 2025-07-20 15:39:25,061 - sglang - INFO - [2025-07-20 15:39:25 TP0] Decode batch. #running-req: 11, #token: 30037, token usage: 0.79, gen throughput (token/s): 443.62, #queue-req: 178
- 2025-07-20 15:39:25,061 - __main__ - INFO - sglang running req: 11 queue req: 178
- 2025-07-20 15:39:25,468 - sglang - INFO - [2025-07-20 15:39:25 TP0] Prefill batch. #new-seq: 1, #new-token: 1522, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 177
- 2025-07-20 15:39:25,468 - __main__ - INFO - sglang running req: 10 queue req: 177
- 2025-07-20 15:39:26,601 - sglang - INFO - [2025-07-20 15:39:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2073, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 176
- 2025-07-20 15:39:26,601 - __main__ - INFO - sglang running req: 10 queue req: 176
- 2025-07-20 15:39:27,310 - sglang - INFO - [2025-07-20 15:39:27 TP0] Decode batch. #running-req: 11, #token: 27786, token usage: 0.73, gen throughput (token/s): 194.74, #queue-req: 176
- 2025-07-20 15:39:27,311 - __main__ - INFO - sglang running req: 11 queue req: 176
- 2025-07-20 15:39:28,301 - sglang - INFO - [2025-07-20 15:39:28 TP0] Decode batch. #running-req: 11, #token: 28226, token usage: 0.74, gen throughput (token/s): 444.19, #queue-req: 176
- 2025-07-20 15:39:28,301 - __main__ - INFO - sglang running req: 11 queue req: 176
- 2025-07-20 15:39:29,217 - sglang - INFO - [2025-07-20 15:39:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2387, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 175
- 2025-07-20 15:39:29,218 - __main__ - INFO - sglang running req: 10 queue req: 175
- 2025-07-20 15:39:30,040 - sglang - INFO - [2025-07-20 15:39:30 TP0] Decode batch. #running-req: 11, #token: 28227, token usage: 0.74, gen throughput (token/s): 252.47, #queue-req: 175
- 2025-07-20 15:39:30,040 - __main__ - INFO - sglang running req: 11 queue req: 175
- 2025-07-20 15:39:30,262 - sglang - INFO - [2025-07-20 15:39:30 TP0] Prefill batch. #new-seq: 1, #new-token: 1501, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 174
- 2025-07-20 15:39:30,262 - __main__ - INFO - sglang running req: 10 queue req: 174
- 2025-07-20 15:39:31,589 - sglang - INFO - [2025-07-20 15:39:31 TP0] Decode batch. #running-req: 11, #token: 28489, token usage: 0.75, gen throughput (token/s): 283.29, #queue-req: 174
- 2025-07-20 15:39:31,590 - __main__ - INFO - sglang running req: 11 queue req: 174
- 2025-07-20 15:39:31,738 - sglang - INFO - [2025-07-20 15:39:31 TP0] Prefill batch. #new-seq: 2, #new-token: 4360, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 10, #queue-req: 172
- 2025-07-20 15:39:31,738 - __main__ - INFO - sglang running req: 10 queue req: 172
- 2025-07-20 15:39:33,792 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:39:33,792 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 879.82 1020.48
- sglang_output_tokens 253.46 291.01
- 2025-07-20 15:39:33,792 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 326 | 500
- 1 | 0 | 10
- 2025-07-20 15:39:33,980 - sglang - INFO - [2025-07-20 15:39:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2080, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 11, #queue-req: 171
- 2025-07-20 15:39:33,980 - __main__ - INFO - sglang running req: 11 queue req: 171
- 2025-07-20 15:39:34,690 - sglang - INFO - [2025-07-20 15:39:34 TP0] Decode batch. #running-req: 12, #token: 29679, token usage: 0.78, gen throughput (token/s): 152.25, #queue-req: 171
- 2025-07-20 15:39:34,691 - __main__ - INFO - sglang running req: 12 queue req: 171
- 2025-07-20 15:39:35,088 - sglang - INFO - [2025-07-20 15:39:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2122, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 11, #queue-req: 170
- 2025-07-20 15:39:35,088 - __main__ - INFO - sglang running req: 11 queue req: 170
- 2025-07-20 15:39:36,515 - sglang - INFO - [2025-07-20 15:39:36 TP0] Decode batch. #running-req: 12, #token: 30561, token usage: 0.80, gen throughput (token/s): 262.34, #queue-req: 170
- 2025-07-20 15:39:36,515 - __main__ - INFO - sglang running req: 12 queue req: 170
- 2025-07-20 15:39:36,926 - sglang - INFO - [2025-07-20 15:39:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2279, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 11, #queue-req: 169
- 2025-07-20 15:39:36,927 - __main__ - INFO - sglang running req: 11 queue req: 169
- 2025-07-20 15:39:38,249 - sglang - INFO - [2025-07-20 15:39:38 TP0] Decode batch. #running-req: 12, #token: 30089, token usage: 0.79, gen throughput (token/s): 276.32, #queue-req: 169
- 2025-07-20 15:39:38,249 - __main__ - INFO - sglang running req: 12 queue req: 169
- 2025-07-20 15:39:38,647 - sglang - INFO - [2025-07-20 15:39:38 TP0] Prefill batch. #new-seq: 1, #new-token: 2796, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 11, #queue-req: 168
- 2025-07-20 15:39:38,647 - __main__ - INFO - sglang running req: 11 queue req: 168
- 2025-07-20 15:39:40,063 - sglang - INFO - [2025-07-20 15:39:40 TP0] Decode batch. #running-req: 12, #token: 31697, token usage: 0.83, gen throughput (token/s): 264.06, #queue-req: 168
- 2025-07-20 15:39:40,063 - __main__ - INFO - sglang running req: 12 queue req: 168
- 2025-07-20 15:39:41,070 - sglang - INFO - [2025-07-20 15:39:41 TP0] Decode batch. #running-req: 12, #token: 32177, token usage: 0.85, gen throughput (token/s): 476.58, #queue-req: 168
- 2025-07-20 15:39:41,070 - __main__ - INFO - sglang running req: 12 queue req: 168
- 2025-07-20 15:39:41,999 - sglang - INFO - [2025-07-20 15:39:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2771, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 167
- 2025-07-20 15:39:41,999 - __main__ - INFO - sglang running req: 10 queue req: 167
- 2025-07-20 15:39:42,883 - sglang - INFO - [2025-07-20 15:39:42 TP0] Decode batch. #running-req: 11, #token: 29977, token usage: 0.79, gen throughput (token/s): 248.72, #queue-req: 167
- 2025-07-20 15:39:42,883 - __main__ - INFO - sglang running req: 11 queue req: 167
- 2025-07-20 15:39:43,183 - sglang - INFO - [2025-07-20 15:39:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 166
- 2025-07-20 15:39:43,183 - __main__ - INFO - sglang running req: 10 queue req: 166
- 2025-07-20 15:39:43,793 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:39:43,794 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 885.80 1036.27
- sglang_output_tokens 254.52 293.79
- 2025-07-20 15:39:43,794 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 333 | 500
- 1 | 0 | 10
- 2025-07-20 15:39:44,631 - sglang - INFO - [2025-07-20 15:39:44 TP0] Decode batch. #running-req: 11, #token: 29444, token usage: 0.78, gen throughput (token/s): 251.18, #queue-req: 166
- 2025-07-20 15:39:44,631 - __main__ - INFO - sglang running req: 11 queue req: 166
- 2025-07-20 15:39:44,978 - sglang - INFO - [2025-07-20 15:39:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2675, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 165
- 2025-07-20 15:39:44,978 - __main__ - INFO - sglang running req: 10 queue req: 165
- 2025-07-20 15:39:46,411 - sglang - INFO - [2025-07-20 15:39:46 TP0] Decode batch. #running-req: 11, #token: 30650, token usage: 0.81, gen throughput (token/s): 246.66, #queue-req: 165
- 2025-07-20 15:39:46,411 - __main__ - INFO - sglang running req: 11 queue req: 165
- 2025-07-20 15:39:46,857 - sglang - INFO - [2025-07-20 15:39:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1633, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 164
- 2025-07-20 15:39:46,857 - __main__ - INFO - sglang running req: 10 queue req: 164
- 2025-07-20 15:39:47,986 - sglang - INFO - [2025-07-20 15:39:47 TP0] Decode batch. #running-req: 11, #token: 29602, token usage: 0.78, gen throughput (token/s): 278.75, #queue-req: 164
- 2025-07-20 15:39:47,986 - __main__ - INFO - sglang running req: 11 queue req: 164
- 2025-07-20 15:39:48,978 - sglang - INFO - [2025-07-20 15:39:48 TP0] Decode batch. #running-req: 11, #token: 30042, token usage: 0.79, gen throughput (token/s): 443.30, #queue-req: 164
- 2025-07-20 15:39:48,978 - __main__ - INFO - sglang running req: 11 queue req: 164
- 2025-07-20 15:39:49,999 - sglang - INFO - [2025-07-20 15:39:49 TP0] Decode batch. #running-req: 11, #token: 30482, token usage: 0.80, gen throughput (token/s): 430.98, #queue-req: 164
- 2025-07-20 15:39:50,000 - __main__ - INFO - sglang running req: 11 queue req: 164
- 2025-07-20 15:39:51,045 - sglang - INFO - [2025-07-20 15:39:51 TP0] Decode batch. #running-req: 11, #token: 30922, token usage: 0.81, gen throughput (token/s): 420.89, #queue-req: 164
- 2025-07-20 15:39:51,045 - __main__ - INFO - sglang running req: 11 queue req: 164
- 2025-07-20 15:39:51,569 - sglang - INFO - [2025-07-20 15:39:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2353, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 163
- 2025-07-20 15:39:51,569 - __main__ - INFO - sglang running req: 10 queue req: 163
- 2025-07-20 15:39:52,949 - sglang - INFO - [2025-07-20 15:39:52 TP0] Decode batch. #running-req: 11, #token: 30786, token usage: 0.81, gen throughput (token/s): 230.54, #queue-req: 163
- 2025-07-20 15:39:52,949 - __main__ - INFO - sglang running req: 11 queue req: 163
- 2025-07-20 15:39:53,795 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:39:53,796 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 883.07 1034.59
- sglang_output_tokens 253.42 292.83
- 2025-07-20 15:39:53,796 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 336 | 500
- 1 | 0 | 10
- 2025-07-20 15:39:53,968 - sglang - INFO - [2025-07-20 15:39:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2778, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 162
- 2025-07-20 15:39:53,968 - __main__ - INFO - sglang running req: 10 queue req: 162
- 2025-07-20 15:39:54,802 - sglang - INFO - [2025-07-20 15:39:54 TP0] Decode batch. #running-req: 11, #token: 30372, token usage: 0.80, gen throughput (token/s): 236.95, #queue-req: 162
- 2025-07-20 15:39:54,802 - __main__ - INFO - sglang running req: 11 queue req: 162
- 2025-07-20 15:39:55,793 - sglang - INFO - [2025-07-20 15:39:55 TP0] Decode batch. #running-req: 11, #token: 30812, token usage: 0.81, gen throughput (token/s): 443.85, #queue-req: 162
- 2025-07-20 15:39:55,793 - __main__ - INFO - sglang running req: 11 queue req: 162
- 2025-07-20 15:39:56,786 - sglang - INFO - [2025-07-20 15:39:56 TP0] Decode batch. #running-req: 10, #token: 28571, token usage: 0.75, gen throughput (token/s): 442.00, #queue-req: 162
- 2025-07-20 15:39:56,786 - __main__ - INFO - sglang running req: 10 queue req: 162
- 2025-07-20 15:39:56,787 - sglang - INFO - [2025-07-20 15:39:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2333, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 161
- 2025-07-20 15:39:56,787 - __main__ - INFO - sglang running req: 10 queue req: 161
- 2025-07-20 15:39:58,010 - sglang - INFO - [2025-07-20 15:39:58 TP0] Prefill batch. #new-seq: 1, #new-token: 1972, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 160
- 2025-07-20 15:39:58,010 - __main__ - INFO - sglang running req: 10 queue req: 160
- 2025-07-20 15:39:59,190 - sglang - INFO - [2025-07-20 15:39:59 TP0] Decode batch. #running-req: 11, #token: 31362, token usage: 0.83, gen throughput (token/s): 182.65, #queue-req: 160
- 2025-07-20 15:39:59,190 - __main__ - INFO - sglang running req: 11 queue req: 160
- 2025-07-20 15:39:59,935 - sglang - INFO - [2025-07-20 15:39:59 TP0] Prefill batch. #new-seq: 1, #new-token: 1774, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 159
- 2025-07-20 15:39:59,935 - __main__ - INFO - sglang running req: 10 queue req: 159
- 2025-07-20 15:40:00,792 - sglang - INFO - [2025-07-20 15:40:00 TP0] Decode batch. #running-req: 11, #token: 30798, token usage: 0.81, gen throughput (token/s): 273.92, #queue-req: 159
- 2025-07-20 15:40:00,793 - __main__ - INFO - sglang running req: 11 queue req: 159
- 2025-07-20 15:40:01,782 - sglang - INFO - [2025-07-20 15:40:01 TP0] Decode batch. #running-req: 11, #token: 31238, token usage: 0.82, gen throughput (token/s): 444.65, #queue-req: 159
- 2025-07-20 15:40:01,783 - __main__ - INFO - sglang running req: 11 queue req: 159
- 2025-07-20 15:40:02,775 - sglang - INFO - [2025-07-20 15:40:02 TP0] Decode batch. #running-req: 11, #token: 31678, token usage: 0.83, gen throughput (token/s): 442.95, #queue-req: 159
- 2025-07-20 15:40:02,775 - __main__ - INFO - sglang running req: 11 queue req: 159
- 2025-07-20 15:40:03,573 - sglang - INFO - [2025-07-20 15:40:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2115, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 158
- 2025-07-20 15:40:03,573 - __main__ - INFO - sglang running req: 10 queue req: 158
- 2025-07-20 15:40:03,797 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:40:03,797 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 885.79 1031.67
- sglang_output_tokens 254.26 293.35
- 2025-07-20 15:40:03,797 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 341 | 500
- 1 | 0 | 10
- 2025-07-20 15:40:04,458 - sglang - INFO - [2025-07-20 15:40:04 TP0] Decode batch. #running-req: 11, #token: 30668, token usage: 0.81, gen throughput (token/s): 260.83, #queue-req: 158
- 2025-07-20 15:40:04,458 - __main__ - INFO - sglang running req: 11 queue req: 158
- 2025-07-20 15:40:05,582 - sglang - INFO - [2025-07-20 15:40:05 TP0] Decode batch. #running-req: 11, #token: 31108, token usage: 0.82, gen throughput (token/s): 391.62, #queue-req: 158
- 2025-07-20 15:40:05,582 - __main__ - INFO - sglang running req: 11 queue req: 158
- 2025-07-20 15:40:05,836 - sglang - INFO - [2025-07-20 15:40:05 TP0] Prefill batch. #new-seq: 1, #new-token: 3184, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 157
- 2025-07-20 15:40:05,836 - __main__ - INFO - sglang running req: 10 queue req: 157
- 2025-07-20 15:40:07,503 - sglang - INFO - [2025-07-20 15:40:07 TP0] Decode batch. #running-req: 11, #token: 31660, token usage: 0.83, gen throughput (token/s): 228.45, #queue-req: 157
- 2025-07-20 15:40:07,504 - __main__ - INFO - sglang running req: 11 queue req: 157
- 2025-07-20 15:40:08,494 - sglang - INFO - [2025-07-20 15:40:08 TP0] Decode batch. #running-req: 11, #token: 32100, token usage: 0.85, gen throughput (token/s): 444.28, #queue-req: 157
- 2025-07-20 15:40:08,494 - __main__ - INFO - sglang running req: 11 queue req: 157
- 2025-07-20 15:40:09,485 - sglang - INFO - [2025-07-20 15:40:09 TP0] Decode batch. #running-req: 11, #token: 32540, token usage: 0.86, gen throughput (token/s): 443.89, #queue-req: 157
- 2025-07-20 15:40:09,485 - __main__ - INFO - sglang running req: 11 queue req: 157
- 2025-07-20 15:40:09,585 - sglang - INFO - [2025-07-20 15:40:09 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 10, #queue-req: 156
- 2025-07-20 15:40:09,585 - __main__ - INFO - sglang running req: 10 queue req: 156
- 2025-07-20 15:40:10,807 - sglang - INFO - [2025-07-20 15:40:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2129, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 155
- 2025-07-20 15:40:10,807 - __main__ - INFO - sglang running req: 10 queue req: 155
- 2025-07-20 15:40:11,803 - sglang - INFO - [2025-07-20 15:40:11 TP0] Decode batch. #running-req: 11, #token: 31163, token usage: 0.82, gen throughput (token/s): 188.94, #queue-req: 155
- 2025-07-20 15:40:11,803 - __main__ - INFO - sglang running req: 11 queue req: 155
- 2025-07-20 15:40:12,597 - sglang - INFO - [2025-07-20 15:40:12 TP0] Prefill batch. #new-seq: 1, #new-token: 1367, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 154
- 2025-07-20 15:40:12,598 - __main__ - INFO - sglang running req: 10 queue req: 154
- 2025-07-20 15:40:13,316 - sglang - INFO - [2025-07-20 15:40:13 TP0] Decode batch. #running-req: 11, #token: 29364, token usage: 0.77, gen throughput (token/s): 290.12, #queue-req: 154
- 2025-07-20 15:40:13,317 - __main__ - INFO - sglang running req: 11 queue req: 154
- 2025-07-20 15:40:13,564 - sglang - INFO - [2025-07-20 15:40:13 TP0] Prefill batch. #new-seq: 2, #new-token: 2748, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 152
- 2025-07-20 15:40:13,564 - __main__ - INFO - sglang running req: 10 queue req: 152
- 2025-07-20 15:40:13,798 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:40:13,798 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 889.52 1033.64
- sglang_output_tokens 255.42 292.94
- 2025-07-20 15:40:13,798 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 346 | 500
- 1 | 0 | 10
- 2025-07-20 15:40:15,347 - sglang - INFO - [2025-07-20 15:40:15 TP0] Decode batch. #running-req: 12, #token: 29111, token usage: 0.77, gen throughput (token/s): 230.95, #queue-req: 152
- 2025-07-20 15:40:15,347 - __main__ - INFO - sglang running req: 12 queue req: 152
- 2025-07-20 15:40:15,769 - sglang - INFO - [2025-07-20 15:40:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 11, #queue-req: 151
- 2025-07-20 15:40:15,769 - __main__ - INFO - sglang running req: 11 queue req: 151
- 2025-07-20 15:40:16,773 - sglang - INFO - [2025-07-20 15:40:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2625, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 11, #queue-req: 150
- 2025-07-20 15:40:16,773 - __main__ - INFO - sglang running req: 11 queue req: 150
- 2025-07-20 15:40:17,859 - sglang - INFO - [2025-07-20 15:40:17 TP0] Decode batch. #running-req: 12, #token: 28745, token usage: 0.76, gen throughput (token/s): 190.34, #queue-req: 150
- 2025-07-20 15:40:17,859 - __main__ - INFO - sglang running req: 12 queue req: 150
- 2025-07-20 15:40:18,385 - sglang - INFO - [2025-07-20 15:40:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1818, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 11, #queue-req: 149
- 2025-07-20 15:40:18,385 - __main__ - INFO - sglang running req: 11 queue req: 149
- 2025-07-20 15:40:19,527 - sglang - INFO - [2025-07-20 15:40:19 TP0] Decode batch. #running-req: 12, #token: 27619, token usage: 0.73, gen throughput (token/s): 287.02, #queue-req: 149
- 2025-07-20 15:40:19,527 - __main__ - INFO - sglang running req: 12 queue req: 149
- 2025-07-20 15:40:20,145 - sglang - INFO - [2025-07-20 15:40:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2126, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 11, #queue-req: 148
- 2025-07-20 15:40:20,145 - __main__ - INFO - sglang running req: 11 queue req: 148
- 2025-07-20 15:40:21,189 - sglang - INFO - [2025-07-20 15:40:21 TP0] Decode batch. #running-req: 12, #token: 28713, token usage: 0.76, gen throughput (token/s): 288.17, #queue-req: 148
- 2025-07-20 15:40:21,189 - __main__ - INFO - sglang running req: 12 queue req: 148
- 2025-07-20 15:40:21,412 - sglang - INFO - [2025-07-20 15:40:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2381, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 11, #queue-req: 147
- 2025-07-20 15:40:21,413 - __main__ - INFO - sglang running req: 11 queue req: 147
- 2025-07-20 15:40:22,608 - sglang - INFO - [2025-07-20 15:40:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2369, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 11, #queue-req: 146
- 2025-07-20 15:40:22,608 - __main__ - INFO - sglang running req: 11 queue req: 146
- 2025-07-20 15:40:23,678 - sglang - INFO - [2025-07-20 15:40:23 TP0] Decode batch. #running-req: 12, #token: 28768, token usage: 0.76, gen throughput (token/s): 192.10, #queue-req: 146
- 2025-07-20 15:40:23,678 - __main__ - INFO - sglang running req: 12 queue req: 146
- 2025-07-20 15:40:23,799 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:40:23,799 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 893.22 1039.33
- sglang_output_tokens 255.97 292.16
- 2025-07-20 15:40:23,799 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 352 | 500
- 1 | 0 | 10
- 2025-07-20 15:40:24,590 - sglang - INFO - [2025-07-20 15:40:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2572, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 11, #queue-req: 145
- 2025-07-20 15:40:24,590 - __main__ - INFO - sglang running req: 11 queue req: 145
- 2025-07-20 15:40:25,452 - sglang - INFO - [2025-07-20 15:40:25 TP0] Decode batch. #running-req: 12, #token: 30208, token usage: 0.80, gen throughput (token/s): 269.94, #queue-req: 145
- 2025-07-20 15:40:25,452 - __main__ - INFO - sglang running req: 12 queue req: 145
- 2025-07-20 15:40:25,939 - sglang - INFO - [2025-07-20 15:40:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2150, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 11, #queue-req: 144
- 2025-07-20 15:40:25,939 - __main__ - INFO - sglang running req: 11 queue req: 144
- 2025-07-20 15:40:27,236 - sglang - INFO - [2025-07-20 15:40:27 TP0] Decode batch. #running-req: 12, #token: 31218, token usage: 0.82, gen throughput (token/s): 268.53, #queue-req: 144
- 2025-07-20 15:40:27,236 - __main__ - INFO - sglang running req: 12 queue req: 144
- 2025-07-20 15:40:28,357 - sglang - INFO - [2025-07-20 15:40:28 TP0] Decode batch. #running-req: 12, #token: 31698, token usage: 0.83, gen throughput (token/s): 428.14, #queue-req: 144
- 2025-07-20 15:40:28,357 - __main__ - INFO - sglang running req: 12 queue req: 144
- 2025-07-20 15:40:29,361 - sglang - INFO - [2025-07-20 15:40:29 TP0] Decode batch. #running-req: 11, #token: 30054, token usage: 0.79, gen throughput (token/s): 457.17, #queue-req: 144
- 2025-07-20 15:40:29,361 - __main__ - INFO - sglang running req: 11 queue req: 144
- 2025-07-20 15:40:30,129 - sglang - INFO - [2025-07-20 15:40:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2327, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 143
- 2025-07-20 15:40:30,129 - __main__ - INFO - sglang running req: 10 queue req: 143
- 2025-07-20 15:40:31,101 - sglang - INFO - [2025-07-20 15:40:31 TP0] Decode batch. #running-req: 11, #token: 29131, token usage: 0.77, gen throughput (token/s): 252.33, #queue-req: 143
- 2025-07-20 15:40:31,101 - __main__ - INFO - sglang running req: 11 queue req: 143
- 2025-07-20 15:40:32,086 - sglang - INFO - [2025-07-20 15:40:32 TP0] Decode batch. #running-req: 11, #token: 29571, token usage: 0.78, gen throughput (token/s): 446.58, #queue-req: 143
- 2025-07-20 15:40:32,086 - __main__ - INFO - sglang running req: 11 queue req: 143
- 2025-07-20 15:40:32,581 - sglang - INFO - [2025-07-20 15:40:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2127, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 142
- 2025-07-20 15:40:32,581 - __main__ - INFO - sglang running req: 10 queue req: 142
- 2025-07-20 15:40:33,749 - sglang - INFO - [2025-07-20 15:40:33 TP0] Decode batch. #running-req: 11, #token: 29960, token usage: 0.79, gen throughput (token/s): 264.09, #queue-req: 142
- 2025-07-20 15:40:33,749 - __main__ - INFO - sglang running req: 11 queue req: 142
- 2025-07-20 15:40:33,800 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:40:33,800 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 893.46 1024.07
- sglang_output_tokens 255.43 284.95
- 2025-07-20 15:40:33,801 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 357 | 500
- 1 | 0 | 10
- 2025-07-20 15:40:34,462 - sglang - INFO - [2025-07-20 15:40:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2527, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 141
- 2025-07-20 15:40:34,462 - __main__ - INFO - sglang running req: 10 queue req: 141
- 2025-07-20 15:40:35,583 - sglang - INFO - [2025-07-20 15:40:35 TP0] Decode batch. #running-req: 11, #token: 30056, token usage: 0.79, gen throughput (token/s): 239.34, #queue-req: 141
- 2025-07-20 15:40:35,583 - __main__ - INFO - sglang running req: 11 queue req: 141
- 2025-07-20 15:40:36,752 - sglang - INFO - [2025-07-20 15:40:36 TP0] Decode batch. #running-req: 11, #token: 30496, token usage: 0.80, gen throughput (token/s): 376.38, #queue-req: 141
- 2025-07-20 15:40:36,752 - __main__ - INFO - sglang running req: 11 queue req: 141
- 2025-07-20 15:40:37,524 - sglang - INFO - [2025-07-20 15:40:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2644, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 140
- 2025-07-20 15:40:37,524 - __main__ - INFO - sglang running req: 10 queue req: 140
- 2025-07-20 15:40:38,588 - sglang - INFO - [2025-07-20 15:40:38 TP0] Decode batch. #running-req: 11, #token: 30809, token usage: 0.81, gen throughput (token/s): 239.13, #queue-req: 140
- 2025-07-20 15:40:38,588 - __main__ - INFO - sglang running req: 11 queue req: 140
- 2025-07-20 15:40:39,578 - sglang - INFO - [2025-07-20 15:40:39 TP0] Decode batch. #running-req: 11, #token: 31249, token usage: 0.82, gen throughput (token/s): 444.28, #queue-req: 140
- 2025-07-20 15:40:39,578 - __main__ - INFO - sglang running req: 11 queue req: 140
- 2025-07-20 15:40:40,174 - sglang - INFO - [2025-07-20 15:40:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2495, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 139
- 2025-07-20 15:40:40,175 - __main__ - INFO - sglang running req: 10 queue req: 139
- 2025-07-20 15:40:41,328 - sglang - INFO - [2025-07-20 15:40:41 TP0] Decode batch. #running-req: 11, #token: 30964, token usage: 0.82, gen throughput (token/s): 250.83, #queue-req: 139
- 2025-07-20 15:40:41,328 - __main__ - INFO - sglang running req: 11 queue req: 139
- 2025-07-20 15:40:41,428 - sglang - INFO - [2025-07-20 15:40:41 TP0] Prefill batch. #new-seq: 1, #new-token: 1731, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 138
- 2025-07-20 15:40:41,428 - __main__ - INFO - sglang running req: 10 queue req: 138
- 2025-07-20 15:40:42,770 - sglang - INFO - [2025-07-20 15:40:42 TP0] Prefill batch. #new-seq: 1, #new-token: 1754, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 137
- 2025-07-20 15:40:42,770 - __main__ - INFO - sglang running req: 10 queue req: 137
- 2025-07-20 15:40:43,710 - sglang - INFO - [2025-07-20 15:40:43 TP0] Decode batch. #running-req: 11, #token: 29102, token usage: 0.77, gen throughput (token/s): 183.89, #queue-req: 137
- 2025-07-20 15:40:43,710 - __main__ - INFO - sglang running req: 11 queue req: 137
- 2025-07-20 15:40:43,802 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:40:43,802 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 896.28 1053.09
- sglang_output_tokens 255.93 291.94
- 2025-07-20 15:40:43,802 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 362 | 500
- 1 | 0 | 10
- 2025-07-20 15:40:44,749 - sglang - INFO - [2025-07-20 15:40:44 TP0] Decode batch. #running-req: 11, #token: 29542, token usage: 0.78, gen throughput (token/s): 423.25, #queue-req: 137
- 2025-07-20 15:40:44,750 - __main__ - INFO - sglang running req: 11 queue req: 137
- 2025-07-20 15:40:44,948 - sglang - INFO - [2025-07-20 15:40:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2367, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 136
- 2025-07-20 15:40:44,948 - __main__ - INFO - sglang running req: 10 queue req: 136
- 2025-07-20 15:40:45,871 - sglang - INFO - [2025-07-20 15:40:45 TP0] Prefill batch. #new-seq: 2, #new-token: 3932, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 10, #queue-req: 134
- 2025-07-20 15:40:45,871 - __main__ - INFO - sglang running req: 10 queue req: 134
- 2025-07-20 15:40:47,756 - sglang - INFO - [2025-07-20 15:40:47 TP0] Decode batch. #running-req: 12, #token: 29497, token usage: 0.78, gen throughput (token/s): 154.00, #queue-req: 134
- 2025-07-20 15:40:47,756 - __main__ - INFO - sglang running req: 12 queue req: 134
- 2025-07-20 15:40:47,855 - sglang - INFO - [2025-07-20 15:40:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 11, #queue-req: 133
- 2025-07-20 15:40:47,855 - __main__ - INFO - sglang running req: 11 queue req: 133
- 2025-07-20 15:40:49,578 - sglang - INFO - [2025-07-20 15:40:49 TP0] Decode batch. #running-req: 12, #token: 30052, token usage: 0.79, gen throughput (token/s): 262.86, #queue-req: 133
- 2025-07-20 15:40:49,578 - __main__ - INFO - sglang running req: 12 queue req: 133
- 2025-07-20 15:40:50,570 - sglang - INFO - [2025-07-20 15:40:50 TP0] Decode batch. #running-req: 12, #token: 30532, token usage: 0.80, gen throughput (token/s): 484.09, #queue-req: 133
- 2025-07-20 15:40:50,570 - __main__ - INFO - sglang running req: 12 queue req: 133
- 2025-07-20 15:40:50,972 - sglang - INFO - [2025-07-20 15:40:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2204, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 11, #queue-req: 132
- 2025-07-20 15:40:50,972 - __main__ - INFO - sglang running req: 11 queue req: 132
- 2025-07-20 15:40:52,438 - sglang - INFO - [2025-07-20 15:40:52 TP0] Decode batch. #running-req: 12, #token: 29986, token usage: 0.79, gen throughput (token/s): 256.49, #queue-req: 132
- 2025-07-20 15:40:52,438 - __main__ - INFO - sglang running req: 12 queue req: 132
- 2025-07-20 15:40:53,568 - sglang - INFO - [2025-07-20 15:40:53 TP0] Decode batch. #running-req: 12, #token: 30466, token usage: 0.80, gen throughput (token/s): 424.71, #queue-req: 132
- 2025-07-20 15:40:53,568 - __main__ - INFO - sglang running req: 12 queue req: 132
- 2025-07-20 15:40:53,803 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:40:53,804 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 897.40 1054.66
- sglang_output_tokens 256.17 292.68
- 2025-07-20 15:40:53,804 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 366 | 500
- 1 | 0 | 10
- 2025-07-20 15:40:54,693 - sglang - INFO - [2025-07-20 15:40:54 TP0] Decode batch. #running-req: 12, #token: 29283, token usage: 0.77, gen throughput (token/s): 426.35, #queue-req: 132
- 2025-07-20 15:40:54,694 - __main__ - INFO - sglang running req: 12 queue req: 132
- 2025-07-20 15:40:54,718 - sglang - INFO - [2025-07-20 15:40:54 TP0] Prefill batch. #new-seq: 1, #new-token: 1423, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 11, #queue-req: 131
- 2025-07-20 15:40:54,718 - __main__ - INFO - sglang running req: 11 queue req: 131
- 2025-07-20 15:40:56,215 - sglang - INFO - [2025-07-20 15:40:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2209, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 11, #queue-req: 130
- 2025-07-20 15:40:56,216 - __main__ - INFO - sglang running req: 11 queue req: 130
- 2025-07-20 15:40:56,966 - sglang - INFO - [2025-07-20 15:40:56 TP0] Decode batch. #running-req: 12, #token: 30419, token usage: 0.80, gen throughput (token/s): 210.33, #queue-req: 130
- 2025-07-20 15:40:56,966 - __main__ - INFO - sglang running req: 12 queue req: 130
- 2025-07-20 15:40:57,190 - sglang - INFO - [2025-07-20 15:40:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2313, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 11, #queue-req: 129
- 2025-07-20 15:40:57,190 - __main__ - INFO - sglang running req: 11 queue req: 129
- 2025-07-20 15:40:58,813 - sglang - INFO - [2025-07-20 15:40:58 TP0] Decode batch. #running-req: 12, #token: 30496, token usage: 0.80, gen throughput (token/s): 259.34, #queue-req: 129
- 2025-07-20 15:40:58,813 - __main__ - INFO - sglang running req: 12 queue req: 129
- 2025-07-20 15:40:59,107 - sglang - INFO - [2025-07-20 15:40:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2900, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 11, #queue-req: 128
- 2025-07-20 15:40:59,107 - __main__ - INFO - sglang running req: 11 queue req: 128
- 2025-07-20 15:41:00,820 - sglang - INFO - [2025-07-20 15:41:00 TP0] Decode batch. #running-req: 11, #token: 29574, token usage: 0.78, gen throughput (token/s): 232.20, #queue-req: 128
- 2025-07-20 15:41:00,820 - __main__ - INFO - sglang running req: 11 queue req: 128
- 2025-07-20 15:41:01,911 - sglang - INFO - [2025-07-20 15:41:01 TP0] Decode batch. #running-req: 11, #token: 30014, token usage: 0.79, gen throughput (token/s): 403.40, #queue-req: 128
- 2025-07-20 15:41:01,911 - __main__ - INFO - sglang running req: 11 queue req: 128
- 2025-07-20 15:41:02,949 - sglang - INFO - [2025-07-20 15:41:02 TP0] Decode batch. #running-req: 11, #token: 30454, token usage: 0.80, gen throughput (token/s): 423.59, #queue-req: 128
- 2025-07-20 15:41:02,950 - __main__ - INFO - sglang running req: 11 queue req: 128
- 2025-07-20 15:41:03,447 - sglang - INFO - [2025-07-20 15:41:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 127
- 2025-07-20 15:41:03,447 - __main__ - INFO - sglang running req: 10 queue req: 127
- 2025-07-20 15:41:03,806 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:41:03,806 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 899.28 1036.11
- sglang_output_tokens 256.09 284.11
- 2025-07-20 15:41:03,806 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 372 | 500
- 1 | 0 | 10
- 2025-07-20 15:41:04,805 - sglang - INFO - [2025-07-20 15:41:04 TP0] Decode batch. #running-req: 11, #token: 31923, token usage: 0.84, gen throughput (token/s): 236.62, #queue-req: 127
- 2025-07-20 15:41:04,805 - __main__ - INFO - sglang running req: 11 queue req: 127
- 2025-07-20 15:41:05,912 - sglang - INFO - [2025-07-20 15:41:05 TP0] Decode batch. #running-req: 11, #token: 32363, token usage: 0.85, gen throughput (token/s): 397.46, #queue-req: 127
- 2025-07-20 15:41:05,912 - __main__ - INFO - sglang running req: 11 queue req: 127
- 2025-07-20 15:41:07,047 - sglang - INFO - [2025-07-20 15:41:07 TP0] Decode batch. #running-req: 11, #token: 32803, token usage: 0.86, gen throughput (token/s): 387.43, #queue-req: 127
- 2025-07-20 15:41:07,047 - __main__ - INFO - sglang running req: 11 queue req: 127
- 2025-07-20 15:41:07,160 - sglang - INFO - [2025-07-20 15:41:07 TP0] Prefill batch. #new-seq: 1, #new-token: 1875, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 126
- 2025-07-20 15:41:07,160 - __main__ - INFO - sglang running req: 10 queue req: 126
- 2025-07-20 15:41:08,738 - sglang - INFO - [2025-07-20 15:41:08 TP0] Decode batch. #running-req: 11, #token: 32195, token usage: 0.85, gen throughput (token/s): 259.64, #queue-req: 126
- 2025-07-20 15:41:08,738 - __main__ - INFO - sglang running req: 11 queue req: 126
- 2025-07-20 15:41:09,871 - sglang - INFO - [2025-07-20 15:41:09 TP0] Decode batch. #running-req: 11, #token: 32635, token usage: 0.86, gen throughput (token/s): 388.46, #queue-req: 126
- 2025-07-20 15:41:09,871 - __main__ - INFO - sglang running req: 11 queue req: 126
- 2025-07-20 15:41:09,983 - sglang - INFO - [2025-07-20 15:41:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2011, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 125
- 2025-07-20 15:41:09,983 - __main__ - INFO - sglang running req: 10 queue req: 125
- 2025-07-20 15:41:11,697 - sglang - INFO - [2025-07-20 15:41:11 TP0] Decode batch. #running-req: 11, #token: 31851, token usage: 0.84, gen throughput (token/s): 240.45, #queue-req: 125
- 2025-07-20 15:41:11,697 - __main__ - INFO - sglang running req: 11 queue req: 125
- 2025-07-20 15:41:12,819 - sglang - INFO - [2025-07-20 15:41:12 TP0] Decode batch. #running-req: 11, #token: 32291, token usage: 0.85, gen throughput (token/s): 392.02, #queue-req: 125
- 2025-07-20 15:41:12,819 - __main__ - INFO - sglang running req: 11 queue req: 125
- 2025-07-20 15:41:13,458 - sglang - INFO - [2025-07-20 15:41:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 124
- 2025-07-20 15:41:13,459 - __main__ - INFO - sglang running req: 10 queue req: 124
- 2025-07-20 15:41:13,808 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:41:13,809 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 897.64 1037.25
- sglang_output_tokens 255.81 286.63
- 2025-07-20 15:41:13,809 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 375 | 500
- 1 | 0 | 10
- 2025-07-20 15:41:14,699 - sglang - INFO - [2025-07-20 15:41:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2150, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 123
- 2025-07-20 15:41:14,699 - __main__ - INFO - sglang running req: 10 queue req: 123
- 2025-07-20 15:41:15,562 - sglang - INFO - [2025-07-20 15:41:15 TP0] Decode batch. #running-req: 10, #token: 28281, token usage: 0.74, gen throughput (token/s): 159.34, #queue-req: 123
- 2025-07-20 15:41:15,562 - __main__ - INFO - sglang running req: 10 queue req: 123
- 2025-07-20 15:41:15,562 - sglang - INFO - [2025-07-20 15:41:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2586, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 122
- 2025-07-20 15:41:15,562 - __main__ - INFO - sglang running req: 10 queue req: 122
- 2025-07-20 15:41:17,561 - sglang - INFO - [2025-07-20 15:41:17 TP0] Decode batch. #running-req: 11, #token: 31307, token usage: 0.82, gen throughput (token/s): 220.05, #queue-req: 122
- 2025-07-20 15:41:17,561 - __main__ - INFO - sglang running req: 11 queue req: 122
- 2025-07-20 15:41:18,656 - sglang - INFO - [2025-07-20 15:41:18 TP0] Decode batch. #running-req: 11, #token: 31747, token usage: 0.84, gen throughput (token/s): 402.10, #queue-req: 122
- 2025-07-20 15:41:18,656 - __main__ - INFO - sglang running req: 11 queue req: 122
- 2025-07-20 15:41:19,658 - sglang - INFO - [2025-07-20 15:41:19 TP0] Decode batch. #running-req: 10, #token: 29940, token usage: 0.79, gen throughput (token/s): 438.00, #queue-req: 122
- 2025-07-20 15:41:19,658 - __main__ - INFO - sglang running req: 10 queue req: 122
- 2025-07-20 15:41:19,658 - sglang - INFO - [2025-07-20 15:41:19 TP0] Prefill batch. #new-seq: 1, #new-token: 1851, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 121
- 2025-07-20 15:41:19,658 - __main__ - INFO - sglang running req: 10 queue req: 121
- 2025-07-20 15:41:20,481 - sglang - INFO - [2025-07-20 15:41:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2162, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 120
- 2025-07-20 15:41:20,482 - __main__ - INFO - sglang running req: 10 queue req: 120
- 2025-07-20 15:41:21,626 - sglang - INFO - [2025-07-20 15:41:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2298, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 119
- 2025-07-20 15:41:21,627 - __main__ - INFO - sglang running req: 10 queue req: 119
- 2025-07-20 15:41:22,707 - sglang - INFO - [2025-07-20 15:41:22 TP0] Decode batch. #running-req: 11, #token: 30013, token usage: 0.79, gen throughput (token/s): 143.62, #queue-req: 119
- 2025-07-20 15:41:22,708 - __main__ - INFO - sglang running req: 11 queue req: 119
- 2025-07-20 15:41:23,479 - sglang - INFO - [2025-07-20 15:41:23 TP0] Prefill batch. #new-seq: 2, #new-token: 4757, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.62, #running-req: 9, #queue-req: 117
- 2025-07-20 15:41:23,479 - __main__ - INFO - sglang running req: 9 queue req: 117
- 2025-07-20 15:41:23,810 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:41:23,811 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 905.47 1051.12
- sglang_output_tokens 258.13 290.71
- 2025-07-20 15:41:23,811 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 382 | 500
- 1 | 0 | 10
- 2025-07-20 15:41:25,161 - sglang - INFO - [2025-07-20 15:41:25 TP0] Decode batch. #running-req: 11, #token: 28565, token usage: 0.75, gen throughput (token/s): 178.49, #queue-req: 117
- 2025-07-20 15:41:25,161 - __main__ - INFO - sglang running req: 11 queue req: 117
- 2025-07-20 15:41:26,149 - sglang - INFO - [2025-07-20 15:41:26 TP0] Decode batch. #running-req: 11, #token: 29005, token usage: 0.76, gen throughput (token/s): 445.71, #queue-req: 117
- 2025-07-20 15:41:26,149 - __main__ - INFO - sglang running req: 11 queue req: 117
- 2025-07-20 15:41:27,136 - sglang - INFO - [2025-07-20 15:41:27 TP0] Decode batch. #running-req: 11, #token: 29445, token usage: 0.78, gen throughput (token/s): 445.45, #queue-req: 117
- 2025-07-20 15:41:27,137 - __main__ - INFO - sglang running req: 11 queue req: 117
- 2025-07-20 15:41:28,133 - sglang - INFO - [2025-07-20 15:41:28 TP0] Decode batch. #running-req: 11, #token: 29885, token usage: 0.79, gen throughput (token/s): 441.40, #queue-req: 117
- 2025-07-20 15:41:28,133 - __main__ - INFO - sglang running req: 11 queue req: 117
- 2025-07-20 15:41:28,776 - sglang - INFO - [2025-07-20 15:41:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 116
- 2025-07-20 15:41:28,776 - __main__ - INFO - sglang running req: 10 queue req: 116
- 2025-07-20 15:41:29,951 - sglang - INFO - [2025-07-20 15:41:29 TP0] Decode batch. #running-req: 11, #token: 30280, token usage: 0.80, gen throughput (token/s): 241.52, #queue-req: 116
- 2025-07-20 15:41:29,951 - __main__ - INFO - sglang running req: 11 queue req: 116
- 2025-07-20 15:41:30,942 - sglang - INFO - [2025-07-20 15:41:30 TP0] Decode batch. #running-req: 11, #token: 30720, token usage: 0.81, gen throughput (token/s): 444.05, #queue-req: 116
- 2025-07-20 15:41:30,942 - __main__ - INFO - sglang running req: 11 queue req: 116
- 2025-07-20 15:41:31,931 - sglang - INFO - [2025-07-20 15:41:31 TP0] Decode batch. #running-req: 11, #token: 31160, token usage: 0.82, gen throughput (token/s): 444.87, #queue-req: 116
- 2025-07-20 15:41:31,931 - __main__ - INFO - sglang running req: 11 queue req: 116
- 2025-07-20 15:41:32,352 - sglang - INFO - [2025-07-20 15:41:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 115
- 2025-07-20 15:41:32,352 - __main__ - INFO - sglang running req: 10 queue req: 115
- 2025-07-20 15:41:33,655 - sglang - INFO - [2025-07-20 15:41:33 TP0] Decode batch. #running-req: 11, #token: 31694, token usage: 0.83, gen throughput (token/s): 254.67, #queue-req: 115
- 2025-07-20 15:41:33,655 - __main__ - INFO - sglang running req: 11 queue req: 115
- 2025-07-20 15:41:33,811 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:41:33,812 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 900.14 1029.45
- sglang_output_tokens 256.43 283.40
- 2025-07-20 15:41:33,812 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 384 | 500
- 1 | 0 | 10
- 2025-07-20 15:41:34,027 - sglang - INFO - [2025-07-20 15:41:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 114
- 2025-07-20 15:41:34,028 - __main__ - INFO - sglang running req: 10 queue req: 114
- 2025-07-20 15:41:35,404 - sglang - INFO - [2025-07-20 15:41:35 TP0] Decode batch. #running-req: 11, #token: 30785, token usage: 0.81, gen throughput (token/s): 250.99, #queue-req: 114
- 2025-07-20 15:41:35,404 - __main__ - INFO - sglang running req: 11 queue req: 114
- 2025-07-20 15:41:35,528 - sglang - INFO - [2025-07-20 15:41:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2121, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 113
- 2025-07-20 15:41:35,528 - __main__ - INFO - sglang running req: 10 queue req: 113
- 2025-07-20 15:41:37,064 - sglang - INFO - [2025-07-20 15:41:37 TP0] Decode batch. #running-req: 11, #token: 29850, token usage: 0.79, gen throughput (token/s): 264.46, #queue-req: 113
- 2025-07-20 15:41:37,064 - __main__ - INFO - sglang running req: 11 queue req: 113
- 2025-07-20 15:41:38,055 - sglang - INFO - [2025-07-20 15:41:38 TP0] Decode batch. #running-req: 11, #token: 30290, token usage: 0.80, gen throughput (token/s): 444.07, #queue-req: 113
- 2025-07-20 15:41:38,055 - __main__ - INFO - sglang running req: 11 queue req: 113
- 2025-07-20 15:41:38,846 - sglang - INFO - [2025-07-20 15:41:38 TP0] Prefill batch. #new-seq: 1, #new-token: 1775, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 112
- 2025-07-20 15:41:38,846 - __main__ - INFO - sglang running req: 10 queue req: 112
- 2025-07-20 15:41:39,653 - sglang - INFO - [2025-07-20 15:41:39 TP0] Decode batch. #running-req: 11, #token: 29287, token usage: 0.77, gen throughput (token/s): 274.72, #queue-req: 112
- 2025-07-20 15:41:39,653 - __main__ - INFO - sglang running req: 11 queue req: 112
- 2025-07-20 15:41:39,752 - sglang - INFO - [2025-07-20 15:41:39 TP0] Prefill batch. #new-seq: 2, #new-token: 4722, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.63, #running-req: 10, #queue-req: 110
- 2025-07-20 15:41:39,752 - __main__ - INFO - sglang running req: 10 queue req: 110
- 2025-07-20 15:41:41,211 - sglang - INFO - [2025-07-20 15:41:41 TP0] Prefill batch. #new-seq: 1, #new-token: 1773, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 11, #queue-req: 109
- 2025-07-20 15:41:41,211 - __main__ - INFO - sglang running req: 11 queue req: 109
- 2025-07-20 15:41:42,692 - sglang - INFO - [2025-07-20 15:41:42 TP0] Decode batch. #running-req: 12, #token: 30993, token usage: 0.82, gen throughput (token/s): 155.95, #queue-req: 109
- 2025-07-20 15:41:42,692 - __main__ - INFO - sglang running req: 12 queue req: 109
- 2025-07-20 15:41:43,688 - sglang - INFO - [2025-07-20 15:41:43 TP0] Decode batch. #running-req: 12, #token: 31473, token usage: 0.83, gen throughput (token/s): 481.97, #queue-req: 109
- 2025-07-20 15:41:43,688 - __main__ - INFO - sglang running req: 12 queue req: 109
- 2025-07-20 15:41:43,813 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:41:43,813 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 903.43 1053.85
- sglang_output_tokens 257.34 290.15
- 2025-07-20 15:41:43,813 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 389 | 500
- 1 | 0 | 10
- 2025-07-20 15:41:44,684 - sglang - INFO - [2025-07-20 15:41:44 TP0] Decode batch. #running-req: 12, #token: 31953, token usage: 0.84, gen throughput (token/s): 481.55, #queue-req: 109
- 2025-07-20 15:41:44,685 - __main__ - INFO - sglang running req: 12 queue req: 109
- 2025-07-20 15:41:45,681 - sglang - INFO - [2025-07-20 15:41:45 TP0] Decode batch. #running-req: 12, #token: 32433, token usage: 0.85, gen throughput (token/s): 481.49, #queue-req: 109
- 2025-07-20 15:41:45,681 - __main__ - INFO - sglang running req: 12 queue req: 109
- 2025-07-20 15:41:46,676 - sglang - INFO - [2025-07-20 15:41:46 TP0] Decode batch. #running-req: 12, #token: 32913, token usage: 0.87, gen throughput (token/s): 482.60, #queue-req: 109
- 2025-07-20 15:41:46,676 - __main__ - INFO - sglang running req: 12 queue req: 109
- 2025-07-20 15:41:47,274 - sglang - INFO - [2025-07-20 15:41:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2738, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 11, #queue-req: 108
- 2025-07-20 15:41:47,274 - __main__ - INFO - sglang running req: 11 queue req: 108
- 2025-07-20 15:41:48,480 - sglang - INFO - [2025-07-20 15:41:48 TP0] Decode batch. #running-req: 12, #token: 32310, token usage: 0.85, gen throughput (token/s): 265.51, #queue-req: 108
- 2025-07-20 15:41:48,480 - __main__ - INFO - sglang running req: 12 queue req: 108
- 2025-07-20 15:41:49,488 - sglang - INFO - [2025-07-20 15:41:49 TP0] Decode batch. #running-req: 12, #token: 32790, token usage: 0.86, gen throughput (token/s): 476.42, #queue-req: 108
- 2025-07-20 15:41:49,488 - __main__ - INFO - sglang running req: 12 queue req: 108
- 2025-07-20 15:41:50,488 - sglang - INFO - [2025-07-20 15:41:50 TP0] Decode batch. #running-req: 11, #token: 30275, token usage: 0.80, gen throughput (token/s): 479.01, #queue-req: 108
- 2025-07-20 15:41:50,488 - __main__ - INFO - sglang running req: 11 queue req: 108
- 2025-07-20 15:41:50,488 - sglang - INFO - [2025-07-20 15:41:50 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 11, #queue-req: 107
- 2025-07-20 15:41:50,488 - __main__ - INFO - sglang running req: 11 queue req: 107
- 2025-07-20 15:41:51,541 - sglang - INFO - [2025-07-20 15:41:51 TP0] Prefill batch. #new-seq: 1, #new-token: 1272, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 11, #queue-req: 106
- 2025-07-20 15:41:51,541 - __main__ - INFO - sglang running req: 11 queue req: 106
- 2025-07-20 15:41:52,296 - sglang - INFO - [2025-07-20 15:41:52 TP0] Prefill batch. #new-seq: 1, #new-token: 1269, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 11, #queue-req: 105
- 2025-07-20 15:41:52,296 - __main__ - INFO - sglang running req: 11 queue req: 105
- 2025-07-20 15:41:53,152 - sglang - INFO - [2025-07-20 15:41:53 TP0] Decode batch. #running-req: 12, #token: 29980, token usage: 0.79, gen throughput (token/s): 179.44, #queue-req: 105
- 2025-07-20 15:41:53,152 - __main__ - INFO - sglang running req: 12 queue req: 105
- 2025-07-20 15:41:53,815 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:41:53,815 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 903.56 1035.16
- sglang_output_tokens 257.66 287.25
- 2025-07-20 15:41:53,815 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 393 | 500
- 1 | 0 | 10
- 2025-07-20 15:41:54,142 - sglang - INFO - [2025-07-20 15:41:54 TP0] Decode batch. #running-req: 12, #token: 30460, token usage: 0.80, gen throughput (token/s): 484.44, #queue-req: 105
- 2025-07-20 15:41:54,143 - __main__ - INFO - sglang running req: 12 queue req: 105
- 2025-07-20 15:41:54,192 - sglang - INFO - [2025-07-20 15:41:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2107, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 11, #queue-req: 104
- 2025-07-20 15:41:54,192 - __main__ - INFO - sglang running req: 11 queue req: 104
- 2025-07-20 15:41:55,069 - sglang - INFO - [2025-07-20 15:41:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2562, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 11, #queue-req: 103
- 2025-07-20 15:41:55,069 - __main__ - INFO - sglang running req: 11 queue req: 103
- 2025-07-20 15:41:56,622 - sglang - INFO - [2025-07-20 15:41:56 TP0] Decode batch. #running-req: 12, #token: 29137, token usage: 0.77, gen throughput (token/s): 192.73, #queue-req: 103
- 2025-07-20 15:41:56,623 - __main__ - INFO - sglang running req: 12 queue req: 103
- 2025-07-20 15:41:57,614 - sglang - INFO - [2025-07-20 15:41:57 TP0] Decode batch. #running-req: 12, #token: 29617, token usage: 0.78, gen throughput (token/s): 483.98, #queue-req: 103
- 2025-07-20 15:41:57,615 - __main__ - INFO - sglang running req: 12 queue req: 103
- 2025-07-20 15:41:57,912 - sglang - INFO - [2025-07-20 15:41:57 TP0] Prefill batch. #new-seq: 1, #new-token: 1964, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 11, #queue-req: 102
- 2025-07-20 15:41:57,912 - __main__ - INFO - sglang running req: 11 queue req: 102
- 2025-07-20 15:41:58,619 - sglang - INFO - [2025-07-20 15:41:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2122, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 11, #queue-req: 101
- 2025-07-20 15:41:58,619 - __main__ - INFO - sglang running req: 11 queue req: 101
- 2025-07-20 15:41:59,441 - sglang - INFO - [2025-07-20 15:41:59 TP0] Prefill batch. #new-seq: 1, #new-token: 3261, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 11, #queue-req: 100
- 2025-07-20 15:41:59,442 - __main__ - INFO - sglang running req: 11 queue req: 100
- 2025-07-20 15:42:00,550 - sglang - INFO - [2025-07-20 15:42:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2649, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 99
- 2025-07-20 15:42:00,550 - __main__ - INFO - sglang running req: 10 queue req: 99
- 2025-07-20 15:42:01,586 - sglang - INFO - [2025-07-20 15:42:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2396, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 98
- 2025-07-20 15:42:01,586 - __main__ - INFO - sglang running req: 10 queue req: 98
- 2025-07-20 15:42:02,385 - sglang - INFO - [2025-07-20 15:42:02 TP0] Decode batch. #running-req: 11, #token: 28450, token usage: 0.75, gen throughput (token/s): 96.84, #queue-req: 98
- 2025-07-20 15:42:02,385 - __main__ - INFO - sglang running req: 11 queue req: 98
- 2025-07-20 15:42:03,247 - sglang - INFO - [2025-07-20 15:42:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2385, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 97
- 2025-07-20 15:42:03,247 - __main__ - INFO - sglang running req: 10 queue req: 97
- 2025-07-20 15:42:03,818 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:42:03,818 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 912.80 1062.73
- sglang_output_tokens 259.75 293.32
- 2025-07-20 15:42:03,818 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 402 | 500
- 1 | 0 | 10
- 2025-07-20 15:42:04,120 - sglang - INFO - [2025-07-20 15:42:04 TP0] Decode batch. #running-req: 11, #token: 28484, token usage: 0.75, gen throughput (token/s): 253.02, #queue-req: 97
- 2025-07-20 15:42:04,120 - __main__ - INFO - sglang running req: 11 queue req: 97
- 2025-07-20 15:42:05,105 - sglang - INFO - [2025-07-20 15:42:05 TP0] Decode batch. #running-req: 11, #token: 28924, token usage: 0.76, gen throughput (token/s): 446.50, #queue-req: 97
- 2025-07-20 15:42:05,106 - __main__ - INFO - sglang running req: 11 queue req: 97
- 2025-07-20 15:42:05,624 - sglang - INFO - [2025-07-20 15:42:05 TP0] Prefill batch. #new-seq: 1, #new-token: 2626, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 96
- 2025-07-20 15:42:05,624 - __main__ - INFO - sglang running req: 10 queue req: 96
- 2025-07-20 15:42:06,882 - sglang - INFO - [2025-07-20 15:42:06 TP0] Decode batch. #running-req: 11, #token: 29748, token usage: 0.78, gen throughput (token/s): 247.15, #queue-req: 96
- 2025-07-20 15:42:06,882 - __main__ - INFO - sglang running req: 11 queue req: 96
- 2025-07-20 15:42:07,870 - sglang - INFO - [2025-07-20 15:42:07 TP0] Decode batch. #running-req: 11, #token: 30188, token usage: 0.79, gen throughput (token/s): 445.15, #queue-req: 96
- 2025-07-20 15:42:07,870 - __main__ - INFO - sglang running req: 11 queue req: 96
- 2025-07-20 15:42:08,341 - sglang - INFO - [2025-07-20 15:42:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2874, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 95
- 2025-07-20 15:42:08,341 - __main__ - INFO - sglang running req: 10 queue req: 95
- 2025-07-20 15:42:09,688 - sglang - INFO - [2025-07-20 15:42:09 TP0] Decode batch. #running-req: 11, #token: 30182, token usage: 0.79, gen throughput (token/s): 241.50, #queue-req: 95
- 2025-07-20 15:42:09,688 - __main__ - INFO - sglang running req: 11 queue req: 95
- 2025-07-20 15:42:10,676 - sglang - INFO - [2025-07-20 15:42:10 TP0] Decode batch. #running-req: 11, #token: 30622, token usage: 0.81, gen throughput (token/s): 445.28, #queue-req: 95
- 2025-07-20 15:42:10,676 - __main__ - INFO - sglang running req: 11 queue req: 95
- 2025-07-20 15:42:11,472 - sglang - INFO - [2025-07-20 15:42:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1918, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 94
- 2025-07-20 15:42:11,472 - __main__ - INFO - sglang running req: 10 queue req: 94
- 2025-07-20 15:42:12,333 - sglang - INFO - [2025-07-20 15:42:12 TP0] Decode batch. #running-req: 11, #token: 30483, token usage: 0.80, gen throughput (token/s): 264.92, #queue-req: 94
- 2025-07-20 15:42:12,333 - __main__ - INFO - sglang running req: 11 queue req: 94
- 2025-07-20 15:42:13,322 - sglang - INFO - [2025-07-20 15:42:13 TP0] Decode batch. #running-req: 11, #token: 30923, token usage: 0.81, gen throughput (token/s): 445.14, #queue-req: 94
- 2025-07-20 15:42:13,322 - __main__ - INFO - sglang running req: 11 queue req: 94
- 2025-07-20 15:42:13,819 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:42:13,819 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 910.30 1051.30
- sglang_output_tokens 258.73 290.07
- 2025-07-20 15:42:13,819 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 405 | 500
- 1 | 0 | 10
- 2025-07-20 15:42:14,312 - sglang - INFO - [2025-07-20 15:42:14 TP0] Decode batch. #running-req: 11, #token: 31363, token usage: 0.83, gen throughput (token/s): 444.35, #queue-req: 94
- 2025-07-20 15:42:14,312 - __main__ - INFO - sglang running req: 11 queue req: 94
- 2025-07-20 15:42:15,304 - sglang - INFO - [2025-07-20 15:42:15 TP0] Decode batch. #running-req: 11, #token: 31803, token usage: 0.84, gen throughput (token/s): 443.28, #queue-req: 94
- 2025-07-20 15:42:15,305 - __main__ - INFO - sglang running req: 11 queue req: 94
- 2025-07-20 15:42:16,297 - sglang - INFO - [2025-07-20 15:42:16 TP0] Decode batch. #running-req: 11, #token: 32243, token usage: 0.85, gen throughput (token/s): 443.42, #queue-req: 94
- 2025-07-20 15:42:16,297 - __main__ - INFO - sglang running req: 11 queue req: 94
- 2025-07-20 15:42:16,990 - sglang - INFO - [2025-07-20 15:42:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2169, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 93
- 2025-07-20 15:42:16,991 - __main__ - INFO - sglang running req: 10 queue req: 93
- 2025-07-20 15:42:17,964 - sglang - INFO - [2025-07-20 15:42:17 TP0] Decode batch. #running-req: 11, #token: 31711, token usage: 0.83, gen throughput (token/s): 263.35, #queue-req: 93
- 2025-07-20 15:42:17,964 - __main__ - INFO - sglang running req: 11 queue req: 93
- 2025-07-20 15:42:18,038 - sglang - INFO - [2025-07-20 15:42:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2421, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 92
- 2025-07-20 15:42:18,038 - __main__ - INFO - sglang running req: 10 queue req: 92
- 2025-07-20 15:42:19,858 - sglang - INFO - [2025-07-20 15:42:19 TP0] Decode batch. #running-req: 10, #token: 29179, token usage: 0.77, gen throughput (token/s): 231.21, #queue-req: 92
- 2025-07-20 15:42:19,858 - __main__ - INFO - sglang running req: 10 queue req: 92
- 2025-07-20 15:42:19,858 - sglang - INFO - [2025-07-20 15:42:19 TP0] Prefill batch. #new-seq: 1, #new-token: 3211, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 91
- 2025-07-20 15:42:19,859 - __main__ - INFO - sglang running req: 10 queue req: 91
- 2025-07-20 15:42:21,436 - sglang - INFO - [2025-07-20 15:42:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2187, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 90
- 2025-07-20 15:42:21,436 - __main__ - INFO - sglang running req: 10 queue req: 90
- 2025-07-20 15:42:22,484 - sglang - INFO - [2025-07-20 15:42:22 TP0] Decode batch. #running-req: 11, #token: 28526, token usage: 0.75, gen throughput (token/s): 167.21, #queue-req: 90
- 2025-07-20 15:42:22,484 - __main__ - INFO - sglang running req: 11 queue req: 90
- 2025-07-20 15:42:22,509 - sglang - INFO - [2025-07-20 15:42:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1759, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 89
- 2025-07-20 15:42:22,509 - __main__ - INFO - sglang running req: 10 queue req: 89
- 2025-07-20 15:42:23,820 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:42:23,821 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 913.10 1052.30
- sglang_output_tokens 259.11 289.47
- 2025-07-20 15:42:23,821 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 410 | 500
- 1 | 0 | 10
- 2025-07-20 15:42:24,079 - sglang - INFO - [2025-07-20 15:42:24 TP0] Decode batch. #running-req: 11, #token: 30724, token usage: 0.81, gen throughput (token/s): 275.17, #queue-req: 89
- 2025-07-20 15:42:24,079 - __main__ - INFO - sglang running req: 11 queue req: 89
- 2025-07-20 15:42:24,696 - sglang - INFO - [2025-07-20 15:42:24 TP0] Prefill batch. #new-seq: 1, #new-token: 1856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 88
- 2025-07-20 15:42:24,696 - __main__ - INFO - sglang running req: 10 queue req: 88
- 2025-07-20 15:42:25,590 - sglang - INFO - [2025-07-20 15:42:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2445, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 87
- 2025-07-20 15:42:25,590 - __main__ - INFO - sglang running req: 10 queue req: 87
- 2025-07-20 15:42:26,465 - sglang - INFO - [2025-07-20 15:42:26 TP0] Decode batch. #running-req: 11, #token: 29396, token usage: 0.77, gen throughput (token/s): 183.58, #queue-req: 87
- 2025-07-20 15:42:26,465 - __main__ - INFO - sglang running req: 11 queue req: 87
- 2025-07-20 15:42:26,515 - sglang - INFO - [2025-07-20 15:42:26 TP0] Prefill batch. #new-seq: 1, #new-token: 1511, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 86
- 2025-07-20 15:42:26,515 - __main__ - INFO - sglang running req: 10 queue req: 86
- 2025-07-20 15:42:28,012 - sglang - INFO - [2025-07-20 15:42:28 TP0] Decode batch. #running-req: 11, #token: 29059, token usage: 0.76, gen throughput (token/s): 283.72, #queue-req: 86
- 2025-07-20 15:42:28,012 - __main__ - INFO - sglang running req: 11 queue req: 86
- 2025-07-20 15:42:28,824 - sglang - INFO - [2025-07-20 15:42:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2401, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 85
- 2025-07-20 15:42:28,824 - __main__ - INFO - sglang running req: 10 queue req: 85
- 2025-07-20 15:42:29,728 - sglang - INFO - [2025-07-20 15:42:29 TP0] Decode batch. #running-req: 11, #token: 28662, token usage: 0.75, gen throughput (token/s): 255.84, #queue-req: 85
- 2025-07-20 15:42:29,728 - __main__ - INFO - sglang running req: 11 queue req: 85
- 2025-07-20 15:42:30,712 - sglang - INFO - [2025-07-20 15:42:30 TP0] Decode batch. #running-req: 11, #token: 29102, token usage: 0.77, gen throughput (token/s): 447.01, #queue-req: 85
- 2025-07-20 15:42:30,713 - __main__ - INFO - sglang running req: 11 queue req: 85
- 2025-07-20 15:42:31,697 - sglang - INFO - [2025-07-20 15:42:31 TP0] Decode batch. #running-req: 11, #token: 29542, token usage: 0.78, gen throughput (token/s): 447.08, #queue-req: 85
- 2025-07-20 15:42:31,697 - __main__ - INFO - sglang running req: 11 queue req: 85
- 2025-07-20 15:42:32,657 - sglang - INFO - [2025-07-20 15:42:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2574, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 84
- 2025-07-20 15:42:32,658 - __main__ - INFO - sglang running req: 10 queue req: 84
- 2025-07-20 15:42:33,464 - sglang - INFO - [2025-07-20 15:42:33 TP0] Decode batch. #running-req: 11, #token: 30848, token usage: 0.81, gen throughput (token/s): 248.36, #queue-req: 84
- 2025-07-20 15:42:33,464 - __main__ - INFO - sglang running req: 11 queue req: 84
- 2025-07-20 15:42:33,822 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:42:33,822 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 914.70 1056.36
- sglang_output_tokens 259.00 288.49
- 2025-07-20 15:42:33,823 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 415 | 500
- 1 | 0 | 10
- 2025-07-20 15:42:34,208 - sglang - INFO - [2025-07-20 15:42:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2965, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 83
- 2025-07-20 15:42:34,209 - __main__ - INFO - sglang running req: 10 queue req: 83
- 2025-07-20 15:42:35,322 - sglang - INFO - [2025-07-20 15:42:35 TP0] Decode batch. #running-req: 11, #token: 31682, token usage: 0.83, gen throughput (token/s): 236.34, #queue-req: 83
- 2025-07-20 15:42:35,322 - __main__ - INFO - sglang running req: 11 queue req: 83
- 2025-07-20 15:42:35,644 - sglang - INFO - [2025-07-20 15:42:35 TP0] Prefill batch. #new-seq: 1, #new-token: 1903, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 82
- 2025-07-20 15:42:35,644 - __main__ - INFO - sglang running req: 10 queue req: 82
- 2025-07-20 15:42:36,960 - sglang - INFO - [2025-07-20 15:42:36 TP0] Decode batch. #running-req: 11, #token: 30437, token usage: 0.80, gen throughput (token/s): 267.98, #queue-req: 82
- 2025-07-20 15:42:36,960 - __main__ - INFO - sglang running req: 11 queue req: 82
- 2025-07-20 15:42:37,949 - sglang - INFO - [2025-07-20 15:42:37 TP0] Decode batch. #running-req: 11, #token: 30877, token usage: 0.81, gen throughput (token/s): 444.92, #queue-req: 82
- 2025-07-20 15:42:37,949 - __main__ - INFO - sglang running req: 11 queue req: 82
- 2025-07-20 15:42:38,543 - sglang - INFO - [2025-07-20 15:42:38 TP0] Prefill batch. #new-seq: 1, #new-token: 2292, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 81
- 2025-07-20 15:42:38,543 - __main__ - INFO - sglang running req: 10 queue req: 81
- 2025-07-20 15:42:39,663 - sglang - INFO - [2025-07-20 15:42:39 TP0] Decode batch. #running-req: 11, #token: 31427, token usage: 0.83, gen throughput (token/s): 256.13, #queue-req: 81
- 2025-07-20 15:42:39,663 - __main__ - INFO - sglang running req: 11 queue req: 81
- 2025-07-20 15:42:40,653 - sglang - INFO - [2025-07-20 15:42:40 TP0] Decode batch. #running-req: 11, #token: 31867, token usage: 0.84, gen throughput (token/s): 444.34, #queue-req: 81
- 2025-07-20 15:42:40,653 - __main__ - INFO - sglang running req: 11 queue req: 81
- 2025-07-20 15:42:41,647 - sglang - INFO - [2025-07-20 15:42:41 TP0] Decode batch. #running-req: 11, #token: 32307, token usage: 0.85, gen throughput (token/s): 442.53, #queue-req: 81
- 2025-07-20 15:42:41,648 - __main__ - INFO - sglang running req: 11 queue req: 81
- 2025-07-20 15:42:41,971 - sglang - INFO - [2025-07-20 15:42:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2096, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 80
- 2025-07-20 15:42:41,971 - __main__ - INFO - sglang running req: 10 queue req: 80
- 2025-07-20 15:42:43,328 - sglang - INFO - [2025-07-20 15:42:43 TP0] Decode batch. #running-req: 11, #token: 32519, token usage: 0.86, gen throughput (token/s): 261.21, #queue-req: 80
- 2025-07-20 15:42:43,328 - __main__ - INFO - sglang running req: 11 queue req: 80
- 2025-07-20 15:42:43,824 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:42:43,824 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 914.24 1044.39
- sglang_output_tokens 258.45 283.01
- 2025-07-20 15:42:43,824 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 419 | 500
- 1 | 0 | 10
- 2025-07-20 15:42:44,323 - sglang - INFO - [2025-07-20 15:42:44 TP0] Decode batch. #running-req: 11, #token: 32959, token usage: 0.87, gen throughput (token/s): 442.26, #queue-req: 80
- 2025-07-20 15:42:44,323 - __main__ - INFO - sglang running req: 11 queue req: 80
- 2025-07-20 15:42:45,392 - sglang - INFO - [2025-07-20 15:42:45 TP0] Decode batch. #running-req: 11, #token: 33399, token usage: 0.88, gen throughput (token/s): 411.52, #queue-req: 80
- 2025-07-20 15:42:45,392 - __main__ - INFO - sglang running req: 11 queue req: 80
- 2025-07-20 15:42:46,390 - sglang - INFO - [2025-07-20 15:42:46 TP0] Decode batch. #running-req: 11, #token: 33839, token usage: 0.89, gen throughput (token/s): 440.69, #queue-req: 80
- 2025-07-20 15:42:46,391 - __main__ - INFO - sglang running req: 11 queue req: 80
- 2025-07-20 15:42:47,385 - sglang - INFO - [2025-07-20 15:42:47 TP0] Decode batch. #running-req: 11, #token: 34279, token usage: 0.90, gen throughput (token/s): 442.36, #queue-req: 80
- 2025-07-20 15:42:47,385 - __main__ - INFO - sglang running req: 11 queue req: 80
- 2025-07-20 15:42:48,380 - sglang - INFO - [2025-07-20 15:42:48 TP0] Decode batch. #running-req: 10, #token: 31589, token usage: 0.83, gen throughput (token/s): 423.10, #queue-req: 80
- 2025-07-20 15:42:48,381 - __main__ - INFO - sglang running req: 10 queue req: 80
- 2025-07-20 15:42:48,775 - sglang - INFO - [2025-07-20 15:42:48 TP0] Prefill batch. #new-seq: 1, #new-token: 1387, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 79
- 2025-07-20 15:42:48,775 - __main__ - INFO - sglang running req: 9 queue req: 79
- 2025-07-20 15:42:49,418 - sglang - INFO - [2025-07-20 15:42:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2621, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 78
- 2025-07-20 15:42:49,419 - __main__ - INFO - sglang running req: 9 queue req: 78
- 2025-07-20 15:42:50,476 - sglang - INFO - [2025-07-20 15:42:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2390, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 77
- 2025-07-20 15:42:50,476 - __main__ - INFO - sglang running req: 9 queue req: 77
- 2025-07-20 15:42:51,418 - sglang - INFO - [2025-07-20 15:42:51 TP0] Decode batch. #running-req: 10, #token: 29770, token usage: 0.78, gen throughput (token/s): 130.67, #queue-req: 77
- 2025-07-20 15:42:51,419 - __main__ - INFO - sglang running req: 10 queue req: 77
- 2025-07-20 15:42:52,402 - sglang - INFO - [2025-07-20 15:42:52 TP0] Decode batch. #running-req: 10, #token: 30170, token usage: 0.79, gen throughput (token/s): 406.58, #queue-req: 77
- 2025-07-20 15:42:52,402 - __main__ - INFO - sglang running req: 10 queue req: 77
- 2025-07-20 15:42:52,599 - sglang - INFO - [2025-07-20 15:42:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2078, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 76
- 2025-07-20 15:42:52,599 - __main__ - INFO - sglang running req: 9 queue req: 76
- 2025-07-20 15:42:53,825 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:42:53,825 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 916.19 1056.89
- sglang_output_tokens 259.02 287.26
- 2025-07-20 15:42:53,826 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 424 | 500
- 1 | 0 | 10
- 2025-07-20 15:42:54,067 - sglang - INFO - [2025-07-20 15:42:54 TP0] Decode batch. #running-req: 10, #token: 29932, token usage: 0.79, gen throughput (token/s): 239.67, #queue-req: 76
- 2025-07-20 15:42:54,067 - __main__ - INFO - sglang running req: 10 queue req: 76
- 2025-07-20 15:42:54,633 - sglang - INFO - [2025-07-20 15:42:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2271, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 75
- 2025-07-20 15:42:54,633 - __main__ - INFO - sglang running req: 9 queue req: 75
- 2025-07-20 15:42:55,431 - sglang - INFO - [2025-07-20 15:42:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2238, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 74
- 2025-07-20 15:42:55,431 - __main__ - INFO - sglang running req: 9 queue req: 74
- 2025-07-20 15:42:56,350 - sglang - INFO - [2025-07-20 15:42:56 TP0] Prefill batch. #new-seq: 2, #new-token: 4141, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 9, #queue-req: 72
- 2025-07-20 15:42:56,350 - __main__ - INFO - sglang running req: 9 queue req: 72
- 2025-07-20 15:42:57,819 - sglang - INFO - [2025-07-20 15:42:57 TP0] Decode batch. #running-req: 11, #token: 28566, token usage: 0.75, gen throughput (token/s): 107.41, #queue-req: 72
- 2025-07-20 15:42:57,819 - __main__ - INFO - sglang running req: 11 queue req: 72
- 2025-07-20 15:42:58,804 - sglang - INFO - [2025-07-20 15:42:58 TP0] Decode batch. #running-req: 11, #token: 29006, token usage: 0.76, gen throughput (token/s): 446.64, #queue-req: 72
- 2025-07-20 15:42:58,804 - __main__ - INFO - sglang running req: 11 queue req: 72
- 2025-07-20 15:42:59,125 - sglang - INFO - [2025-07-20 15:42:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2579, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 71
- 2025-07-20 15:42:59,125 - __main__ - INFO - sglang running req: 10 queue req: 71
- 2025-07-20 15:43:00,576 - sglang - INFO - [2025-07-20 15:43:00 TP0] Decode batch. #running-req: 11, #token: 30441, token usage: 0.80, gen throughput (token/s): 247.75, #queue-req: 71
- 2025-07-20 15:43:00,576 - __main__ - INFO - sglang running req: 11 queue req: 71
- 2025-07-20 15:43:01,565 - sglang - INFO - [2025-07-20 15:43:01 TP0] Decode batch. #running-req: 11, #token: 30881, token usage: 0.81, gen throughput (token/s): 444.91, #queue-req: 71
- 2025-07-20 15:43:01,565 - __main__ - INFO - sglang running req: 11 queue req: 71
- 2025-07-20 15:43:01,664 - sglang - INFO - [2025-07-20 15:43:01 TP0] Prefill batch. #new-seq: 1, #new-token: 3211, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 70
- 2025-07-20 15:43:01,664 - __main__ - INFO - sglang running req: 10 queue req: 70
- 2025-07-20 15:43:03,453 - sglang - INFO - [2025-07-20 15:43:03 TP0] Decode batch. #running-req: 11, #token: 31240, token usage: 0.82, gen throughput (token/s): 232.50, #queue-req: 70
- 2025-07-20 15:43:03,453 - __main__ - INFO - sglang running req: 11 queue req: 70
- 2025-07-20 15:43:03,802 - sglang - INFO - [2025-07-20 15:43:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2376, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 69
- 2025-07-20 15:43:03,802 - __main__ - INFO - sglang running req: 10 queue req: 69
- 2025-07-20 15:43:03,826 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:43:03,827 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 920.23 1058.23
- sglang_output_tokens 260.09 287.64
- 2025-07-20 15:43:03,827 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 430 | 500
- 1 | 0 | 10
- 2025-07-20 15:43:04,722 - sglang - INFO - [2025-07-20 15:43:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1547, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 68
- 2025-07-20 15:43:04,722 - __main__ - INFO - sglang running req: 10 queue req: 68
- 2025-07-20 15:43:05,769 - sglang - INFO - [2025-07-20 15:43:05 TP0] Decode batch. #running-req: 11, #token: 29815, token usage: 0.78, gen throughput (token/s): 189.09, #queue-req: 68
- 2025-07-20 15:43:05,770 - __main__ - INFO - sglang running req: 11 queue req: 68
- 2025-07-20 15:43:06,755 - sglang - INFO - [2025-07-20 15:43:06 TP0] Decode batch. #running-req: 11, #token: 30255, token usage: 0.80, gen throughput (token/s): 446.23, #queue-req: 68
- 2025-07-20 15:43:06,756 - __main__ - INFO - sglang running req: 11 queue req: 68
- 2025-07-20 15:43:07,745 - sglang - INFO - [2025-07-20 15:43:07 TP0] Decode batch. #running-req: 11, #token: 30695, token usage: 0.81, gen throughput (token/s): 444.69, #queue-req: 68
- 2025-07-20 15:43:07,745 - __main__ - INFO - sglang running req: 11 queue req: 68
- 2025-07-20 15:43:08,735 - sglang - INFO - [2025-07-20 15:43:08 TP0] Decode batch. #running-req: 11, #token: 31135, token usage: 0.82, gen throughput (token/s): 444.63, #queue-req: 68
- 2025-07-20 15:43:08,735 - __main__ - INFO - sglang running req: 11 queue req: 68
- 2025-07-20 15:43:09,724 - sglang - INFO - [2025-07-20 15:43:09 TP0] Decode batch. #running-req: 11, #token: 31575, token usage: 0.83, gen throughput (token/s): 444.93, #queue-req: 68
- 2025-07-20 15:43:09,724 - __main__ - INFO - sglang running req: 11 queue req: 68
- 2025-07-20 15:43:10,712 - sglang - INFO - [2025-07-20 15:43:10 TP0] Decode batch. #running-req: 11, #token: 32015, token usage: 0.84, gen throughput (token/s): 445.08, #queue-req: 68
- 2025-07-20 15:43:10,712 - __main__ - INFO - sglang running req: 11 queue req: 68
- 2025-07-20 15:43:11,455 - sglang - INFO - [2025-07-20 15:43:11 TP0] Prefill batch. #new-seq: 1, #new-token: 2480, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 67
- 2025-07-20 15:43:11,456 - __main__ - INFO - sglang running req: 10 queue req: 67
- 2025-07-20 15:43:12,456 - sglang - INFO - [2025-07-20 15:43:12 TP0] Decode batch. #running-req: 9, #token: 25570, token usage: 0.67, gen throughput (token/s): 250.49, #queue-req: 67
- 2025-07-20 15:43:12,457 - __main__ - INFO - sglang running req: 9 queue req: 67
- 2025-07-20 15:43:12,457 - sglang - INFO - [2025-07-20 15:43:12 TP0] Prefill batch. #new-seq: 1, #new-token: 3361, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 66
- 2025-07-20 15:43:12,457 - __main__ - INFO - sglang running req: 9 queue req: 66
- 2025-07-20 15:43:13,828 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:43:13,828 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 921.06 1060.37
- sglang_output_tokens 260.46 288.68
- 2025-07-20 15:43:13,828 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 434 | 500
- 1 | 0 | 10
- 2025-07-20 15:43:14,183 - sglang - INFO - [2025-07-20 15:43:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2132, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 65
- 2025-07-20 15:43:14,183 - __main__ - INFO - sglang running req: 9 queue req: 65
- 2025-07-20 15:43:15,000 - sglang - INFO - [2025-07-20 15:43:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1946, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 64
- 2025-07-20 15:43:15,000 - __main__ - INFO - sglang running req: 9 queue req: 64
- 2025-07-20 15:43:15,702 - sglang - INFO - [2025-07-20 15:43:15 TP0] Decode batch. #running-req: 10, #token: 27912, token usage: 0.73, gen throughput (token/s): 122.63, #queue-req: 64
- 2025-07-20 15:43:15,703 - __main__ - INFO - sglang running req: 10 queue req: 64
- 2025-07-20 15:43:16,774 - sglang - INFO - [2025-07-20 15:43:16 TP0] Decode batch. #running-req: 10, #token: 28312, token usage: 0.75, gen throughput (token/s): 373.29, #queue-req: 64
- 2025-07-20 15:43:16,774 - __main__ - INFO - sglang running req: 10 queue req: 64
- 2025-07-20 15:43:17,763 - sglang - INFO - [2025-07-20 15:43:17 TP0] Decode batch. #running-req: 10, #token: 28712, token usage: 0.76, gen throughput (token/s): 404.48, #queue-req: 64
- 2025-07-20 15:43:17,763 - __main__ - INFO - sglang running req: 10 queue req: 64
- 2025-07-20 15:43:18,741 - sglang - INFO - [2025-07-20 15:43:18 TP0] Decode batch. #running-req: 10, #token: 29112, token usage: 0.77, gen throughput (token/s): 408.71, #queue-req: 64
- 2025-07-20 15:43:18,742 - __main__ - INFO - sglang running req: 10 queue req: 64
- 2025-07-20 15:43:19,451 - sglang - INFO - [2025-07-20 15:43:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2343, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 63
- 2025-07-20 15:43:19,451 - __main__ - INFO - sglang running req: 9 queue req: 63
- 2025-07-20 15:43:20,492 - sglang - INFO - [2025-07-20 15:43:20 TP0] Decode batch. #running-req: 10, #token: 28939, token usage: 0.76, gen throughput (token/s): 227.85, #queue-req: 63
- 2025-07-20 15:43:20,493 - __main__ - INFO - sglang running req: 10 queue req: 63
- 2025-07-20 15:43:21,277 - sglang - INFO - [2025-07-20 15:43:21 TP0] Prefill batch. #new-seq: 2, #new-token: 3593, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 61
- 2025-07-20 15:43:21,277 - __main__ - INFO - sglang running req: 9 queue req: 61
- 2025-07-20 15:43:22,683 - sglang - INFO - [2025-07-20 15:43:22 TP0] Decode batch. #running-req: 11, #token: 29508, token usage: 0.78, gen throughput (token/s): 185.83, #queue-req: 61
- 2025-07-20 15:43:22,683 - __main__ - INFO - sglang running req: 11 queue req: 61
- 2025-07-20 15:43:22,929 - sglang - INFO - [2025-07-20 15:43:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 60
- 2025-07-20 15:43:22,929 - __main__ - INFO - sglang running req: 10 queue req: 60
- 2025-07-20 15:43:23,830 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:43:23,831 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 923.57 1077.76
- sglang_output_tokens 260.87 291.63
- 2025-07-20 15:43:23,831 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 439 | 500
- 1 | 0 | 10
- 2025-07-20 15:43:23,859 - sglang - INFO - [2025-07-20 15:43:23 TP0] Prefill batch. #new-seq: 2, #new-token: 4060, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 10, #queue-req: 58
- 2025-07-20 15:43:23,860 - __main__ - INFO - sglang running req: 10 queue req: 58
- 2025-07-20 15:43:25,518 - sglang - INFO - [2025-07-20 15:43:25 TP0] Decode batch. #running-req: 12, #token: 29428, token usage: 0.77, gen throughput (token/s): 159.80, #queue-req: 58
- 2025-07-20 15:43:25,518 - __main__ - INFO - sglang running req: 12 queue req: 58
- 2025-07-20 15:43:26,505 - sglang - INFO - [2025-07-20 15:43:26 TP0] Decode batch. #running-req: 12, #token: 29908, token usage: 0.79, gen throughput (token/s): 486.15, #queue-req: 58
- 2025-07-20 15:43:26,505 - __main__ - INFO - sglang running req: 12 queue req: 58
- 2025-07-20 15:43:27,496 - sglang - INFO - [2025-07-20 15:43:27 TP0] Decode batch. #running-req: 12, #token: 30388, token usage: 0.80, gen throughput (token/s): 484.37, #queue-req: 58
- 2025-07-20 15:43:27,496 - __main__ - INFO - sglang running req: 12 queue req: 58
- 2025-07-20 15:43:28,491 - sglang - INFO - [2025-07-20 15:43:28 TP0] Decode batch. #running-req: 12, #token: 30868, token usage: 0.81, gen throughput (token/s): 482.47, #queue-req: 58
- 2025-07-20 15:43:28,491 - __main__ - INFO - sglang running req: 12 queue req: 58
- 2025-07-20 15:43:28,566 - sglang - INFO - [2025-07-20 15:43:28 TP0] Prefill batch. #new-seq: 1, #new-token: 1257, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 11, #queue-req: 57
- 2025-07-20 15:43:28,566 - __main__ - INFO - sglang running req: 11 queue req: 57
- 2025-07-20 15:43:29,321 - sglang - INFO - [2025-07-20 15:43:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2571, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 11, #queue-req: 56
- 2025-07-20 15:43:29,321 - __main__ - INFO - sglang running req: 11 queue req: 56
- 2025-07-20 15:43:30,780 - sglang - INFO - [2025-07-20 15:43:30 TP0] Decode batch. #running-req: 12, #token: 30644, token usage: 0.81, gen throughput (token/s): 208.80, #queue-req: 56
- 2025-07-20 15:43:30,780 - __main__ - INFO - sglang running req: 12 queue req: 56
- 2025-07-20 15:43:31,129 - sglang - INFO - [2025-07-20 15:43:31 TP0] Prefill batch. #new-seq: 1, #new-token: 3400, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 11, #queue-req: 55
- 2025-07-20 15:43:31,129 - __main__ - INFO - sglang running req: 11 queue req: 55
- 2025-07-20 15:43:32,724 - sglang - INFO - [2025-07-20 15:43:32 TP0] Decode batch. #running-req: 12, #token: 32162, token usage: 0.85, gen throughput (token/s): 246.43, #queue-req: 55
- 2025-07-20 15:43:32,724 - __main__ - INFO - sglang running req: 12 queue req: 55
- 2025-07-20 15:43:33,690 - sglang - INFO - [2025-07-20 15:43:33 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 54
- 2025-07-20 15:43:33,691 - __main__ - INFO - sglang running req: 10 queue req: 54
- 2025-07-20 15:43:33,832 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:43:33,832 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 925.94 1084.97
- sglang_output_tokens 261.06 291.57
- 2025-07-20 15:43:33,832 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 445 | 500
- 1 | 0 | 10
- 2025-07-20 15:43:34,364 - sglang - INFO - [2025-07-20 15:43:34 TP0] Decode batch. #running-req: 11, #token: 29473, token usage: 0.78, gen throughput (token/s): 275.55, #queue-req: 54
- 2025-07-20 15:43:34,365 - __main__ - INFO - sglang running req: 11 queue req: 54
- 2025-07-20 15:43:34,959 - sglang - INFO - [2025-07-20 15:43:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2553, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 53
- 2025-07-20 15:43:34,960 - __main__ - INFO - sglang running req: 10 queue req: 53
- 2025-07-20 15:43:36,115 - sglang - INFO - [2025-07-20 15:43:36 TP0] Decode batch. #running-req: 11, #token: 31068, token usage: 0.82, gen throughput (token/s): 250.71, #queue-req: 53
- 2025-07-20 15:43:36,116 - __main__ - INFO - sglang running req: 11 queue req: 53
- 2025-07-20 15:43:37,108 - sglang - INFO - [2025-07-20 15:43:37 TP0] Decode batch. #running-req: 11, #token: 31508, token usage: 0.83, gen throughput (token/s): 443.24, #queue-req: 53
- 2025-07-20 15:43:37,108 - __main__ - INFO - sglang running req: 11 queue req: 53
- 2025-07-20 15:43:37,357 - sglang - INFO - [2025-07-20 15:43:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2380, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 52
- 2025-07-20 15:43:37,357 - __main__ - INFO - sglang running req: 10 queue req: 52
- 2025-07-20 15:43:38,852 - sglang - INFO - [2025-07-20 15:43:38 TP0] Decode batch. #running-req: 11, #token: 32317, token usage: 0.85, gen throughput (token/s): 251.70, #queue-req: 52
- 2025-07-20 15:43:38,852 - __main__ - INFO - sglang running req: 11 queue req: 52
- 2025-07-20 15:43:39,698 - sglang - INFO - [2025-07-20 15:43:39 TP0] Prefill batch. #new-seq: 1, #new-token: 1982, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 51
- 2025-07-20 15:43:39,699 - __main__ - INFO - sglang running req: 10 queue req: 51
- 2025-07-20 15:43:40,504 - sglang - INFO - [2025-07-20 15:43:40 TP0] Decode batch. #running-req: 11, #token: 31965, token usage: 0.84, gen throughput (token/s): 265.78, #queue-req: 51
- 2025-07-20 15:43:40,504 - __main__ - INFO - sglang running req: 11 queue req: 51
- 2025-07-20 15:43:41,497 - sglang - INFO - [2025-07-20 15:43:41 TP0] Decode batch. #running-req: 11, #token: 32405, token usage: 0.85, gen throughput (token/s): 443.20, #queue-req: 51
- 2025-07-20 15:43:41,497 - __main__ - INFO - sglang running req: 11 queue req: 51
- 2025-07-20 15:43:42,494 - sglang - INFO - [2025-07-20 15:43:42 TP0] Decode batch. #running-req: 11, #token: 32845, token usage: 0.86, gen throughput (token/s): 441.06, #queue-req: 51
- 2025-07-20 15:43:42,494 - __main__ - INFO - sglang running req: 11 queue req: 51
- 2025-07-20 15:43:43,119 - sglang - INFO - [2025-07-20 15:43:43 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 10, #queue-req: 50
- 2025-07-20 15:43:43,119 - __main__ - INFO - sglang running req: 10 queue req: 50
- 2025-07-20 15:43:43,834 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:43:43,834 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 924.34 1063.76
- sglang_output_tokens 260.21 284.13
- 2025-07-20 15:43:43,834 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 449 | 500
- 1 | 0 | 10
- 2025-07-20 15:43:44,091 - sglang - INFO - [2025-07-20 15:43:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1908, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 49
- 2025-07-20 15:43:44,091 - __main__ - INFO - sglang running req: 10 queue req: 49
- 2025-07-20 15:43:44,793 - sglang - INFO - [2025-07-20 15:43:44 TP0] Decode batch. #running-req: 11, #token: 31988, token usage: 0.84, gen throughput (token/s): 190.52, #queue-req: 49
- 2025-07-20 15:43:44,793 - __main__ - INFO - sglang running req: 11 queue req: 49
- 2025-07-20 15:43:44,869 - sglang - INFO - [2025-07-20 15:43:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1487, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 48
- 2025-07-20 15:43:44,869 - __main__ - INFO - sglang running req: 10 queue req: 48
- 2025-07-20 15:43:46,404 - sglang - INFO - [2025-07-20 15:43:46 TP0] Decode batch. #running-req: 11, #token: 31292, token usage: 0.82, gen throughput (token/s): 272.57, #queue-req: 48
- 2025-07-20 15:43:46,404 - __main__ - INFO - sglang running req: 11 queue req: 48
- 2025-07-20 15:43:46,653 - sglang - INFO - [2025-07-20 15:43:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2564, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 47
- 2025-07-20 15:43:46,653 - __main__ - INFO - sglang running req: 10 queue req: 47
- 2025-07-20 15:43:48,181 - sglang - INFO - [2025-07-20 15:43:48 TP0] Decode batch. #running-req: 11, #token: 31291, token usage: 0.82, gen throughput (token/s): 247.13, #queue-req: 47
- 2025-07-20 15:43:48,182 - __main__ - INFO - sglang running req: 11 queue req: 47
- 2025-07-20 15:43:48,972 - sglang - INFO - [2025-07-20 15:43:48 TP0] Prefill batch. #new-seq: 1, #new-token: 2411, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 46
- 2025-07-20 15:43:48,973 - __main__ - INFO - sglang running req: 10 queue req: 46
- 2025-07-20 15:43:49,909 - sglang - INFO - [2025-07-20 15:43:49 TP0] Decode batch. #running-req: 11, #token: 31922, token usage: 0.84, gen throughput (token/s): 253.88, #queue-req: 46
- 2025-07-20 15:43:49,909 - __main__ - INFO - sglang running req: 11 queue req: 46
- 2025-07-20 15:43:50,904 - sglang - INFO - [2025-07-20 15:43:50 TP0] Decode batch. #running-req: 11, #token: 32362, token usage: 0.85, gen throughput (token/s): 442.26, #queue-req: 46
- 2025-07-20 15:43:50,904 - __main__ - INFO - sglang running req: 11 queue req: 46
- 2025-07-20 15:43:51,894 - sglang - INFO - [2025-07-20 15:43:51 TP0] Decode batch. #running-req: 10, #token: 31118, token usage: 0.82, gen throughput (token/s): 409.93, #queue-req: 46
- 2025-07-20 15:43:51,895 - __main__ - INFO - sglang running req: 10 queue req: 46
- 2025-07-20 15:43:52,884 - sglang - INFO - [2025-07-20 15:43:52 TP0] Decode batch. #running-req: 10, #token: 31518, token usage: 0.83, gen throughput (token/s): 404.42, #queue-req: 46
- 2025-07-20 15:43:52,884 - __main__ - INFO - sglang running req: 10 queue req: 46
- 2025-07-20 15:43:53,082 - sglang - INFO - [2025-07-20 15:43:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1820, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 9, #queue-req: 45
- 2025-07-20 15:43:53,082 - __main__ - INFO - sglang running req: 9 queue req: 45
- 2025-07-20 15:43:53,835 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:43:53,835 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 926.31 1084.65
- sglang_output_tokens 259.83 286.17
- 2025-07-20 15:43:53,836 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 455 | 500
- 1 | 0 | 10
- 2025-07-20 15:43:54,229 - sglang - INFO - [2025-07-20 15:43:54 TP0] Prefill batch. #new-seq: 1, #new-token: 1362, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 44
- 2025-07-20 15:43:54,229 - __main__ - INFO - sglang running req: 9 queue req: 44
- 2025-07-20 15:43:55,045 - sglang - INFO - [2025-07-20 15:43:55 TP0] Decode batch. #running-req: 10, #token: 29802, token usage: 0.78, gen throughput (token/s): 184.16, #queue-req: 44
- 2025-07-20 15:43:55,045 - __main__ - INFO - sglang running req: 10 queue req: 44
- 2025-07-20 15:43:55,636 - sglang - INFO - [2025-07-20 15:43:55 TP0] Prefill batch. #new-seq: 1, #new-token: 3184, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 43
- 2025-07-20 15:43:55,636 - __main__ - INFO - sglang running req: 9 queue req: 43
- 2025-07-20 15:43:56,923 - sglang - INFO - [2025-07-20 15:43:56 TP0] Decode batch. #running-req: 10, #token: 28755, token usage: 0.76, gen throughput (token/s): 212.44, #queue-req: 43
- 2025-07-20 15:43:56,923 - __main__ - INFO - sglang running req: 10 queue req: 43
- 2025-07-20 15:43:56,997 - sglang - INFO - [2025-07-20 15:43:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2644, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 42
- 2025-07-20 15:43:56,997 - __main__ - INFO - sglang running req: 9 queue req: 42
- 2025-07-20 15:43:58,699 - sglang - INFO - [2025-07-20 15:43:58 TP0] Decode batch. #running-req: 10, #token: 28906, token usage: 0.76, gen throughput (token/s): 224.67, #queue-req: 42
- 2025-07-20 15:43:58,699 - __main__ - INFO - sglang running req: 10 queue req: 42
- 2025-07-20 15:43:58,994 - sglang - INFO - [2025-07-20 15:43:58 TP0] Prefill batch. #new-seq: 1, #new-token: 1245, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 41
- 2025-07-20 15:43:58,994 - __main__ - INFO - sglang running req: 9 queue req: 41
- 2025-07-20 15:44:00,186 - sglang - INFO - [2025-07-20 15:44:00 TP0] Decode batch. #running-req: 10, #token: 27370, token usage: 0.72, gen throughput (token/s): 268.25, #queue-req: 41
- 2025-07-20 15:44:00,186 - __main__ - INFO - sglang running req: 10 queue req: 41
- 2025-07-20 15:44:01,167 - sglang - INFO - [2025-07-20 15:44:01 TP0] Decode batch. #running-req: 10, #token: 27770, token usage: 0.73, gen throughput (token/s): 407.73, #queue-req: 41
- 2025-07-20 15:44:01,168 - __main__ - INFO - sglang running req: 10 queue req: 41
- 2025-07-20 15:44:02,151 - sglang - INFO - [2025-07-20 15:44:02 TP0] Decode batch. #running-req: 10, #token: 28170, token usage: 0.74, gen throughput (token/s): 406.49, #queue-req: 41
- 2025-07-20 15:44:02,152 - __main__ - INFO - sglang running req: 10 queue req: 41
- 2025-07-20 15:44:02,545 - sglang - INFO - [2025-07-20 15:44:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2967, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 40
- 2025-07-20 15:44:02,545 - __main__ - INFO - sglang running req: 9 queue req: 40
- 2025-07-20 15:44:03,562 - sglang - INFO - [2025-07-20 15:44:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2119, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 39
- 2025-07-20 15:44:03,562 - __main__ - INFO - sglang running req: 9 queue req: 39
- 2025-07-20 15:44:03,837 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:44:03,837 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 929.84 1087.41
- sglang_output_tokens 260.68 286.18
- 2025-07-20 15:44:03,837 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 461 | 500
- 1 | 0 | 10
- 2025-07-20 15:44:04,406 - sglang - INFO - [2025-07-20 15:44:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2470, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 38
- 2025-07-20 15:44:04,406 - __main__ - INFO - sglang running req: 9 queue req: 38
- 2025-07-20 15:44:05,435 - sglang - INFO - [2025-07-20 15:44:05 TP0] Decode batch. #running-req: 10, #token: 30742, token usage: 0.81, gen throughput (token/s): 120.89, #queue-req: 38
- 2025-07-20 15:44:05,435 - __main__ - INFO - sglang running req: 10 queue req: 38
- 2025-07-20 15:44:06,425 - sglang - INFO - [2025-07-20 15:44:06 TP0] Decode batch. #running-req: 10, #token: 31142, token usage: 0.82, gen throughput (token/s): 404.31, #queue-req: 38
- 2025-07-20 15:44:06,425 - __main__ - INFO - sglang running req: 10 queue req: 38
- 2025-07-20 15:44:07,413 - sglang - INFO - [2025-07-20 15:44:07 TP0] Decode batch. #running-req: 10, #token: 31542, token usage: 0.83, gen throughput (token/s): 404.78, #queue-req: 38
- 2025-07-20 15:44:07,413 - __main__ - INFO - sglang running req: 10 queue req: 38
- 2025-07-20 15:44:08,404 - sglang - INFO - [2025-07-20 15:44:08 TP0] Decode batch. #running-req: 10, #token: 31942, token usage: 0.84, gen throughput (token/s): 403.57, #queue-req: 38
- 2025-07-20 15:44:08,404 - __main__ - INFO - sglang running req: 10 queue req: 38
- 2025-07-20 15:44:09,397 - sglang - INFO - [2025-07-20 15:44:09 TP0] Decode batch. #running-req: 10, #token: 32342, token usage: 0.85, gen throughput (token/s): 402.77, #queue-req: 38
- 2025-07-20 15:44:09,397 - __main__ - INFO - sglang running req: 10 queue req: 38
- 2025-07-20 15:44:09,745 - sglang - INFO - [2025-07-20 15:44:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2327, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 37
- 2025-07-20 15:44:09,746 - __main__ - INFO - sglang running req: 9 queue req: 37
- 2025-07-20 15:44:11,140 - sglang - INFO - [2025-07-20 15:44:11 TP0] Decode batch. #running-req: 10, #token: 31861, token usage: 0.84, gen throughput (token/s): 228.90, #queue-req: 37
- 2025-07-20 15:44:11,140 - __main__ - INFO - sglang running req: 10 queue req: 37
- 2025-07-20 15:44:12,132 - sglang - INFO - [2025-07-20 15:44:12 TP0] Decode batch. #running-req: 10, #token: 32261, token usage: 0.85, gen throughput (token/s): 403.31, #queue-req: 37
- 2025-07-20 15:44:12,132 - __main__ - INFO - sglang running req: 10 queue req: 37
- 2025-07-20 15:44:12,182 - sglang - INFO - [2025-07-20 15:44:12 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 9, #queue-req: 36
- 2025-07-20 15:44:12,182 - __main__ - INFO - sglang running req: 9 queue req: 36
- 2025-07-20 15:44:13,774 - sglang - INFO - [2025-07-20 15:44:13 TP0] Decode batch. #running-req: 10, #token: 32136, token usage: 0.85, gen throughput (token/s): 242.97, #queue-req: 36
- 2025-07-20 15:44:13,774 - __main__ - INFO - sglang running req: 10 queue req: 36
- 2025-07-20 15:44:13,828 - __main__ - WARNING - JSON decode error on attempt 0 for scripts/data/11445224007035644H44421110A0001.pdf-3: Expecting ',' delimiter: line 1 column 2694 (char 2693)
- 2025-07-20 15:44:13,839 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:44:13,839 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 930.14 1082.33
- sglang_output_tokens 261.25 286.52
- 2025-07-20 15:44:13,839 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 464 | 500
- 1 | 0 | 10
- 2025-07-20 15:44:13,849 - sglang - INFO - [2025-07-20 15:44:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2390, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 35
- 2025-07-20 15:44:13,849 - __main__ - INFO - sglang running req: 9 queue req: 35
- 2025-07-20 15:44:14,255 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-3
- 2025-07-20 15:44:15,222 - sglang - INFO - [2025-07-20 15:44:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1962, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 35
- 2025-07-20 15:44:15,222 - __main__ - INFO - sglang running req: 9 queue req: 35
- 2025-07-20 15:44:16,173 - sglang - INFO - [2025-07-20 15:44:16 TP0] Decode batch. #running-req: 10, #token: 28797, token usage: 0.76, gen throughput (token/s): 165.90, #queue-req: 35
- 2025-07-20 15:44:16,174 - __main__ - INFO - sglang running req: 10 queue req: 35
- 2025-07-20 15:44:17,160 - sglang - INFO - [2025-07-20 15:44:17 TP0] Decode batch. #running-req: 10, #token: 29197, token usage: 0.77, gen throughput (token/s): 405.37, #queue-req: 35
- 2025-07-20 15:44:17,160 - __main__ - INFO - sglang running req: 10 queue req: 35
- 2025-07-20 15:44:18,143 - sglang - INFO - [2025-07-20 15:44:18 TP0] Decode batch. #running-req: 10, #token: 29597, token usage: 0.78, gen throughput (token/s): 406.95, #queue-req: 35
- 2025-07-20 15:44:18,143 - __main__ - INFO - sglang running req: 10 queue req: 35
- 2025-07-20 15:44:19,126 - sglang - INFO - [2025-07-20 15:44:19 TP0] Decode batch. #running-req: 10, #token: 29997, token usage: 0.79, gen throughput (token/s): 406.67, #queue-req: 35
- 2025-07-20 15:44:19,127 - __main__ - INFO - sglang running req: 10 queue req: 35
- 2025-07-20 15:44:20,113 - sglang - INFO - [2025-07-20 15:44:20 TP0] Decode batch. #running-req: 10, #token: 30397, token usage: 0.80, gen throughput (token/s): 405.35, #queue-req: 35
- 2025-07-20 15:44:20,114 - __main__ - INFO - sglang running req: 10 queue req: 35
- 2025-07-20 15:44:20,881 - sglang - INFO - [2025-07-20 15:44:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2725, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 34
- 2025-07-20 15:44:20,881 - __main__ - INFO - sglang running req: 9 queue req: 34
- 2025-07-20 15:44:21,913 - sglang - INFO - [2025-07-20 15:44:21 TP0] Decode batch. #running-req: 10, #token: 31444, token usage: 0.83, gen throughput (token/s): 221.66, #queue-req: 34
- 2025-07-20 15:44:21,914 - __main__ - INFO - sglang running req: 10 queue req: 34
- 2025-07-20 15:44:22,905 - sglang - INFO - [2025-07-20 15:44:22 TP0] Decode batch. #running-req: 10, #token: 31844, token usage: 0.84, gen throughput (token/s): 403.53, #queue-req: 34
- 2025-07-20 15:44:22,905 - __main__ - INFO - sglang running req: 10 queue req: 34
- 2025-07-20 15:44:23,103 - sglang - INFO - [2025-07-20 15:44:23 TP0] Prefill batch. #new-seq: 1, #new-token: 1337, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 33
- 2025-07-20 15:44:23,104 - __main__ - INFO - sglang running req: 9 queue req: 33
- 2025-07-20 15:44:23,841 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:44:23,841 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 927.61 1075.17
- sglang_output_tokens 260.36 281.92
- 2025-07-20 15:44:23,841 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 467 | 500
- 1 | 0 | 10
- 2025-07-20 15:44:23,920 - sglang - INFO - [2025-07-20 15:44:23 TP0] Prefill batch. #new-seq: 1, #new-token: 2122, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 32
- 2025-07-20 15:44:23,920 - __main__ - INFO - sglang running req: 9 queue req: 32
- 2025-07-20 15:44:25,086 - sglang - INFO - [2025-07-20 15:44:25 TP0] Decode batch. #running-req: 10, #token: 30764, token usage: 0.81, gen throughput (token/s): 182.45, #queue-req: 32
- 2025-07-20 15:44:25,086 - __main__ - INFO - sglang running req: 10 queue req: 32
- 2025-07-20 15:44:26,072 - sglang - INFO - [2025-07-20 15:44:26 TP0] Decode batch. #running-req: 10, #token: 31164, token usage: 0.82, gen throughput (token/s): 405.79, #queue-req: 32
- 2025-07-20 15:44:26,072 - __main__ - INFO - sglang running req: 10 queue req: 32
- 2025-07-20 15:44:27,058 - sglang - INFO - [2025-07-20 15:44:27 TP0] Decode batch. #running-req: 10, #token: 31564, token usage: 0.83, gen throughput (token/s): 405.79, #queue-req: 32
- 2025-07-20 15:44:27,058 - __main__ - INFO - sglang running req: 10 queue req: 32
- 2025-07-20 15:44:28,049 - sglang - INFO - [2025-07-20 15:44:28 TP0] Decode batch. #running-req: 10, #token: 31964, token usage: 0.84, gen throughput (token/s): 403.57, #queue-req: 32
- 2025-07-20 15:44:28,049 - __main__ - INFO - sglang running req: 10 queue req: 32
- 2025-07-20 15:44:28,891 - sglang - INFO - [2025-07-20 15:44:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2371, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 8, #queue-req: 31
- 2025-07-20 15:44:28,891 - __main__ - INFO - sglang running req: 8 queue req: 31
- 2025-07-20 15:44:29,787 - sglang - INFO - [2025-07-20 15:44:29 TP0] Decode batch. #running-req: 9, #token: 29052, token usage: 0.76, gen throughput (token/s): 218.53, #queue-req: 31
- 2025-07-20 15:44:29,788 - __main__ - INFO - sglang running req: 9 queue req: 31
- 2025-07-20 15:44:30,205 - sglang - INFO - [2025-07-20 15:44:30 TP0] Prefill batch. #new-seq: 3, #new-token: 5878, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 8, #queue-req: 28
- 2025-07-20 15:44:30,205 - __main__ - INFO - sglang running req: 8 queue req: 28
- 2025-07-20 15:44:32,115 - sglang - INFO - [2025-07-20 15:44:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2370, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 27
- 2025-07-20 15:44:32,115 - __main__ - INFO - sglang running req: 10 queue req: 27
- 2025-07-20 15:44:33,408 - sglang - INFO - [2025-07-20 15:44:33 TP0] Decode batch. #running-req: 11, #token: 29044, token usage: 0.76, gen throughput (token/s): 111.58, #queue-req: 27
- 2025-07-20 15:44:33,409 - __main__ - INFO - sglang running req: 11 queue req: 27
- 2025-07-20 15:44:33,458 - sglang - INFO - [2025-07-20 15:44:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2701, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 26
- 2025-07-20 15:44:33,458 - __main__ - INFO - sglang running req: 10 queue req: 26
- 2025-07-20 15:44:33,843 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:44:33,843 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 932.39 1086.75
- sglang_output_tokens 262.01 287.15
- 2025-07-20 15:44:33,844 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 473 | 500
- 1 | 0 | 10
- 2025-07-20 15:44:34,759 - sglang - INFO - [2025-07-20 15:44:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2555, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 25
- 2025-07-20 15:44:34,759 - __main__ - INFO - sglang running req: 10 queue req: 25
- 2025-07-20 15:44:35,964 - sglang - INFO - [2025-07-20 15:44:35 TP0] Decode batch. #running-req: 11, #token: 28856, token usage: 0.76, gen throughput (token/s): 171.40, #queue-req: 25
- 2025-07-20 15:44:35,964 - __main__ - INFO - sglang running req: 11 queue req: 25
- 2025-07-20 15:44:36,952 - sglang - INFO - [2025-07-20 15:44:36 TP0] Decode batch. #running-req: 11, #token: 29296, token usage: 0.77, gen throughput (token/s): 445.44, #queue-req: 25
- 2025-07-20 15:44:36,952 - __main__ - INFO - sglang running req: 11 queue req: 25
- 2025-07-20 15:44:37,594 - sglang - INFO - [2025-07-20 15:44:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2703, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 24
- 2025-07-20 15:44:37,594 - __main__ - INFO - sglang running req: 10 queue req: 24
- 2025-07-20 15:44:38,747 - sglang - INFO - [2025-07-20 15:44:38 TP0] Decode batch. #running-req: 11, #token: 29052, token usage: 0.76, gen throughput (token/s): 244.56, #queue-req: 24
- 2025-07-20 15:44:38,747 - __main__ - INFO - sglang running req: 11 queue req: 24
- 2025-07-20 15:44:39,733 - sglang - INFO - [2025-07-20 15:44:39 TP0] Decode batch. #running-req: 11, #token: 29492, token usage: 0.78, gen throughput (token/s): 445.84, #queue-req: 24
- 2025-07-20 15:44:39,734 - __main__ - INFO - sglang running req: 11 queue req: 24
- 2025-07-20 15:44:40,722 - sglang - INFO - [2025-07-20 15:44:40 TP0] Decode batch. #running-req: 11, #token: 29932, token usage: 0.79, gen throughput (token/s): 445.06, #queue-req: 24
- 2025-07-20 15:44:40,722 - __main__ - INFO - sglang running req: 11 queue req: 24
- 2025-07-20 15:44:41,710 - sglang - INFO - [2025-07-20 15:44:41 TP0] Decode batch. #running-req: 11, #token: 30372, token usage: 0.80, gen throughput (token/s): 445.19, #queue-req: 24
- 2025-07-20 15:44:41,711 - __main__ - INFO - sglang running req: 11 queue req: 24
- 2025-07-20 15:44:42,205 - sglang - INFO - [2025-07-20 15:44:42 TP0] Prefill batch. #new-seq: 1, #new-token: 2901, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 23
- 2025-07-20 15:44:42,205 - __main__ - INFO - sglang running req: 10 queue req: 23
- 2025-07-20 15:44:43,539 - sglang - INFO - [2025-07-20 15:44:43 TP0] Decode batch. #running-req: 11, #token: 31609, token usage: 0.83, gen throughput (token/s): 240.05, #queue-req: 23
- 2025-07-20 15:44:43,539 - __main__ - INFO - sglang running req: 11 queue req: 23
- 2025-07-20 15:44:43,845 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:44:43,845 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 930.11 1070.68
- sglang_output_tokens 261.37 284.34
- 2025-07-20 15:44:43,846 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 476 | 500
- 1 | 0 | 10
- 2025-07-20 15:44:44,536 - sglang - INFO - [2025-07-20 15:44:44 TP0] Decode batch. #running-req: 11, #token: 32049, token usage: 0.84, gen throughput (token/s): 441.56, #queue-req: 23
- 2025-07-20 15:44:44,536 - __main__ - INFO - sglang running req: 11 queue req: 23
- 2025-07-20 15:44:45,407 - sglang - INFO - [2025-07-20 15:44:45 TP0] Prefill batch. #new-seq: 1, #new-token: 2369, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 22
- 2025-07-20 15:44:45,407 - __main__ - INFO - sglang running req: 10 queue req: 22
- 2025-07-20 15:44:46,281 - sglang - INFO - [2025-07-20 15:44:46 TP0] Decode batch. #running-req: 11, #token: 32161, token usage: 0.85, gen throughput (token/s): 251.52, #queue-req: 22
- 2025-07-20 15:44:46,281 - __main__ - INFO - sglang running req: 11 queue req: 22
- 2025-07-20 15:44:47,277 - sglang - INFO - [2025-07-20 15:44:47 TP0] Decode batch. #running-req: 11, #token: 32601, token usage: 0.86, gen throughput (token/s): 441.90, #queue-req: 22
- 2025-07-20 15:44:47,277 - __main__ - INFO - sglang running req: 11 queue req: 22
- 2025-07-20 15:44:48,271 - sglang - INFO - [2025-07-20 15:44:48 TP0] Decode batch. #running-req: 11, #token: 33041, token usage: 0.87, gen throughput (token/s): 442.43, #queue-req: 22
- 2025-07-20 15:44:48,271 - __main__ - INFO - sglang running req: 11 queue req: 22
- 2025-07-20 15:44:49,265 - sglang - INFO - [2025-07-20 15:44:49 TP0] Decode batch. #running-req: 10, #token: 26313, token usage: 0.69, gen throughput (token/s): 440.61, #queue-req: 22
- 2025-07-20 15:44:49,265 - __main__ - INFO - sglang running req: 10 queue req: 22
- 2025-07-20 15:44:49,290 - sglang - INFO - [2025-07-20 15:44:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2383, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 21
- 2025-07-20 15:44:49,290 - __main__ - INFO - sglang running req: 9 queue req: 21
- 2025-07-20 15:44:50,555 - sglang - INFO - [2025-07-20 15:44:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2804, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 20
- 2025-07-20 15:44:50,555 - __main__ - INFO - sglang running req: 9 queue req: 20
- 2025-07-20 15:44:51,565 - sglang - INFO - [2025-07-20 15:44:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2967, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 19
- 2025-07-20 15:44:51,565 - __main__ - INFO - sglang running req: 9 queue req: 19
- 2025-07-20 15:44:52,682 - sglang - INFO - [2025-07-20 15:44:52 TP0] Decode batch. #running-req: 10, #token: 29451, token usage: 0.78, gen throughput (token/s): 116.21, #queue-req: 19
- 2025-07-20 15:44:52,682 - __main__ - INFO - sglang running req: 10 queue req: 19
- 2025-07-20 15:44:53,666 - sglang - INFO - [2025-07-20 15:44:53 TP0] Decode batch. #running-req: 10, #token: 29851, token usage: 0.79, gen throughput (token/s): 406.19, #queue-req: 19
- 2025-07-20 15:44:53,667 - __main__ - INFO - sglang running req: 10 queue req: 19
- 2025-07-20 15:44:53,839 - sglang - INFO - [2025-07-20 15:44:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2572, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 18
- 2025-07-20 15:44:53,839 - __main__ - INFO - sglang running req: 9 queue req: 18
- 2025-07-20 15:44:53,847 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:44:53,847 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 933.93 1086.68
- sglang_output_tokens 262.56 290.05
- 2025-07-20 15:44:53,847 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 482 | 500
- 1 | 0 | 10
- 2025-07-20 15:44:55,459 - sglang - INFO - [2025-07-20 15:44:55 TP0] Decode batch. #running-req: 10, #token: 30164, token usage: 0.79, gen throughput (token/s): 222.52, #queue-req: 18
- 2025-07-20 15:44:55,460 - __main__ - INFO - sglang running req: 10 queue req: 18
- 2025-07-20 15:44:56,223 - sglang - INFO - [2025-07-20 15:44:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2125, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 17
- 2025-07-20 15:44:56,223 - __main__ - INFO - sglang running req: 9 queue req: 17
- 2025-07-20 15:44:57,117 - sglang - INFO - [2025-07-20 15:44:57 TP0] Decode batch. #running-req: 10, #token: 29525, token usage: 0.78, gen throughput (token/s): 240.78, #queue-req: 17
- 2025-07-20 15:44:57,117 - __main__ - INFO - sglang running req: 10 queue req: 17
- 2025-07-20 15:44:57,511 - sglang - INFO - [2025-07-20 15:44:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2485, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 16
- 2025-07-20 15:44:57,511 - __main__ - INFO - sglang running req: 9 queue req: 16
- 2025-07-20 15:44:58,859 - sglang - INFO - [2025-07-20 15:44:58 TP0] Decode batch. #running-req: 10, #token: 29055, token usage: 0.76, gen throughput (token/s): 229.02, #queue-req: 16
- 2025-07-20 15:44:58,859 - __main__ - INFO - sglang running req: 10 queue req: 16
- 2025-07-20 15:44:59,843 - sglang - INFO - [2025-07-20 15:44:59 TP0] Decode batch. #running-req: 10, #token: 29455, token usage: 0.78, gen throughput (token/s): 406.55, #queue-req: 16
- 2025-07-20 15:44:59,843 - __main__ - INFO - sglang running req: 10 queue req: 16
- 2025-07-20 15:45:00,829 - sglang - INFO - [2025-07-20 15:45:00 TP0] Decode batch. #running-req: 10, #token: 29855, token usage: 0.79, gen throughput (token/s): 405.49, #queue-req: 16
- 2025-07-20 15:45:00,829 - __main__ - INFO - sglang running req: 10 queue req: 16
- 2025-07-20 15:45:01,569 - sglang - INFO - [2025-07-20 15:45:01 TP0] Prefill batch. #new-seq: 1, #new-token: 1915, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 15
- 2025-07-20 15:45:01,569 - __main__ - INFO - sglang running req: 9 queue req: 15
- 2025-07-20 15:45:02,467 - sglang - INFO - [2025-07-20 15:45:02 TP0] Decode batch. #running-req: 10, #token: 28366, token usage: 0.75, gen throughput (token/s): 243.63, #queue-req: 15
- 2025-07-20 15:45:02,467 - __main__ - INFO - sglang running req: 10 queue req: 15
- 2025-07-20 15:45:02,614 - sglang - INFO - [2025-07-20 15:45:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2971, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 14
- 2025-07-20 15:45:02,615 - __main__ - INFO - sglang running req: 9 queue req: 14
- 2025-07-20 15:45:03,848 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:45:03,848 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 934.57 1091.81
- sglang_output_tokens 262.47 290.17
- 2025-07-20 15:45:03,848 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 486 | 500
- 1 | 0 | 10
- 2025-07-20 15:45:04,320 - sglang - INFO - [2025-07-20 15:45:04 TP0] Decode batch. #running-req: 10, #token: 29437, token usage: 0.77, gen throughput (token/s): 215.26, #queue-req: 14
- 2025-07-20 15:45:04,321 - __main__ - INFO - sglang running req: 10 queue req: 14
- 2025-07-20 15:45:05,306 - sglang - INFO - [2025-07-20 15:45:05 TP0] Decode batch. #running-req: 10, #token: 29837, token usage: 0.79, gen throughput (token/s): 405.69, #queue-req: 14
- 2025-07-20 15:45:05,307 - __main__ - INFO - sglang running req: 10 queue req: 14
- 2025-07-20 15:45:06,296 - sglang - INFO - [2025-07-20 15:45:06 TP0] Decode batch. #running-req: 10, #token: 30237, token usage: 0.80, gen throughput (token/s): 404.29, #queue-req: 14
- 2025-07-20 15:45:06,296 - __main__ - INFO - sglang running req: 10 queue req: 14
- 2025-07-20 15:45:07,284 - sglang - INFO - [2025-07-20 15:45:07 TP0] Decode batch. #running-req: 10, #token: 30637, token usage: 0.81, gen throughput (token/s): 404.55, #queue-req: 14
- 2025-07-20 15:45:07,285 - __main__ - INFO - sglang running req: 10 queue req: 14
- 2025-07-20 15:45:08,272 - sglang - INFO - [2025-07-20 15:45:08 TP0] Decode batch. #running-req: 10, #token: 31037, token usage: 0.82, gen throughput (token/s): 404.97, #queue-req: 14
- 2025-07-20 15:45:08,273 - __main__ - INFO - sglang running req: 10 queue req: 14
- 2025-07-20 15:45:08,347 - sglang - INFO - [2025-07-20 15:45:08 TP0] Prefill batch. #new-seq: 1, #new-token: 1623, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 13
- 2025-07-20 15:45:08,347 - __main__ - INFO - sglang running req: 9 queue req: 13
- 2025-07-20 15:45:10,010 - sglang - INFO - [2025-07-20 15:45:10 TP0] Decode batch. #running-req: 10, #token: 30155, token usage: 0.79, gen throughput (token/s): 229.55, #queue-req: 13
- 2025-07-20 15:45:10,011 - __main__ - INFO - sglang running req: 10 queue req: 13
- 2025-07-20 15:45:10,997 - sglang - INFO - [2025-07-20 15:45:10 TP0] Decode batch. #running-req: 10, #token: 30555, token usage: 0.80, gen throughput (token/s): 405.29, #queue-req: 13
- 2025-07-20 15:45:10,997 - __main__ - INFO - sglang running req: 10 queue req: 13
- 2025-07-20 15:45:11,984 - sglang - INFO - [2025-07-20 15:45:11 TP0] Decode batch. #running-req: 10, #token: 30955, token usage: 0.81, gen throughput (token/s): 405.27, #queue-req: 13
- 2025-07-20 15:45:11,985 - __main__ - INFO - sglang running req: 10 queue req: 13
- 2025-07-20 15:45:12,976 - sglang - INFO - [2025-07-20 15:45:12 TP0] Decode batch. #running-req: 10, #token: 31355, token usage: 0.83, gen throughput (token/s): 403.51, #queue-req: 13
- 2025-07-20 15:45:12,976 - __main__ - INFO - sglang running req: 10 queue req: 13
- 2025-07-20 15:45:13,597 - sglang - INFO - [2025-07-20 15:45:13 TP0] Prefill batch. #new-seq: 1, #new-token: 3123, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 12
- 2025-07-20 15:45:13,597 - __main__ - INFO - sglang running req: 9 queue req: 12
- 2025-07-20 15:45:13,850 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:45:13,850 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 931.07 1058.65
- sglang_output_tokens 261.63 280.72
- 2025-07-20 15:45:13,850 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 488 | 500
- 1 | 0 | 10
- 2025-07-20 15:45:14,894 - sglang - INFO - [2025-07-20 15:45:14 TP0] Decode batch. #running-req: 10, #token: 31096, token usage: 0.82, gen throughput (token/s): 207.97, #queue-req: 12
- 2025-07-20 15:45:14,894 - __main__ - INFO - sglang running req: 10 queue req: 12
- 2025-07-20 15:45:15,044 - sglang - INFO - [2025-07-20 15:45:15 TP0] Prefill batch. #new-seq: 1, #new-token: 3337, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 11
- 2025-07-20 15:45:15,044 - __main__ - INFO - sglang running req: 9 queue req: 11
- 2025-07-20 15:45:16,401 - sglang - INFO - [2025-07-20 15:45:16 TP0] Prefill batch. #new-seq: 1, #new-token: 3404, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 10
- 2025-07-20 15:45:16,402 - __main__ - INFO - sglang running req: 9 queue req: 10
- 2025-07-20 15:45:17,782 - sglang - INFO - [2025-07-20 15:45:17 TP0] Prefill batch. #new-seq: 1, #new-token: 4466, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 8, #queue-req: 9
- 2025-07-20 15:45:17,782 - __main__ - INFO - sglang running req: 8 queue req: 9
- 2025-07-20 15:45:19,072 - sglang - INFO - [2025-07-20 15:45:19 TP0] Decode batch. #running-req: 9, #token: 30826, token usage: 0.81, gen throughput (token/s): 93.60, #queue-req: 9
- 2025-07-20 15:45:19,072 - __main__ - INFO - sglang running req: 9 queue req: 9
- 2025-07-20 15:45:20,057 - sglang - INFO - [2025-07-20 15:45:20 TP0] Decode batch. #running-req: 9, #token: 31186, token usage: 0.82, gen throughput (token/s): 365.24, #queue-req: 9
- 2025-07-20 15:45:20,058 - __main__ - INFO - sglang running req: 9 queue req: 9
- 2025-07-20 15:45:20,329 - sglang - INFO - [2025-07-20 15:45:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2676, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 8, #queue-req: 8
- 2025-07-20 15:45:20,329 - __main__ - INFO - sglang running req: 8 queue req: 8
- 2025-07-20 15:45:21,875 - sglang - INFO - [2025-07-20 15:45:21 TP0] Decode batch. #running-req: 9, #token: 32311, token usage: 0.85, gen throughput (token/s): 197.45, #queue-req: 8
- 2025-07-20 15:45:21,876 - __main__ - INFO - sglang running req: 9 queue req: 8
- 2025-07-20 15:45:22,868 - sglang - INFO - [2025-07-20 15:45:22 TP0] Decode batch. #running-req: 9, #token: 32671, token usage: 0.86, gen throughput (token/s): 362.54, #queue-req: 8
- 2025-07-20 15:45:22,868 - __main__ - INFO - sglang running req: 9 queue req: 8
- 2025-07-20 15:45:23,809 - sglang - INFO - [2025-07-20 15:45:23 TP0] Prefill batch. #new-seq: 1, #new-token: 3126, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 8, #queue-req: 7
- 2025-07-20 15:45:23,809 - __main__ - INFO - sglang running req: 8 queue req: 7
- 2025-07-20 15:45:23,851 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:45:23,851 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 934.77 1063.74
- sglang_output_tokens 262.72 283.70
- 2025-07-20 15:45:23,851 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 493 | 500
- 1 | 1 | 10
- 2025-07-20 15:45:24,786 - sglang - INFO - [2025-07-20 15:45:24 TP0] Decode batch. #running-req: 9, #token: 32118, token usage: 0.85, gen throughput (token/s): 187.20, #queue-req: 7
- 2025-07-20 15:45:24,786 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:25,775 - sglang - INFO - [2025-07-20 15:45:25 TP0] Decode batch. #running-req: 9, #token: 32478, token usage: 0.85, gen throughput (token/s): 364.16, #queue-req: 7
- 2025-07-20 15:45:25,775 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:26,763 - sglang - INFO - [2025-07-20 15:45:26 TP0] Decode batch. #running-req: 9, #token: 32838, token usage: 0.86, gen throughput (token/s): 364.20, #queue-req: 7
- 2025-07-20 15:45:26,763 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:27,862 - sglang - INFO - [2025-07-20 15:45:27 TP0] Decode batch. #running-req: 9, #token: 33198, token usage: 0.87, gen throughput (token/s): 327.70, #queue-req: 7
- 2025-07-20 15:45:27,862 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:28,856 - sglang - INFO - [2025-07-20 15:45:28 TP0] Decode batch. #running-req: 9, #token: 33558, token usage: 0.88, gen throughput (token/s): 361.96, #queue-req: 7
- 2025-07-20 15:45:28,857 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:29,849 - sglang - INFO - [2025-07-20 15:45:29 TP0] Decode batch. #running-req: 9, #token: 33918, token usage: 0.89, gen throughput (token/s): 362.56, #queue-req: 7
- 2025-07-20 15:45:29,850 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:30,844 - sglang - INFO - [2025-07-20 15:45:30 TP0] Decode batch. #running-req: 9, #token: 34278, token usage: 0.90, gen throughput (token/s): 362.01, #queue-req: 7
- 2025-07-20 15:45:30,844 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:31,839 - sglang - INFO - [2025-07-20 15:45:31 TP0] Decode batch. #running-req: 9, #token: 34638, token usage: 0.91, gen throughput (token/s): 361.72, #queue-req: 7
- 2025-07-20 15:45:31,839 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:32,838 - sglang - INFO - [2025-07-20 15:45:32 TP0] Decode batch. #running-req: 9, #token: 34998, token usage: 0.92, gen throughput (token/s): 360.48, #queue-req: 7
- 2025-07-20 15:45:32,838 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:33,835 - sglang - INFO - [2025-07-20 15:45:33 TP0] Decode batch. #running-req: 9, #token: 35358, token usage: 0.93, gen throughput (token/s): 360.83, #queue-req: 7
- 2025-07-20 15:45:33,836 - __main__ - INFO - sglang running req: 9 queue req: 7
- 2025-07-20 15:45:33,852 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:45:33,852 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 927.24 1063.74
- sglang_output_tokens 260.60 283.70
- 2025-07-20 15:45:33,852 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 493 | 500
- 1 | 1 | 10
- 2025-07-20 15:45:34,847 - sglang - INFO - [2025-07-20 15:45:34 TP0] Decode batch. #running-req: 8, #token: 31461, token usage: 0.83, gen throughput (token/s): 346.98, #queue-req: 7
- 2025-07-20 15:45:34,847 - __main__ - INFO - sglang running req: 8 queue req: 7
- 2025-07-20 15:45:35,786 - sglang - INFO - [2025-07-20 15:45:35 TP0] Decode batch. #running-req: 8, #token: 31781, token usage: 0.84, gen throughput (token/s): 340.79, #queue-req: 7
- 2025-07-20 15:45:35,786 - __main__ - INFO - sglang running req: 8 queue req: 7
- 2025-07-20 15:45:36,728 - sglang - INFO - [2025-07-20 15:45:36 TP0] Decode batch. #running-req: 8, #token: 32101, token usage: 0.85, gen throughput (token/s): 339.73, #queue-req: 7
- 2025-07-20 15:45:36,728 - __main__ - INFO - sglang running req: 8 queue req: 7
- 2025-07-20 15:45:37,669 - sglang - INFO - [2025-07-20 15:45:37 TP0] Decode batch. #running-req: 8, #token: 32421, token usage: 0.85, gen throughput (token/s): 339.89, #queue-req: 7
- 2025-07-20 15:45:37,670 - __main__ - INFO - sglang running req: 8 queue req: 7
- 2025-07-20 15:45:38,610 - sglang - INFO - [2025-07-20 15:45:38 TP0] Decode batch. #running-req: 8, #token: 32741, token usage: 0.86, gen throughput (token/s): 340.39, #queue-req: 7
- 2025-07-20 15:45:38,610 - __main__ - INFO - sglang running req: 8 queue req: 7
- 2025-07-20 15:45:38,728 - sglang - INFO - [2025-07-20 15:45:38 TP0] Prefill batch. #new-seq: 1, #new-token: 3140, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 7, #queue-req: 6
- 2025-07-20 15:45:38,728 - __main__ - INFO - sglang running req: 7 queue req: 6
- 2025-07-20 15:45:40,475 - sglang - INFO - [2025-07-20 15:45:40 TP0] Decode batch. #running-req: 8, #token: 31934, token usage: 0.84, gen throughput (token/s): 170.97, #queue-req: 6
- 2025-07-20 15:45:40,476 - __main__ - INFO - sglang running req: 8 queue req: 6
- 2025-07-20 15:45:41,041 - sglang - INFO - [2025-07-20 15:45:41 TP0] Prefill batch. #new-seq: 1, #new-token: 3079, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 7, #queue-req: 5
- 2025-07-20 15:45:41,041 - __main__ - INFO - sglang running req: 7 queue req: 5
- 2025-07-20 15:45:42,341 - sglang - INFO - [2025-07-20 15:45:42 TP0] Decode batch. #running-req: 8, #token: 27719, token usage: 0.73, gen throughput (token/s): 171.00, #queue-req: 5
- 2025-07-20 15:45:42,341 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 15:45:42,364 - sglang - INFO - [2025-07-20 15:45:42 TP0] Prefill batch. #new-seq: 1, #new-token: 3605, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 4
- 2025-07-20 15:45:42,365 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 15:45:43,854 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:45:43,854 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 928.95 1040.45
- sglang_output_tokens 262.29 284.22
- 2025-07-20 15:45:43,854 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 496 | 500
- 1 | 2 | 10
- 2025-07-20 15:45:44,307 - sglang - INFO - [2025-07-20 15:45:44 TP0] Decode batch. #running-req: 8, #token: 31643, token usage: 0.83, gen throughput (token/s): 162.29, #queue-req: 4
- 2025-07-20 15:45:44,307 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:45:45,245 - sglang - INFO - [2025-07-20 15:45:45 TP0] Decode batch. #running-req: 8, #token: 31963, token usage: 0.84, gen throughput (token/s): 340.91, #queue-req: 4
- 2025-07-20 15:45:45,245 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:45:46,226 - sglang - INFO - [2025-07-20 15:45:46 TP0] Decode batch. #running-req: 8, #token: 32283, token usage: 0.85, gen throughput (token/s): 326.38, #queue-req: 4
- 2025-07-20 15:45:46,226 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:45:47,169 - sglang - INFO - [2025-07-20 15:45:47 TP0] Decode batch. #running-req: 8, #token: 32603, token usage: 0.86, gen throughput (token/s): 339.08, #queue-req: 4
- 2025-07-20 15:45:47,170 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:45:47,713 - sglang - INFO - [2025-07-20 15:45:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 7, #queue-req: 3
- 2025-07-20 15:45:47,713 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 15:45:48,950 - sglang - INFO - [2025-07-20 15:45:48 TP0] Decode batch. #running-req: 8, #token: 31453, token usage: 0.83, gen throughput (token/s): 179.17, #queue-req: 3
- 2025-07-20 15:45:48,950 - __main__ - INFO - sglang running req: 8 queue req: 3
- 2025-07-20 15:45:49,895 - sglang - INFO - [2025-07-20 15:45:49 TP0] Decode batch. #running-req: 8, #token: 31773, token usage: 0.84, gen throughput (token/s): 338.63, #queue-req: 3
- 2025-07-20 15:45:49,895 - __main__ - INFO - sglang running req: 8 queue req: 3
- 2025-07-20 15:45:50,297 - sglang - INFO - [2025-07-20 15:45:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 7, #queue-req: 2
- 2025-07-20 15:45:50,298 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 15:45:51,671 - sglang - INFO - [2025-07-20 15:45:51 TP0] Decode batch. #running-req: 8, #token: 30497, token usage: 0.80, gen throughput (token/s): 179.57, #queue-req: 2
- 2025-07-20 15:45:51,671 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 15:45:52,610 - sglang - INFO - [2025-07-20 15:45:52 TP0] Decode batch. #running-req: 8, #token: 30817, token usage: 0.81, gen throughput (token/s): 340.94, #queue-req: 2
- 2025-07-20 15:45:52,610 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 15:45:53,548 - sglang - INFO - [2025-07-20 15:45:53 TP0] Decode batch. #running-req: 8, #token: 31137, token usage: 0.82, gen throughput (token/s): 340.88, #queue-req: 2
- 2025-07-20 15:45:53,548 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 15:45:53,855 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 15:45:53,855 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 926.93 1030.15
- sglang_output_tokens 261.87 282.35
- 2025-07-20 15:45:53,855 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 496 | 500
- 1 | 4 | 10
- 2025-07-20 15:45:54,490 - sglang - INFO - [2025-07-20 15:45:54 TP0] Decode batch. #running-req: 8, #token: 31457, token usage: 0.83, gen throughput (token/s): 339.95, #queue-req: 2
- 2025-07-20 15:45:54,490 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 15:45:55,430 - sglang - INFO - [2025-07-20 15:45:55 TP0] Decode batch. #running-req: 8, #token: 31777, token usage: 0.84, gen throughput (token/s): 340.19, #queue-req: 2
- 2025-07-20 15:45:55,430 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 15:45:56,089 - sglang - INFO - [2025-07-20 15:45:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2787, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 1
- 2025-07-20 15:45:56,089 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 15:45:57,182 - sglang - INFO - [2025-07-20 15:45:57 TP0] Decode batch. #running-req: 8, #token: 30438, token usage: 0.80, gen throughput (token/s): 182.15, #queue-req: 1
- 2025-07-20 15:45:57,182 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 15:45:58,131 - sglang - INFO - [2025-07-20 15:45:58 TP0] Decode batch. #running-req: 8, #token: 30758, token usage: 0.81, gen throughput (token/s): 337.29, #queue-req: 1
- 2025-07-20 15:45:58,131 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 15:45:59,070 - sglang - INFO - [2025-07-20 15:45:59 TP0] Decode batch. #running-req: 8, #token: 31078, token usage: 0.82, gen throughput (token/s): 340.60, #queue-req: 1
- 2025-07-20 15:45:59,070 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 15:46:00,011 - sglang - INFO - [2025-07-20 15:46:00 TP0] Decode batch. #running-req: 8, #token: 31398, token usage: 0.83, gen throughput (token/s): 340.20, #queue-req: 1
- 2025-07-20 15:46:00,011 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 15:46:00,951 - sglang - INFO - [2025-07-20 15:46:00 TP0] Decode batch. #running-req: 8, #token: 31718, token usage: 0.83, gen throughput (token/s): 340.28, #queue-req: 1
- 2025-07-20 15:46:00,951 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 15:46:01,893 - sglang - INFO - [2025-07-20 15:46:01 TP0] Decode batch. #running-req: 8, #token: 32038, token usage: 0.84, gen throughput (token/s): 339.79, #queue-req: 1
- 2025-07-20 15:46:01,893 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 15:46:02,011 - sglang - INFO - [2025-07-20 15:46:02 TP0] Prefill batch. #new-seq: 1, #new-token: 3361, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 0
- 2025-07-20 15:46:02,011 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:02,369 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 15:46:02,370 - __main__ - INFO - Worker 2 processing work item 16158dc6fac58e5a41d3888b9554c3d75b2a5744
- 2025-07-20 15:46:02,370 - __main__ - INFO - Created all tasks for 16158dc6fac58e5a41d3888b9554c3d75b2a5744
- 2025-07-20 15:46:02,374 - __main__ - INFO - Got 5 pages to do for scripts/data/12445200726503846U3442014055009.pdf in worker 2
- 2025-07-20 15:46:02,462 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-5
- 2025-07-20 15:46:02,494 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-2
- 2025-07-20 15:46:02,542 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-3
- 2025-07-20 15:46:02,606 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-1
- 2025-07-20 15:46:02,735 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-4
- 2025-07-20 15:46:02,979 - sglang - INFO - [2025-07-20 15:46:02 TP0] Prefill batch. #new-seq: 1, #new-token: 1234, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.82, #running-req: 8, #queue-req: 4
- 2025-07-20 15:46:02,980 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:03,857 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 15:46:03,857 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 924.55 1011.15
- sglang_output_tokens 261.89 281.43
- 2025-07-20 15:46:03,857 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 496 | 500
- 1 | 6 | 10
- 2 | 0 | 5
- 2025-07-20 15:46:04,354 - sglang - INFO - [2025-07-20 15:46:04 TP0] Decode batch. #running-req: 9, #token: 32535, token usage: 0.86, gen throughput (token/s): 143.85, #queue-req: 4
- 2025-07-20 15:46:04,354 - __main__ - INFO - sglang running req: 9 queue req: 4
- 2025-07-20 15:46:05,346 - sglang - INFO - [2025-07-20 15:46:05 TP0] Decode batch. #running-req: 9, #token: 32895, token usage: 0.87, gen throughput (token/s): 362.98, #queue-req: 4
- 2025-07-20 15:46:05,346 - __main__ - INFO - sglang running req: 9 queue req: 4
- 2025-07-20 15:46:06,340 - sglang - INFO - [2025-07-20 15:46:06 TP0] Decode batch. #running-req: 9, #token: 33255, token usage: 0.88, gen throughput (token/s): 362.08, #queue-req: 4
- 2025-07-20 15:46:06,340 - __main__ - INFO - sglang running req: 9 queue req: 4
- 2025-07-20 15:46:07,307 - sglang - INFO - [2025-07-20 15:46:07 TP0] Decode batch. #running-req: 8, #token: 32226, token usage: 0.85, gen throughput (token/s): 340.11, #queue-req: 4
- 2025-07-20 15:46:07,307 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:08,250 - sglang - INFO - [2025-07-20 15:46:08 TP0] Decode batch. #running-req: 8, #token: 32546, token usage: 0.86, gen throughput (token/s): 339.27, #queue-req: 4
- 2025-07-20 15:46:08,251 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:09,194 - sglang - INFO - [2025-07-20 15:46:09 TP0] Decode batch. #running-req: 8, #token: 32866, token usage: 0.87, gen throughput (token/s): 339.15, #queue-req: 4
- 2025-07-20 15:46:09,194 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:10,139 - sglang - INFO - [2025-07-20 15:46:10 TP0] Decode batch. #running-req: 8, #token: 33186, token usage: 0.87, gen throughput (token/s): 338.42, #queue-req: 4
- 2025-07-20 15:46:10,140 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:11,089 - sglang - INFO - [2025-07-20 15:46:11 TP0] Decode batch. #running-req: 8, #token: 33506, token usage: 0.88, gen throughput (token/s): 337.03, #queue-req: 4
- 2025-07-20 15:46:11,089 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:12,036 - sglang - INFO - [2025-07-20 15:46:12 TP0] Decode batch. #running-req: 8, #token: 33826, token usage: 0.89, gen throughput (token/s): 337.86, #queue-req: 4
- 2025-07-20 15:46:12,036 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:12,982 - sglang - INFO - [2025-07-20 15:46:12 TP0] Decode batch. #running-req: 8, #token: 34146, token usage: 0.90, gen throughput (token/s): 338.22, #queue-req: 4
- 2025-07-20 15:46:12,982 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:13,859 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 15:46:13,859 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 918.29 1010.52
- sglang_output_tokens 259.94 281.11
- 2025-07-20 15:46:13,859 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 496 | 500
- 1 | 6 | 10
- 2 | 1 | 5
- 2025-07-20 15:46:13,933 - sglang - INFO - [2025-07-20 15:46:13 TP0] Decode batch. #running-req: 8, #token: 34466, token usage: 0.91, gen throughput (token/s): 336.68, #queue-req: 4
- 2025-07-20 15:46:13,933 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:14,881 - sglang - INFO - [2025-07-20 15:46:14 TP0] Decode batch. #running-req: 8, #token: 34786, token usage: 0.92, gen throughput (token/s): 337.50, #queue-req: 4
- 2025-07-20 15:46:14,881 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:15,829 - sglang - INFO - [2025-07-20 15:46:15 TP0] Decode batch. #running-req: 8, #token: 35106, token usage: 0.92, gen throughput (token/s): 337.59, #queue-req: 4
- 2025-07-20 15:46:15,829 - __main__ - INFO - sglang running req: 8 queue req: 4
- 2025-07-20 15:46:16,612 - sglang - INFO - [2025-07-20 15:46:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2072, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.83, #running-req: 7, #queue-req: 3
- 2025-07-20 15:46:16,612 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 15:46:17,462 - sglang - INFO - [2025-07-20 15:46:17 TP0] Decode batch. #running-req: 8, #token: 33638, token usage: 0.89, gen throughput (token/s): 195.29, #queue-req: 3
- 2025-07-20 15:46:17,462 - __main__ - INFO - sglang running req: 8 queue req: 3
- 2025-07-20 15:46:18,422 - sglang - INFO - [2025-07-20 15:46:18 TP0] Decode batch. #running-req: 8, #token: 33958, token usage: 0.89, gen throughput (token/s): 333.39, #queue-req: 3
- 2025-07-20 15:46:18,422 - __main__ - INFO - sglang running req: 8 queue req: 3
- 2025-07-20 15:46:18,637 - sglang - INFO - [2025-07-20 15:46:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1378, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 7, #queue-req: 2
- 2025-07-20 15:46:18,637 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 15:46:19,886 - sglang - INFO - [2025-07-20 15:46:19 TP0] Decode batch. #running-req: 8, #token: 31152, token usage: 0.82, gen throughput (token/s): 217.88, #queue-req: 2
- 2025-07-20 15:46:19,886 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 15:46:20,825 - sglang - INFO - [2025-07-20 15:46:20 TP0] Decode batch. #running-req: 8, #token: 31472, token usage: 0.83, gen throughput (token/s): 340.63, #queue-req: 2
- 2025-07-20 15:46:20,826 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 15:46:21,769 - sglang - INFO - [2025-07-20 15:46:21 TP0] Decode batch. #running-req: 8, #token: 31792, token usage: 0.84, gen throughput (token/s): 339.31, #queue-req: 2
- 2025-07-20 15:46:21,769 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 15:46:22,711 - sglang - INFO - [2025-07-20 15:46:22 TP0] Decode batch. #running-req: 8, #token: 32112, token usage: 0.85, gen throughput (token/s): 339.62, #queue-req: 2
- 2025-07-20 15:46:22,711 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 15:46:22,853 - sglang - INFO - [2025-07-20 15:46:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2461, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 1
- 2025-07-20 15:46:22,853 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 15:46:23,861 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 15:46:23,861 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 918.22 976.63
- sglang_output_tokens 260.88 275.95
- 2025-07-20 15:46:23,861 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 497 | 500
- 1 | 8 | 10
- 2 | 1 | 5
- 2025-07-20 15:46:24,406 - sglang - INFO - [2025-07-20 15:46:24 TP0] Decode batch. #running-req: 8, #token: 30352, token usage: 0.80, gen throughput (token/s): 188.19, #queue-req: 1
- 2025-07-20 15:46:24,406 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 15:46:24,735 - sglang - INFO - [2025-07-20 15:46:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2640, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 7, #queue-req: 0
- 2025-07-20 15:46:24,735 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:26,138 - sglang - INFO - [2025-07-20 15:46:26 TP0] Decode batch. #running-req: 8, #token: 31729, token usage: 0.84, gen throughput (token/s): 184.20, #queue-req: 0
- 2025-07-20 15:46:26,138 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 15:46:26,688 - __main__ - WARNING - JSON decode error on attempt 1 for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 15:46:26,977 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12
- 2025-07-20 15:46:27,074 - sglang - INFO - [2025-07-20 15:46:27 TP0] Decode batch. #running-req: 7, #token: 27788, token usage: 0.73, gen throughput (token/s): 323.76, #queue-req: 0
- 2025-07-20 15:46:27,074 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:27,196 - sglang - INFO - [2025-07-20 15:46:27 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 0
- 2025-07-20 15:46:27,196 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:28,865 - sglang - INFO - [2025-07-20 15:46:28 TP0] Decode batch. #running-req: 8, #token: 30987, token usage: 0.82, gen throughput (token/s): 175.82, #queue-req: 0
- 2025-07-20 15:46:28,865 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 15:46:29,807 - sglang - INFO - [2025-07-20 15:46:29 TP0] Decode batch. #running-req: 8, #token: 31307, token usage: 0.82, gen throughput (token/s): 339.72, #queue-req: 0
- 2025-07-20 15:46:29,807 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 15:46:30,335 - __main__ - WARNING - JSON decode error on attempt 1 for scripts/data/11445200MB2D6222364440125017008.pdf-13: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 15:46:30,539 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-13
- 2025-07-20 15:46:30,703 - sglang - INFO - [2025-07-20 15:46:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2787, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 0
- 2025-07-20 15:46:30,703 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:31,560 - sglang - INFO - [2025-07-20 15:46:31 TP0] Decode batch. #running-req: 8, #token: 30417, token usage: 0.80, gen throughput (token/s): 173.44, #queue-req: 0
- 2025-07-20 15:46:31,560 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 15:46:32,405 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 15:46:32,406 - __main__ - INFO - Worker 3 processing work item b903c79fc04852a9f203dfa04143731928e937aa
- 2025-07-20 15:46:32,406 - __main__ - INFO - Created all tasks for b903c79fc04852a9f203dfa04143731928e937aa
- 2025-07-20 15:46:32,416 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/ambiguous.pdf in worker 3
- 2025-07-20 15:46:32,421 - __main__ - INFO - Got 8 pages to do for tests/gnarly_pdfs/failing_anchor_pg4.pdf in worker 3
- 2025-07-20 15:46:32,422 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/dolma-page-1.pdf in worker 3
- 2025-07-20 15:46:32,427 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/edgar.pdf in worker 3
- 2025-07-20 15:46:32,438 - __main__ - INFO - Got 3 pages to do for tests/gnarly_pdfs/guidebook_failed_pages.pdf in worker 3
- 2025-07-20 15:46:32,447 - __main__ - INFO - Got 48 pages to do for tests/gnarly_pdfs/bws_book_ch2.pdf in worker 3
- 2025-07-20 15:46:32,451 - __main__ - INFO - Got 2 pages to do for tests/gnarly_pdfs/handwriting_bad_ocr.pdf in worker 3
- 2025-07-20 15:46:32,455 - __main__ - INFO - Got 6 pages to do for tests/gnarly_pdfs/large_prompt_hint2.pdf in worker 3
- 2025-07-20 15:46:32,487 - __main__ - INFO - Got 4 pages to do for tests/gnarly_pdfs/large_prompt_hint3.pdf in worker 3
- 2025-07-20 15:46:32,561 - __main__ - INFO - Got 54 pages to do for tests/gnarly_pdfs/overrun_on_pg8.pdf in worker 3
- 2025-07-20 15:46:32,567 - __main__ - INFO - Got 26 pages to do for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf in worker 3
- 2025-07-20 15:46:32,650 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/not_parsing2.pdf in worker 3
- 2025-07-20 15:46:32,656 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf in worker 3
- 2025-07-20 15:46:32,733 - __main__ - INFO - Got 14 pages to do for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf in worker 3
- 2025-07-20 15:46:32,740 - __main__ - INFO - Got 2 pages to do for tests/gnarly_pdfs/skinnypage.pdf in worker 3
- 2025-07-20 15:46:32,743 - __main__ - INFO - Got 6 pages to do for tests/gnarly_pdfs/lots_of_sci_tables.pdf in worker 3
- 2025-07-20 15:46:32,745 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/olmo-page-1.pdf in worker 3
- 2025-07-20 15:46:32,756 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/form_on_later_pages.pdf in worker 3
- 2025-07-20 15:46:32,763 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/lots_of_chem_tables.pdf in worker 3
- 2025-07-20 15:46:32,837 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/newspaper.pdf in worker 3
- 2025-07-20 15:46:32,842 - __main__ - INFO - Got 7 pages to do for tests/gnarly_pdfs/most_content_in_image_form.pdf in worker 3
- 2025-07-20 15:46:32,857 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/failing_pdf_pg9.pdf in worker 3
- 2025-07-20 15:46:32,867 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/small_page_size.pdf in worker 3
- 2025-07-20 15:46:33,552 - __main__ - INFO - Got 106 pages to do for tests/gnarly_pdfs/instructions_and_schematics.pdf in worker 3
- 2025-07-20 15:46:33,648 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/map1.pdf in worker 3
- 2025-07-20 15:46:33,660 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/some_ocr1.pdf in worker 3
- 2025-07-20 15:46:34,583 - __main__ - INFO - Got 27 pages to do for tests/gnarly_pdfs/large_prompt_hint1.pdf in worker 3
- 2025-07-20 15:46:34,600 - __main__ - INFO - Got 40 pages to do for tests/gnarly_pdfs/ti89_guidebook_programming.pdf in worker 3
- 2025-07-20 15:46:34,634 - __main__ - INFO - Got 29 pages to do for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf in worker 3
- 2025-07-20 15:46:34,661 - __main__ - INFO - Got 68 pages to do for tests/gnarly_pdfs/slideshow_mostly_images.pdf in worker 3
- 2025-07-20 15:46:34,667 - __main__ - INFO - Got 8 pages to do for tests/gnarly_pdfs/not_parsing.pdf in worker 3
- 2025-07-20 15:46:34,680 - __main__ - INFO - Got 16 pages to do for tests/gnarly_pdfs/load_v_error.pdf in worker 3
- 2025-07-20 15:46:34,707 - sglang - INFO - [2025-07-20 15:46:32 TP0] Decode batch. #running-req: 7, #token: 23748, token usage: 0.63, gen throughput (token/s): 327.70, #queue-req: 0
- 2025-07-20 15:46:34,708 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:34,708 - sglang - INFO - [2025-07-20 15:46:34 TP0] Decode batch. #running-req: 7, #token: 24028, token usage: 0.63, gen throughput (token/s): 160.43, #queue-req: 0
- 2025-07-20 15:46:34,735 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:34,737 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:46:34,739 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 919.40 976.75
- sglang_output_tokens 262.76 281.58
- 2025-07-20 15:46:34,741 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 497 | 500
- 1 | 9 | 10
- 2 | 2 | 5
- 3 | 0 | 529
- 2025-07-20 15:46:34,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ambiguous.pdf-1
- 2025-07-20 15:46:34,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-2
- 2025-07-20 15:46:34,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-1
- 2025-07-20 15:46:34,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-6
- 2025-07-20 15:46:34,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-1
- 2025-07-20 15:46:34,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-8
- 2025-07-20 15:46:34,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-5
- 2025-07-20 15:46:34,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-7
- 2025-07-20 15:46:34,748 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-2
- 2025-07-20 15:46:34,748 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-1
- 2025-07-20 15:46:34,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-13
- 2025-07-20 15:46:34,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-16
- 2025-07-20 15:46:34,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-9
- 2025-07-20 15:46:34,752 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-10
- 2025-07-20 15:46:34,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-11
- 2025-07-20 15:46:34,755 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-6
- 2025-07-20 15:46:34,756 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-19
- 2025-07-20 15:46:34,756 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-15
- 2025-07-20 15:46:34,757 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-18
- 2025-07-20 15:46:34,759 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-7
- 2025-07-20 15:46:34,759 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-14
- 2025-07-20 15:46:34,762 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-3
- 2025-07-20 15:46:34,763 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-4
- 2025-07-20 15:46:34,764 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-2
- 2025-07-20 15:46:34,766 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-4
- 2025-07-20 15:46:34,767 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-17
- 2025-07-20 15:46:34,768 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-3
- 2025-07-20 15:46:34,770 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-12
- 2025-07-20 15:46:34,771 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-5
- 2025-07-20 15:46:34,772 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-41
- 2025-07-20 15:46:34,772 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-8
- 2025-07-20 15:46:34,772 - __main__ - INFO - Built page query for tests/gnarly_pdfs/dolma-page-1.pdf-1
- 2025-07-20 15:46:34,773 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-30
- 2025-07-20 15:46:34,773 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-23
- 2025-07-20 15:46:34,774 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-21
- 2025-07-20 15:46:34,774 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-28
- 2025-07-20 15:46:34,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-22
- 2025-07-20 15:46:34,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-32
- 2025-07-20 15:46:34,776 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-39
- 2025-07-20 15:46:34,776 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-31
- 2025-07-20 15:46:34,779 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-3
- 2025-07-20 15:46:34,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-24
- 2025-07-20 15:46:34,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-29
- 2025-07-20 15:46:34,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-34
- 2025-07-20 15:46:34,843 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-37
- 2025-07-20 15:46:34,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-40
- 2025-07-20 15:46:34,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-20
- 2025-07-20 15:46:34,847 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-25
- 2025-07-20 15:46:34,848 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-36
- 2025-07-20 15:46:34,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-35
- 2025-07-20 15:46:34,850 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-27
- 2025-07-20 15:46:34,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/edgar.pdf-1
- 2025-07-20 15:46:34,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-26
- 2025-07-20 15:46:34,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-33
- 2025-07-20 15:46:34,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-38
- 2025-07-20 15:46:34,855 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-42
- 2025-07-20 15:46:34,858 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-46
- 2025-07-20 15:46:34,861 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-44
- 2025-07-20 15:46:34,863 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-45
- 2025-07-20 15:46:34,865 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-48
- 2025-07-20 15:46:34,866 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-47
- 2025-07-20 15:46:34,867 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-43
- 2025-07-20 15:46:35,247 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-6
- 2025-07-20 15:46:35,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-1
- 2025-07-20 15:46:35,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-12
- 2025-07-20 15:46:35,640 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-17
- 2025-07-20 15:46:35,645 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-18
- 2025-07-20 15:46:35,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-3
- 2025-07-20 15:46:35,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-2
- 2025-07-20 15:46:35,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-9
- 2025-07-20 15:46:35,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-4
- 2025-07-20 15:46:35,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-19
- 2025-07-20 15:46:35,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-14
- 2025-07-20 15:46:35,836 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-16
- 2025-07-20 15:46:35,844 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-5
- 2025-07-20 15:46:35,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-13
- 2025-07-20 15:46:35,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-2
- 2025-07-20 15:46:35,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-4
- 2025-07-20 15:46:35,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-11
- 2025-07-20 15:46:35,946 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-7
- 2025-07-20 15:46:36,034 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-3
- 2025-07-20 15:46:36,036 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-10
- 2025-07-20 15:46:36,135 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-6
- 2025-07-20 15:46:36,147 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-21
- 2025-07-20 15:46:36,150 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-1
- 2025-07-20 15:46:36,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-8
- 2025-07-20 15:46:36,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-22
- 2025-07-20 15:46:36,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-15
- 2025-07-20 15:46:36,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-30
- 2025-07-20 15:46:36,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-25
- 2025-07-20 15:46:36,634 - sglang - INFO - [2025-07-20 15:46:36 TP0] Decode batch. #running-req: 7, #token: 24308, token usage: 0.64, gen throughput (token/s): 118.91, #queue-req: 0
- 2025-07-20 15:46:36,635 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:36,642 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-37
- 2025-07-20 15:46:36,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-31
- 2025-07-20 15:46:36,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-20
- 2025-07-20 15:46:36,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-24
- 2025-07-20 15:46:36,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-35
- 2025-07-20 15:46:36,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-38
- 2025-07-20 15:46:36,842 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-23
- 2025-07-20 15:46:36,847 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-36
- 2025-07-20 15:46:36,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-29
- 2025-07-20 15:46:36,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-39
- 2025-07-20 15:46:37,035 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-26
- 2025-07-20 15:46:37,037 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-5
- 2025-07-20 15:46:37,042 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-34
- 2025-07-20 15:46:37,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-32
- 2025-07-20 15:46:37,239 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-44
- 2025-07-20 15:46:37,336 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-27
- 2025-07-20 15:46:37,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-28
- 2025-07-20 15:46:37,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-40
- 2025-07-20 15:46:37,438 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-45
- 2025-07-20 15:46:37,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-46
- 2025-07-20 15:46:37,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-4
- 2025-07-20 15:46:37,535 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-33
- 2025-07-20 15:46:37,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-47
- 2025-07-20 15:46:37,552 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-43
- 2025-07-20 15:46:37,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-2
- 2025-07-20 15:46:37,737 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-48
- 2025-07-20 15:46:37,747 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-52
- 2025-07-20 15:46:37,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-49
- 2025-07-20 15:46:37,843 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-1
- 2025-07-20 15:46:37,844 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-50
- 2025-07-20 15:46:37,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-41
- 2025-07-20 15:46:37,933 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-3
- 2025-07-20 15:46:37,942 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-8
- 2025-07-20 15:46:38,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-54
- 2025-07-20 15:46:38,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-14
- 2025-07-20 15:46:38,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-51
- 2025-07-20 15:46:38,347 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-6
- 2025-07-20 15:46:38,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-42
- 2025-07-20 15:46:38,451 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-10
- 2025-07-20 15:46:38,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-7
- 2025-07-20 15:46:38,553 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-11
- 2025-07-20 15:46:38,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-5
- 2025-07-20 15:46:38,642 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-22
- 2025-07-20 15:46:38,648 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-21
- 2025-07-20 15:46:38,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-15
- 2025-07-20 15:46:38,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-24
- 2025-07-20 15:46:38,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-26
- 2025-07-20 15:46:38,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-12
- 2025-07-20 15:46:38,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-53
- 2025-07-20 15:46:38,942 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-13
- 2025-07-20 15:46:38,952 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-17
- 2025-07-20 15:46:39,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-2
- 2025-07-20 15:46:39,140 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-20
- 2025-07-20 15:46:39,144 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-23
- 2025-07-20 15:46:39,233 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-18
- 2025-07-20 15:46:39,242 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-16
- 2025-07-20 15:46:39,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-19
- 2025-07-20 15:46:39,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-9
- 2025-07-20 15:46:39,553 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-25
- 2025-07-20 15:46:39,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-1
- 2025-07-20 15:46:39,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-3
- 2025-07-20 15:46:39,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-5
- 2025-07-20 15:46:39,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-1
- 2025-07-20 15:46:39,933 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-2
- 2025-07-20 15:46:40,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-9
- 2025-07-20 15:46:40,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-3
- 2025-07-20 15:46:40,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-6
- 2025-07-20 15:46:40,452 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-4
- 2025-07-20 15:46:40,538 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-4
- 2025-07-20 15:46:40,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-1
- 2025-07-20 15:46:40,633 - sglang - INFO - [2025-07-20 15:46:40 TP0] Decode batch. #running-req: 7, #token: 24588, token usage: 0.65, gen throughput (token/s): 70.00, #queue-req: 0
- 2025-07-20 15:46:40,633 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:40,651 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-5
- 2025-07-20 15:46:40,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-4
- 2025-07-20 15:46:40,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-2
- 2025-07-20 15:46:40,836 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-7
- 2025-07-20 15:46:40,837 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-4
- 2025-07-20 15:46:40,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-7
- 2025-07-20 15:46:40,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-7
- 2025-07-20 15:46:40,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-10
- 2025-07-20 15:46:41,041 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
- 2025-07-20 15:46:41,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-1
- 2025-07-20 15:46:41,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-9
- 2025-07-20 15:46:41,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-8
- 2025-07-20 15:46:41,147 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-6
- 2025-07-20 15:46:41,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-6
- 2025-07-20 15:46:41,158 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-3
- 2025-07-20 15:46:41,240 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-1
- 2025-07-20 15:46:41,441 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-5
- 2025-07-20 15:46:41,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-9
- 2025-07-20 15:46:41,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-11
- 2025-07-20 15:46:41,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-13
- 2025-07-20 15:46:41,648 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-10
- 2025-07-20 15:46:41,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-12
- 2025-07-20 15:46:41,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-5
- 2025-07-20 15:46:41,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-7
- 2025-07-20 15:46:41,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-6
- 2025-07-20 15:46:41,947 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-4
- 2025-07-20 15:46:42,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-8
- 2025-07-20 15:46:42,044 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-8
- 2025-07-20 15:46:42,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-1
- 2025-07-20 15:46:42,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-5
- 2025-07-20 15:46:42,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-1
- 2025-07-20 15:46:42,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-2
- 2025-07-20 15:46:42,142 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-4
- 2025-07-20 15:46:42,144 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-8
- 2025-07-20 15:46:42,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-2
- 2025-07-20 15:46:42,150 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-10
- 2025-07-20 15:46:42,243 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-14
- 2025-07-20 15:46:42,244 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-3
- 2025-07-20 15:46:42,247 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-6
- 2025-07-20 15:46:42,248 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-3
- 2025-07-20 15:46:42,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-3
- 2025-07-20 15:46:42,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-9
- 2025-07-20 15:46:42,539 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-2
- 2025-07-20 15:46:42,546 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-9
- 2025-07-20 15:46:42,635 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-3
- 2025-07-20 15:46:42,638 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-2
- 2025-07-20 15:46:42,639 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-4
- 2025-07-20 15:46:42,641 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-7
- 2025-07-20 15:46:42,654 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-1
- 2025-07-20 15:46:42,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-8
- 2025-07-20 15:46:42,760 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-6
- 2025-07-20 15:46:42,835 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-9
- 2025-07-20 15:46:43,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-1
- 2025-07-20 15:46:43,047 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-5
- 2025-07-20 15:46:43,064 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-9
- 2025-07-20 15:46:43,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/olmo-page-1.pdf-1
- 2025-07-20 15:46:43,335 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-2
- 2025-07-20 15:46:43,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-5
- 2025-07-20 15:46:43,344 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-11
- 2025-07-20 15:46:43,366 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-5
- 2025-07-20 15:46:43,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-7
- 2025-07-20 15:46:43,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-10
- 2025-07-20 15:46:43,457 - __main__ - INFO - Built page query for tests/gnarly_pdfs/handwriting_bad_ocr.pdf-2
- 2025-07-20 15:46:43,458 - sglang - INFO - [2025-07-20 15:46:43 TP0] Decode batch. #running-req: 7, #token: 24868, token usage: 0.65, gen throughput (token/s): 99.20, #queue-req: 0
- 2025-07-20 15:46:43,458 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:43,534 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-8
- 2025-07-20 15:46:43,539 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-4
- 2025-07-20 15:46:43,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-4
- 2025-07-20 15:46:43,543 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-5
- 2025-07-20 15:46:43,555 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-1
- 2025-07-20 15:46:43,562 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-3
- 2025-07-20 15:46:43,644 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-2
- 2025-07-20 15:46:43,660 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-3
- 2025-07-20 15:46:43,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-16
- 2025-07-20 15:46:43,757 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-8
- 2025-07-20 15:46:43,762 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-15
- 2025-07-20 15:46:43,838 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-6
- 2025-07-20 15:46:43,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-22
- 2025-07-20 15:46:43,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-14
- 2025-07-20 15:46:43,850 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-12
- 2025-07-20 15:46:43,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-7
- 2025-07-20 15:46:43,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-2
- 2025-07-20 15:46:43,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-20
- 2025-07-20 15:46:43,871 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-3
- 2025-07-20 15:46:43,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-4
- 2025-07-20 15:46:43,942 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-23
- 2025-07-20 15:46:43,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-24
- 2025-07-20 15:46:43,947 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-26
- 2025-07-20 15:46:43,953 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-19
- 2025-07-20 15:46:43,954 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-21
- 2025-07-20 15:46:44,039 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-13
- 2025-07-20 15:46:44,052 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-28
- 2025-07-20 15:46:44,142 - __main__ - INFO - Built page query for tests/gnarly_pdfs/handwriting_bad_ocr.pdf-1
- 2025-07-20 15:46:44,147 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-25
- 2025-07-20 15:46:44,157 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-18
- 2025-07-20 15:46:44,252 - __main__ - INFO - Built page query for tests/gnarly_pdfs/newspaper.pdf-1
- 2025-07-20 15:46:44,254 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-6
- 2025-07-20 15:46:44,255 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-44
- 2025-07-20 15:46:44,260 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-30
- 2025-07-20 15:46:44,348 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-29
- 2025-07-20 15:46:44,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-35
- 2025-07-20 15:46:44,361 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-17
- 2025-07-20 15:46:44,435 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-34
- 2025-07-20 15:46:44,533 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-56
- 2025-07-20 15:46:44,540 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-37
- 2025-07-20 15:46:44,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-41
- 2025-07-20 15:46:44,543 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-43
- 2025-07-20 15:46:44,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-46
- 2025-07-20 15:46:44,548 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-36
- 2025-07-20 15:46:44,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-51
- 2025-07-20 15:46:44,559 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-47
- 2025-07-20 15:46:44,563 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-38
- 2025-07-20 15:46:44,638 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-27
- 2025-07-20 15:46:44,642 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-32
- 2025-07-20 15:46:44,650 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-1
- 2025-07-20 15:46:44,653 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-59
- 2025-07-20 15:46:44,739 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-33
- 2025-07-20 15:46:44,742 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:46:44,742 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 912.38 976.75
- sglang_output_tokens 260.75 281.58
- 2025-07-20 15:46:44,742 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 497 | 500
- 1 | 9 | 10
- 2 | 2 | 5
- 3 | 0 | 529
- 2025-07-20 15:46:44,754 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-49
- 2025-07-20 15:46:44,756 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-40
- 2025-07-20 15:46:44,844 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-50
- 2025-07-20 15:46:44,857 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-65
- 2025-07-20 15:46:44,860 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-62
- 2025-07-20 15:46:44,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-64
- 2025-07-20 15:46:44,936 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-70
- 2025-07-20 15:46:44,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-52
- 2025-07-20 15:46:44,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-61
- 2025-07-20 15:46:44,953 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-69
- 2025-07-20 15:46:44,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-71
- 2025-07-20 15:46:44,962 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-55
- 2025-07-20 15:46:45,035 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-73
- 2025-07-20 15:46:45,037 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-45
- 2025-07-20 15:46:45,042 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-39
- 2025-07-20 15:46:45,044 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-72
- 2025-07-20 15:46:45,045 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-63
- 2025-07-20 15:46:45,051 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-48
- 2025-07-20 15:46:45,134 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-42
- 2025-07-20 15:46:45,156 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-76
- 2025-07-20 15:46:45,163 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-79
- 2025-07-20 15:46:45,235 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-81
- 2025-07-20 15:46:45,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-54
- 2025-07-20 15:46:45,241 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-75
- 2025-07-20 15:46:45,243 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-77
- 2025-07-20 15:46:45,247 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-68
- 2025-07-20 15:46:45,248 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-57
- 2025-07-20 15:46:45,249 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-60
- 2025-07-20 15:46:45,253 - __main__ - INFO - Built page query for tests/gnarly_pdfs/small_page_size.pdf-1
- 2025-07-20 15:46:45,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-80
- 2025-07-20 15:46:45,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-83
- 2025-07-20 15:46:45,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-66
- 2025-07-20 15:46:45,364 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-74
- 2025-07-20 15:46:45,367 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-86
- 2025-07-20 15:46:45,442 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-7
- 2025-07-20 15:46:45,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-78
- 2025-07-20 15:46:45,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-67
- 2025-07-20 15:46:45,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-94
- 2025-07-20 15:46:45,456 - sglang - INFO - [2025-07-20 15:46:45 TP0] Decode batch. #running-req: 7, #token: 25148, token usage: 0.66, gen throughput (token/s): 139.99, #queue-req: 0
- 2025-07-20 15:46:45,456 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:45,462 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-89
- 2025-07-20 15:46:45,536 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-85
- 2025-07-20 15:46:45,538 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-58
- 2025-07-20 15:46:45,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-92
- 2025-07-20 15:46:45,558 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-53
- 2025-07-20 15:46:45,563 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-99
- 2025-07-20 15:46:45,635 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-96
- 2025-07-20 15:46:45,662 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-103
- 2025-07-20 15:46:45,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-90
- 2025-07-20 15:46:45,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-84
- 2025-07-20 15:46:45,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-98
- 2025-07-20 15:46:45,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-93
- 2025-07-20 15:46:45,754 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-101
- 2025-07-20 15:46:45,757 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-91
- 2025-07-20 15:46:45,762 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-104
- 2025-07-20 15:46:45,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-82
- 2025-07-20 15:46:45,850 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-95
- 2025-07-20 15:46:45,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-31
- 2025-07-20 15:46:45,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-97
- 2025-07-20 15:46:45,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-100
- 2025-07-20 15:46:45,948 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-106
- 2025-07-20 15:46:46,035 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-88
- 2025-07-20 15:46:46,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-105
- 2025-07-20 15:46:46,067 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-102
- 2025-07-20 15:46:46,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-87
- 2025-07-20 15:46:46,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-6
- 2025-07-20 15:46:46,260 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-6
- 2025-07-20 15:46:46,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-7
- 2025-07-20 15:46:46,351 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-4
- 2025-07-20 15:46:46,359 - __main__ - INFO - Built page query for tests/gnarly_pdfs/some_ocr1.pdf-1
- 2025-07-20 15:46:46,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-9
- 2025-07-20 15:46:46,556 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-14
- 2025-07-20 15:46:46,559 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-3
- 2025-07-20 15:46:46,561 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-11
- 2025-07-20 15:46:46,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-10
- 2025-07-20 15:46:46,655 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-16
- 2025-07-20 15:46:46,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-5
- 2025-07-20 15:46:46,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-4
- 2025-07-20 15:46:46,752 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-2
- 2025-07-20 15:46:46,755 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-21
- 2025-07-20 15:46:46,762 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-6
- 2025-07-20 15:46:46,767 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-22
- 2025-07-20 15:46:46,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-25
- 2025-07-20 15:46:46,859 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-3
- 2025-07-20 15:46:46,934 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-13
- 2025-07-20 15:46:46,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-17
- 2025-07-20 15:46:46,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-1
- 2025-07-20 15:46:46,945 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-5
- 2025-07-20 15:46:46,949 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-19
- 2025-07-20 15:46:46,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-15
- 2025-07-20 15:46:47,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-2
- 2025-07-20 15:46:47,051 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-27
- 2025-07-20 15:46:47,060 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-23
- 2025-07-20 15:46:47,135 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-8
- 2025-07-20 15:46:47,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-9
- 2025-07-20 15:46:47,143 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-12
- 2025-07-20 15:46:47,144 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-8
- 2025-07-20 15:46:47,148 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-13
- 2025-07-20 15:46:47,149 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-17
- 2025-07-20 15:46:47,157 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-18
- 2025-07-20 15:46:47,161 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-11
- 2025-07-20 15:46:47,164 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-19
- 2025-07-20 15:46:47,243 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-7
- 2025-07-20 15:46:47,248 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-14
- 2025-07-20 15:46:47,251 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-12
- 2025-07-20 15:46:47,252 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-18
- 2025-07-20 15:46:47,253 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-15
- 2025-07-20 15:46:47,254 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-16
- 2025-07-20 15:46:47,257 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-26
- 2025-07-20 15:46:47,259 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-20
- 2025-07-20 15:46:47,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-20
- 2025-07-20 15:46:47,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-22
- 2025-07-20 15:46:47,354 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-25
- 2025-07-20 15:46:47,365 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-24
- 2025-07-20 15:46:47,442 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-27
- 2025-07-20 15:46:47,455 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-26
- 2025-07-20 15:46:47,456 - sglang - INFO - [2025-07-20 15:46:47 TP0] Decode batch. #running-req: 7, #token: 25428, token usage: 0.67, gen throughput (token/s): 140.01, #queue-req: 0
- 2025-07-20 15:46:47,456 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:47,465 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-23
- 2025-07-20 15:46:47,540 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-34
- 2025-07-20 15:46:47,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-29
- 2025-07-20 15:46:47,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-24
- 2025-07-20 15:46:47,556 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-33
- 2025-07-20 15:46:47,643 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-10
- 2025-07-20 15:46:47,650 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-31
- 2025-07-20 15:46:47,659 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-28
- 2025-07-20 15:46:47,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-40
- 2025-07-20 15:46:47,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-38
- 2025-07-20 15:46:47,854 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-30
- 2025-07-20 15:46:47,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-35
- 2025-07-20 15:46:47,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-12
- 2025-07-20 15:46:47,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-39
- 2025-07-20 15:46:47,947 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-36
- 2025-07-20 15:46:48,053 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-32
- 2025-07-20 15:46:48,157 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-37
- 2025-07-20 15:46:48,236 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-8
- 2025-07-20 15:46:48,237 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-6
- 2025-07-20 15:46:48,244 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-24
- 2025-07-20 15:46:48,262 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-21
- 2025-07-20 15:46:48,336 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-7
- 2025-07-20 15:46:48,356 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-19
- 2025-07-20 15:46:48,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-14
- 2025-07-20 15:46:48,450 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-4
- 2025-07-20 15:46:48,460 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-27
- 2025-07-20 15:46:48,545 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-23
- 2025-07-20 15:46:48,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-25
- 2025-07-20 15:46:48,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-1
- 2025-07-20 15:46:48,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-20
- 2025-07-20 15:46:48,639 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-15
- 2025-07-20 15:46:48,646 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-22
- 2025-07-20 15:46:48,651 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-5
- 2025-07-20 15:46:48,659 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-11
- 2025-07-20 15:46:48,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-9
- 2025-07-20 15:46:48,754 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-17
- 2025-07-20 15:46:48,833 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-13
- 2025-07-20 15:46:48,837 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-2
- 2025-07-20 15:46:48,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-16
- 2025-07-20 15:46:48,844 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-1
- 2025-07-20 15:46:48,858 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-26
- 2025-07-20 15:46:48,860 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-6
- 2025-07-20 15:46:48,864 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-28
- 2025-07-20 15:46:48,955 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-1
- 2025-07-20 15:46:48,966 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-29
- 2025-07-20 15:46:49,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-3
- 2025-07-20 15:46:49,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-21
- 2025-07-20 15:46:49,153 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-5
- 2025-07-20 15:46:49,358 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-4
- 2025-07-20 15:46:49,364 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-9
- 2025-07-20 15:46:49,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-8
- 2025-07-20 15:46:49,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-3
- 2025-07-20 15:46:49,456 - sglang - INFO - [2025-07-20 15:46:49 TP0] Decode batch. #running-req: 7, #token: 25708, token usage: 0.68, gen throughput (token/s): 139.99, #queue-req: 0
- 2025-07-20 15:46:49,456 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 15:46:49,468 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-10
- 2025-07-20 15:46:49,469 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-22
- 2025-07-20 15:46:49,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-20
- 2025-07-20 15:46:49,552 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-24
- 2025-07-20 15:46:49,569 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-10
- 2025-07-20 15:46:49,575 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-17
- 2025-07-20 15:46:49,667 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-31
- 2025-07-20 15:46:49,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-35
- 2025-07-20 15:46:49,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-18
- 2025-07-20 15:46:49,756 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-27
- 2025-07-20 15:46:49,760 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-21
- 2025-07-20 15:46:49,769 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-12
- 2025-07-20 15:46:49,771 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-16
- 2025-07-20 15:46:49,838 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-15
- 2025-07-20 15:46:49,843 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-14
- 2025-07-20 15:46:49,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-30
- 2025-07-20 15:46:49,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-11
- 2025-07-20 15:46:49,857 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-26
- 2025-07-20 15:46:49,863 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-42
- 2025-07-20 15:46:49,872 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-25
- 2025-07-20 15:46:49,949 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-7
- 2025-07-20 15:46:49,951 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-36
- 2025-07-20 15:46:49,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-19
- 2025-07-20 15:46:49,969 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-40
- 2025-07-20 15:46:50,036 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-23
- 2025-07-20 15:46:50,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-39
- 2025-07-20 15:46:50,042 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-48
- 2025-07-20 15:46:50,044 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-18
- 2025-07-20 15:46:50,051 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-29
- 2025-07-20 15:46:50,069 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-41
- 2025-07-20 15:46:50,071 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-55
- 2025-07-20 15:46:50,133 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-38
- 2025-07-20 15:46:50,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-28
- 2025-07-20 15:46:50,144 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-34
- 2025-07-20 15:46:50,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-13
- 2025-07-20 15:46:50,153 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-44
- 2025-07-20 15:46:50,155 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-37
- 2025-07-20 15:46:50,158 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-49
- 2025-07-20 15:46:50,252 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-46
- 2025-07-20 15:46:50,262 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-32
- 2025-07-20 15:46:50,277 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-54
- 2025-07-20 15:46:50,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-53
- 2025-07-20 15:46:50,349 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-43
- 2025-07-20 15:46:50,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-47
- 2025-07-20 15:46:50,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-45
- 2025-07-20 15:46:50,361 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-33
- 2025-07-20 15:46:50,366 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-2
- 2025-07-20 15:46:50,367 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-68
- 2025-07-20 15:46:50,374 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-50
- 2025-07-20 15:46:50,374 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-61
- 2025-07-20 15:46:50,440 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-56
- 2025-07-20 15:46:50,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-57
- 2025-07-20 15:46:50,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-52
- 2025-07-20 15:46:50,455 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-58
- 2025-07-20 15:46:50,465 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-67
- 2025-07-20 15:46:50,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-64
- 2025-07-20 15:46:50,551 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-59
- 2025-07-20 15:46:51,061 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-51
- 2025-07-20 15:46:51,163 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-60
- 2025-07-20 15:46:51,164 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-4
- 2025-07-20 15:46:51,164 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-5
- 2025-07-20 15:46:51,165 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-62
- 2025-07-20 15:46:51,360 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-8
- 2025-07-20 15:46:51,360 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-5
- 2025-07-20 15:46:51,361 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-1
- 2025-07-20 15:46:51,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-66
- 2025-07-20 15:46:51,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-3
- 2025-07-20 15:46:51,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-2
- 2025-07-20 15:46:51,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-1
- 2025-07-20 15:46:51,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-6
- 2025-07-20 15:46:51,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-63
- 2025-07-20 15:46:51,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-14
- 2025-07-20 15:46:51,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-10
- 2025-07-20 15:46:51,569 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-16
- 2025-07-20 15:46:51,569 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-7
- 2025-07-20 15:46:51,770 - sglang - INFO - [2025-07-20 15:46:51 TP0] Decode batch. #running-req: 5, #token: 20054, token usage: 0.53, gen throughput (token/s): 133.90, #queue-req: 0
- 2025-07-20 15:46:51,770 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 15:46:51,770 - sglang - INFO - [2025-07-20 15:46:51 TP0] Prefill batch. #new-seq: 1, #new-token: 1156, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.53, #running-req: 5, #queue-req: 0
- 2025-07-20 15:46:51,770 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 15:46:51,774 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-7
- 2025-07-20 15:46:51,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-8
- 2025-07-20 15:46:51,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-9
- 2025-07-20 15:46:51,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-65
- 2025-07-20 15:46:51,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-13
- 2025-07-20 15:46:51,776 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-3
- 2025-07-20 15:46:51,833 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-4
- 2025-07-20 15:46:51,833 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-6
- 2025-07-20 15:46:51,834 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-12
- 2025-07-20 15:46:51,834 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-15
- 2025-07-20 15:46:52,660 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-11
- 2025-07-20 15:46:52,661 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-2
- 2025-07-20 15:46:52,665 - sglang - INFO - [2025-07-20 15:46:52 TP0] Prefill batch. #new-seq: 2, #new-token: 7179, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.56, #running-req: 6, #queue-req: 8
- 2025-07-20 15:46:52,665 - __main__ - INFO - sglang running req: 6 queue req: 8
- 2025-07-20 15:46:53,475 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
- 2025-07-20 15:46:54,743 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:46:54,743 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 908.91 950.67
- sglang_output_tokens 259.81 274.65
- 2025-07-20 15:46:54,743 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 497 | 500
- 1 | 9 | 10
- 2 | 4 | 5
- 3 | 0 | 529
- 2025-07-20 15:46:55,637 - sglang - INFO - [2025-07-20 15:46:55] Exception in TokenizerManager:
- 2025-07-20 15:46:55,637 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:46:55,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
- 2025-07-20 15:46:55,637 - sglang - INFO - process_result = image_processor(image)
- 2025-07-20 15:46:55,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:55,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
- 2025-07-20 15:46:55,638 - sglang - INFO - return self.preprocess(images, **kwargs)
- 2025-07-20 15:46:55,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:55,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
- 2025-07-20 15:46:55,638 - sglang - INFO - patches, image_grid_thw = self._preprocess(
- 2025-07-20 15:46:55,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:55,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
- 2025-07-20 15:46:55,638 - sglang - INFO - resized_height, resized_width = smart_resize(
- 2025-07-20 15:46:55,638 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 15:46:55,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
- 2025-07-20 15:46:55,638 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
- 2025-07-20 15:46:55,638 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
- 2025-07-20 15:46:55,638 - sglang - INFO -
- 2025-07-20 15:46:57,188 - sglang - INFO - [2025-07-20 15:46:57 TP0] Decode batch. #running-req: 8, #token: 28709, token usage: 0.76, gen throughput (token/s): 53.95, #queue-req: 83
- 2025-07-20 15:46:57,188 - __main__ - INFO - sglang running req: 8 queue req: 83
- 2025-07-20 15:46:57,636 - sglang - INFO - [2025-07-20 15:46:57 TP0] Prefill batch. #new-seq: 1, #new-token: 1861, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 93
- 2025-07-20 15:46:57,636 - __main__ - INFO - sglang running req: 7 queue req: 93
- 2025-07-20 15:46:59,556 - sglang - INFO - [2025-07-20 15:46:59 TP0] Decode batch. #running-req: 8, #token: 29683, token usage: 0.78, gen throughput (token/s): 134.67, #queue-req: 143
- 2025-07-20 15:46:59,557 - __main__ - INFO - sglang running req: 8 queue req: 143
- 2025-07-20 15:46:59,582 - sglang - INFO - [2025-07-20 15:46:59] ERROR: Exception in ASGI application
- 2025-07-20 15:46:59,582 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
- 2025-07-20 15:46:59,583 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
- 2025-07-20 15:46:59,583 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
- 2025-07-20 15:46:59,583 - sglang - INFO - return await self.app(scope, receive, send)
- 2025-07-20 15:46:59,583 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
- 2025-07-20 15:46:59,583 - sglang - INFO - await super().__call__(scope, receive, send)
- 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
- 2025-07-20 15:46:59,583 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
- 2025-07-20 15:46:59,583 - sglang - INFO - raise exc
- 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
- 2025-07-20 15:46:59,584 - sglang - INFO - await self.app(scope, receive, _send)
- 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
- 2025-07-20 15:46:59,584 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
- 2025-07-20 15:46:59,584 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
- 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:46:59,584 - sglang - INFO - raise exc
- 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:46:59,584 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
- 2025-07-20 15:46:59,584 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
- 2025-07-20 15:46:59,584 - sglang - INFO - await route.handle(scope, receive, send)
- 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
- 2025-07-20 15:46:59,585 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
- 2025-07-20 15:46:59,585 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
- 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:46:59,585 - sglang - INFO - raise exc
- 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:46:59,585 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
- 2025-07-20 15:46:59,585 - sglang - INFO - response = await f(request)
- 2025-07-20 15:46:59,585 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
- 2025-07-20 15:46:59,585 - sglang - INFO - raw_response = await run_endpoint_function(
- 2025-07-20 15:46:59,585 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
- 2025-07-20 15:46:59,585 - sglang - INFO - return await dependant.call(**values)
- 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
- 2025-07-20 15:46:59,586 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
- 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
- 2025-07-20 15:46:59,586 - sglang - INFO - ret = await tokenizer_manager.generate_request(
- 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
- 2025-07-20 15:46:59,586 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
- 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
- 2025-07-20 15:46:59,586 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
- 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
- 2025-07-20 15:46:59,586 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
- 2025-07-20 15:46:59,587 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:46:59,587 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
- 2025-07-20 15:46:59,587 - __main__ - WARNING - ValueError on attempt 0 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
- 2025-07-20 15:47:00,135 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
- 2025-07-20 15:47:00,455 - sglang - INFO - Token indices sequence length is longer than the specified maximum sequence length for this model (78749 > 32768). Running this sequence through the model will result in indexing errors
- 2025-07-20 15:47:00,547 - sglang - INFO - [2025-07-20 15:47:00 TP0] Prefill batch. #new-seq: 1, #new-token: 1958, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 7, #queue-req: 164
- 2025-07-20 15:47:00,547 - __main__ - INFO - sglang running req: 7 queue req: 164
- 2025-07-20 15:47:00,635 - __main__ - INFO - Finished TaskGroup for worker on 16158dc6fac58e5a41d3888b9554c3d75b2a5744
- 2025-07-20 15:47:00,635 - __main__ - INFO - Got 1 docs for 16158dc6fac58e5a41d3888b9554c3d75b2a5744
- 2025-07-20 15:47:00,638 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-07-20 15:47:00,638 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-07-20 15:47:00,638 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-07-20 15:47:00,639 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-07-20 15:47:00,639 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-07-20 15:47:01,862 - sglang - INFO - [2025-07-20 15:47:01 TP0] Decode batch. #running-req: 8, #token: 28667, token usage: 0.75, gen throughput (token/s): 138.38, #queue-req: 205
- 2025-07-20 15:47:01,862 - __main__ - INFO - sglang running req: 8 queue req: 205
- 2025-07-20 15:47:03,233 - sglang - INFO - [2025-07-20 15:47:03 TP0] Decode batch. #running-req: 8, #token: 28987, token usage: 0.76, gen throughput (token/s): 233.27, #queue-req: 243
- 2025-07-20 15:47:03,234 - __main__ - INFO - sglang running req: 8 queue req: 243
- 2025-07-20 15:47:04,557 - sglang - INFO - [2025-07-20 15:47:04 TP0] Decode batch. #running-req: 8, #token: 29307, token usage: 0.77, gen throughput (token/s): 241.78, #queue-req: 282
- 2025-07-20 15:47:04,557 - __main__ - INFO - sglang running req: 8 queue req: 282
- 2025-07-20 15:47:04,744 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:47:04,744 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 7.35 32.62
- finished_output_tokens 1.76 7.83
- sglang_input_tokens 904.93 895.69
- sglang_output_tokens 258.39 258.44
- 2025-07-20 15:47:04,744 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 497 | 500
- 1 | 9 | 10
- 2 | 5 | 5
- 3 | 1 | 529
- 2025-07-20 15:47:05,990 - sglang - INFO - [2025-07-20 15:47:05 TP0] Decode batch. #running-req: 8, #token: 29627, token usage: 0.78, gen throughput (token/s): 223.34, #queue-req: 327
- 2025-07-20 15:47:05,990 - __main__ - INFO - sglang running req: 8 queue req: 327
- 2025-07-20 15:47:07,333 - sglang - INFO - [2025-07-20 15:47:07 TP0] Decode batch. #running-req: 8, #token: 29947, token usage: 0.79, gen throughput (token/s): 238.16, #queue-req: 363
- 2025-07-20 15:47:07,333 - __main__ - INFO - sglang running req: 8 queue req: 363
- 2025-07-20 15:47:07,571 - __main__ - WARNING - JSON decode error on attempt 1 for scripts/data/11445224007035644H44421110A0001.pdf-3: Expecting ',' delimiter: line 1 column 2734 (char 2733)
- 2025-07-20 15:47:07,595 - sglang - INFO - [2025-07-20 15:47:07 TP0] Prefill batch. #new-seq: 1, #new-token: 3732, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 7, #queue-req: 371
- 2025-07-20 15:47:07,595 - __main__ - INFO - sglang running req: 7 queue req: 371
- 2025-07-20 15:47:08,256 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-3
- 2025-07-20 15:47:08,387 - sglang - INFO - [2025-07-20 15:47:08] Exception in TokenizerManager:
- 2025-07-20 15:47:08,388 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:08,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
- 2025-07-20 15:47:08,388 - sglang - INFO - process_result = image_processor(image)
- 2025-07-20 15:47:08,388 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:08,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
- 2025-07-20 15:47:08,388 - sglang - INFO - return self.preprocess(images, **kwargs)
- 2025-07-20 15:47:08,388 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:08,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
- 2025-07-20 15:47:08,388 - sglang - INFO - patches, image_grid_thw = self._preprocess(
- 2025-07-20 15:47:08,388 - sglang - INFO - ^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:08,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
- 2025-07-20 15:47:08,388 - sglang - INFO - resized_height, resized_width = smart_resize(
- 2025-07-20 15:47:08,389 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 15:47:08,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
- 2025-07-20 15:47:08,389 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
- 2025-07-20 15:47:08,389 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
- 2025-07-20 15:47:08,389 - sglang - INFO -
- 2025-07-20 15:47:10,098 - sglang - INFO - [2025-07-20 15:47:10 TP0] Decode batch. #running-req: 8, #token: 28996, token usage: 0.76, gen throughput (token/s): 115.40, #queue-req: 451
- 2025-07-20 15:47:10,098 - __main__ - INFO - sglang running req: 8 queue req: 451
- 2025-07-20 15:47:10,735 - __main__ - INFO - Finished TaskGroup for worker on 8d1e4551c46000ba4529a1ac09bae565b95f4ab7
- 2025-07-20 15:47:10,735 - __main__ - INFO - Got 1 docs for 8d1e4551c46000ba4529a1ac09bae565b95f4ab7
- 2025-07-20 15:47:10,737 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-07-20 15:47:10,759 - sglang - INFO - [2025-07-20 15:47:10 TP0] Prefill batch. #new-seq: 2, #new-token: 4253, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 7, #queue-req: 478
- 2025-07-20 15:47:10,759 - __main__ - INFO - sglang running req: 7 queue req: 478
- 2025-07-20 15:47:11,509 - sglang - INFO - [2025-07-20 15:47:11] ERROR: Exception in ASGI application
- 2025-07-20 15:47:11,509 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:11,509 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
- 2025-07-20 15:47:11,509 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
- 2025-07-20 15:47:11,510 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
- 2025-07-20 15:47:11,510 - sglang - INFO - return await self.app(scope, receive, send)
- 2025-07-20 15:47:11,510 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
- 2025-07-20 15:47:11,510 - sglang - INFO - await super().__call__(scope, receive, send)
- 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
- 2025-07-20 15:47:11,510 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
- 2025-07-20 15:47:11,510 - sglang - INFO - raise exc
- 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
- 2025-07-20 15:47:11,510 - sglang - INFO - await self.app(scope, receive, _send)
- 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
- 2025-07-20 15:47:11,510 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
- 2025-07-20 15:47:11,511 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
- 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:11,511 - sglang - INFO - raise exc
- 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:11,511 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
- 2025-07-20 15:47:11,511 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
- 2025-07-20 15:47:11,511 - sglang - INFO - await route.handle(scope, receive, send)
- 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
- 2025-07-20 15:47:11,511 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
- 2025-07-20 15:47:11,511 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
- 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:11,512 - sglang - INFO - raise exc
- 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:11,512 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
- 2025-07-20 15:47:11,512 - sglang - INFO - response = await f(request)
- 2025-07-20 15:47:11,512 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
- 2025-07-20 15:47:11,512 - sglang - INFO - raw_response = await run_endpoint_function(
- 2025-07-20 15:47:11,512 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
- 2025-07-20 15:47:11,512 - sglang - INFO - return await dependant.call(**values)
- 2025-07-20 15:47:11,512 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
- 2025-07-20 15:47:11,512 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
- 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,513 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
- 2025-07-20 15:47:11,513 - sglang - INFO - ret = await tokenizer_manager.generate_request(
- 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,513 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
- 2025-07-20 15:47:11,513 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
- 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,513 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
- 2025-07-20 15:47:11,513 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
- 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,513 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
- 2025-07-20 15:47:11,513 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
- 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,513 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
- 2025-07-20 15:47:11,514 - __main__ - WARNING - ValueError on attempt 1 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
- 2025-07-20 15:47:11,653 - __main__ - WARNING - ValueError on attempt 0 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-07-20 15:47:11,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
- 2025-07-20 15:47:11,767 - sglang - INFO - [2025-07-20 15:47:11] Exception in TokenizerManager:
- 2025-07-20 15:47:11,767 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:11,767 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
- 2025-07-20 15:47:11,767 - sglang - INFO - process_result = image_processor(image)
- 2025-07-20 15:47:11,767 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,767 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
- 2025-07-20 15:47:11,768 - sglang - INFO - return self.preprocess(images, **kwargs)
- 2025-07-20 15:47:11,768 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,768 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
- 2025-07-20 15:47:11,768 - sglang - INFO - patches, image_grid_thw = self._preprocess(
- 2025-07-20 15:47:11,768 - sglang - INFO - ^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,768 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
- 2025-07-20 15:47:11,768 - sglang - INFO - resized_height, resized_width = smart_resize(
- 2025-07-20 15:47:11,768 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 15:47:11,768 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
- 2025-07-20 15:47:11,768 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
- 2025-07-20 15:47:11,768 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
- 2025-07-20 15:47:11,768 - sglang - INFO -
- 2025-07-20 15:47:11,770 - sglang - INFO - [2025-07-20 15:47:11] ERROR: Exception in ASGI application
- 2025-07-20 15:47:11,770 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:11,770 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
- 2025-07-20 15:47:11,770 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
- 2025-07-20 15:47:11,770 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,770 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
- 2025-07-20 15:47:11,770 - sglang - INFO - return await self.app(scope, receive, send)
- 2025-07-20 15:47:11,771 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
- 2025-07-20 15:47:11,771 - sglang - INFO - await super().__call__(scope, receive, send)
- 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
- 2025-07-20 15:47:11,771 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
- 2025-07-20 15:47:11,771 - sglang - INFO - raise exc
- 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
- 2025-07-20 15:47:11,771 - sglang - INFO - await self.app(scope, receive, _send)
- 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
- 2025-07-20 15:47:11,771 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
- 2025-07-20 15:47:11,771 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
- 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:11,771 - sglang - INFO - raise exc
- 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:11,772 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
- 2025-07-20 15:47:11,772 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
- 2025-07-20 15:47:11,772 - sglang - INFO - await route.handle(scope, receive, send)
- 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
- 2025-07-20 15:47:11,772 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
- 2025-07-20 15:47:11,772 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
- 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:11,772 - sglang - INFO - raise exc
- 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:11,772 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
- 2025-07-20 15:47:11,773 - sglang - INFO - response = await f(request)
- 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
- 2025-07-20 15:47:11,773 - sglang - INFO - raw_response = await run_endpoint_function(
- 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
- 2025-07-20 15:47:11,773 - sglang - INFO - return await dependant.call(**values)
- 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
- 2025-07-20 15:47:11,773 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
- 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
- 2025-07-20 15:47:11,773 - sglang - INFO - ret = await tokenizer_manager.generate_request(
- 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,774 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
- 2025-07-20 15:47:11,774 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
- 2025-07-20 15:47:11,774 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,774 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
- 2025-07-20 15:47:11,774 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
- 2025-07-20 15:47:11,774 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,774 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
- 2025-07-20 15:47:11,774 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
- 2025-07-20 15:47:11,774 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:11,774 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
- 2025-07-20 15:47:11,775 - __main__ - WARNING - ValueError on attempt 2 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
- 2025-07-20 15:47:12,085 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
- 2025-07-20 15:47:12,117 - sglang - INFO - [2025-07-20 15:47:12] Exception in TokenizerManager:
- 2025-07-20 15:47:12,117 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:12,117 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
- 2025-07-20 15:47:12,117 - sglang - INFO - process_result = image_processor(image)
- 2025-07-20 15:47:12,117 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,117 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
- 2025-07-20 15:47:12,117 - sglang - INFO - return self.preprocess(images, **kwargs)
- 2025-07-20 15:47:12,117 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,117 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
- 2025-07-20 15:47:12,117 - sglang - INFO - patches, image_grid_thw = self._preprocess(
- 2025-07-20 15:47:12,117 - sglang - INFO - ^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,118 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
- 2025-07-20 15:47:12,118 - sglang - INFO - resized_height, resized_width = smart_resize(
- 2025-07-20 15:47:12,118 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 15:47:12,118 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
- 2025-07-20 15:47:12,118 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
- 2025-07-20 15:47:12,118 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
- 2025-07-20 15:47:12,118 - sglang - INFO -
- 2025-07-20 15:47:12,120 - sglang - INFO - [2025-07-20 15:47:12] ERROR: Exception in ASGI application
- 2025-07-20 15:47:12,120 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:12,120 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
- 2025-07-20 15:47:12,120 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
- 2025-07-20 15:47:12,120 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,120 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
- 2025-07-20 15:47:12,120 - sglang - INFO - return await self.app(scope, receive, send)
- 2025-07-20 15:47:12,120 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,120 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
- 2025-07-20 15:47:12,120 - sglang - INFO - await super().__call__(scope, receive, send)
- 2025-07-20 15:47:12,120 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
- 2025-07-20 15:47:12,121 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
- 2025-07-20 15:47:12,121 - sglang - INFO - raise exc
- 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
- 2025-07-20 15:47:12,121 - sglang - INFO - await self.app(scope, receive, _send)
- 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
- 2025-07-20 15:47:12,121 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
- 2025-07-20 15:47:12,121 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
- 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:12,121 - sglang - INFO - raise exc
- 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:12,121 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
- 2025-07-20 15:47:12,122 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
- 2025-07-20 15:47:12,122 - sglang - INFO - await route.handle(scope, receive, send)
- 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
- 2025-07-20 15:47:12,122 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
- 2025-07-20 15:47:12,122 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
- 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:12,122 - sglang - INFO - raise exc
- 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:12,122 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
- 2025-07-20 15:47:12,122 - sglang - INFO - response = await f(request)
- 2025-07-20 15:47:12,122 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
- 2025-07-20 15:47:12,122 - sglang - INFO - raw_response = await run_endpoint_function(
- 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
- 2025-07-20 15:47:12,123 - sglang - INFO - return await dependant.call(**values)
- 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
- 2025-07-20 15:47:12,123 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
- 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
- 2025-07-20 15:47:12,123 - sglang - INFO - ret = await tokenizer_manager.generate_request(
- 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
- 2025-07-20 15:47:12,123 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
- 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
- 2025-07-20 15:47:12,123 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
- 2025-07-20 15:47:12,124 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,124 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
- 2025-07-20 15:47:12,124 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
- 2025-07-20 15:47:12,124 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,124 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
- 2025-07-20 15:47:12,132 - __main__ - WARNING - ValueError on attempt 3 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
- 2025-07-20 15:47:12,592 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
- 2025-07-20 15:47:12,620 - sglang - INFO - [2025-07-20 15:47:12] Exception in TokenizerManager:
- 2025-07-20 15:47:12,620 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
- 2025-07-20 15:47:12,621 - sglang - INFO - process_result = image_processor(image)
- 2025-07-20 15:47:12,621 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
- 2025-07-20 15:47:12,621 - sglang - INFO - return self.preprocess(images, **kwargs)
- 2025-07-20 15:47:12,621 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
- 2025-07-20 15:47:12,621 - sglang - INFO - patches, image_grid_thw = self._preprocess(
- 2025-07-20 15:47:12,621 - sglang - INFO - ^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
- 2025-07-20 15:47:12,621 - sglang - INFO - resized_height, resized_width = smart_resize(
- 2025-07-20 15:47:12,621 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
- 2025-07-20 15:47:12,621 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
- 2025-07-20 15:47:12,622 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
- 2025-07-20 15:47:12,622 - sglang - INFO -
- 2025-07-20 15:47:12,634 - sglang - INFO - [2025-07-20 15:47:12] ERROR: Exception in ASGI application
- 2025-07-20 15:47:12,634 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:12,634 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
- 2025-07-20 15:47:12,634 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
- 2025-07-20 15:47:12,634 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,634 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
- 2025-07-20 15:47:12,634 - sglang - INFO - return await self.app(scope, receive, send)
- 2025-07-20 15:47:12,635 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
- 2025-07-20 15:47:12,635 - sglang - INFO - await super().__call__(scope, receive, send)
- 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
- 2025-07-20 15:47:12,635 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
- 2025-07-20 15:47:12,635 - sglang - INFO - raise exc
- 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
- 2025-07-20 15:47:12,635 - sglang - INFO - await self.app(scope, receive, _send)
- 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
- 2025-07-20 15:47:12,635 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
- 2025-07-20 15:47:12,635 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
- 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:12,635 - sglang - INFO - raise exc
- 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:12,636 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
- 2025-07-20 15:47:12,636 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
- 2025-07-20 15:47:12,636 - sglang - INFO - await route.handle(scope, receive, send)
- 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
- 2025-07-20 15:47:12,636 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
- 2025-07-20 15:47:12,636 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
- 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:12,636 - sglang - INFO - raise exc
- 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:12,636 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
- 2025-07-20 15:47:12,637 - sglang - INFO - response = await f(request)
- 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
- 2025-07-20 15:47:12,637 - sglang - INFO - raw_response = await run_endpoint_function(
- 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
- 2025-07-20 15:47:12,637 - sglang - INFO - return await dependant.call(**values)
- 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
- 2025-07-20 15:47:12,637 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
- 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
- 2025-07-20 15:47:12,637 - sglang - INFO - ret = await tokenizer_manager.generate_request(
- 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
- 2025-07-20 15:47:12,638 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
- 2025-07-20 15:47:12,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
- 2025-07-20 15:47:12,638 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
- 2025-07-20 15:47:12,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
- 2025-07-20 15:47:12,638 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
- 2025-07-20 15:47:12,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,638 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
- 2025-07-20 15:47:12,638 - __main__ - WARNING - ValueError on attempt 4 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
- 2025-07-20 15:47:12,936 - sglang - INFO - [2025-07-20 15:47:12 TP0] Decode batch. #running-req: 9, #token: 27603, token usage: 0.73, gen throughput (token/s): 119.10, #queue-req: 520
- 2025-07-20 15:47:12,936 - __main__ - INFO - sglang running req: 9 queue req: 520
- 2025-07-20 15:47:12,965 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
- 2025-07-20 15:47:12,999 - sglang - INFO - [2025-07-20 15:47:12] Exception in TokenizerManager:
- 2025-07-20 15:47:12,999 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:12,999 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
- 2025-07-20 15:47:12,999 - sglang - INFO - process_result = image_processor(image)
- 2025-07-20 15:47:12,999 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,999 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
- 2025-07-20 15:47:12,999 - sglang - INFO - return self.preprocess(images, **kwargs)
- 2025-07-20 15:47:12,999 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:12,999 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
- 2025-07-20 15:47:13,000 - sglang - INFO - patches, image_grid_thw = self._preprocess(
- 2025-07-20 15:47:13,000 - sglang - INFO - ^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,000 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
- 2025-07-20 15:47:13,000 - sglang - INFO - resized_height, resized_width = smart_resize(
- 2025-07-20 15:47:13,000 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 15:47:13,000 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
- 2025-07-20 15:47:13,000 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
- 2025-07-20 15:47:13,000 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
- 2025-07-20 15:47:13,000 - sglang - INFO -
- 2025-07-20 15:47:13,004 - sglang - INFO - [2025-07-20 15:47:13] ERROR: Exception in ASGI application
- 2025-07-20 15:47:13,004 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:13,004 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
- 2025-07-20 15:47:13,004 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
- 2025-07-20 15:47:13,004 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,004 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
- 2025-07-20 15:47:13,005 - sglang - INFO - return await self.app(scope, receive, send)
- 2025-07-20 15:47:13,005 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
- 2025-07-20 15:47:13,005 - sglang - INFO - await super().__call__(scope, receive, send)
- 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
- 2025-07-20 15:47:13,005 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
- 2025-07-20 15:47:13,005 - sglang - INFO - raise exc
- 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
- 2025-07-20 15:47:13,005 - sglang - INFO - await self.app(scope, receive, _send)
- 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
- 2025-07-20 15:47:13,005 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
- 2025-07-20 15:47:13,005 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
- 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:13,006 - sglang - INFO - raise exc
- 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:13,006 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
- 2025-07-20 15:47:13,006 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
- 2025-07-20 15:47:13,006 - sglang - INFO - await route.handle(scope, receive, send)
- 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
- 2025-07-20 15:47:13,006 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
- 2025-07-20 15:47:13,006 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
- 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:13,006 - sglang - INFO - raise exc
- 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:13,007 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
- 2025-07-20 15:47:13,007 - sglang - INFO - response = await f(request)
- 2025-07-20 15:47:13,007 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
- 2025-07-20 15:47:13,007 - sglang - INFO - raw_response = await run_endpoint_function(
- 2025-07-20 15:47:13,007 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
- 2025-07-20 15:47:13,007 - sglang - INFO - return await dependant.call(**values)
- 2025-07-20 15:47:13,007 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
- 2025-07-20 15:47:13,007 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
- 2025-07-20 15:47:13,007 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
- 2025-07-20 15:47:13,007 - sglang - INFO - ret = await tokenizer_manager.generate_request(
- 2025-07-20 15:47:13,008 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,008 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
- 2025-07-20 15:47:13,008 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
- 2025-07-20 15:47:13,008 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,008 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
- 2025-07-20 15:47:13,008 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
- 2025-07-20 15:47:13,008 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,008 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
- 2025-07-20 15:47:13,008 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
- 2025-07-20 15:47:13,008 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,008 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
- 2025-07-20 15:47:13,009 - __main__ - WARNING - ValueError on attempt 5 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
- 2025-07-20 15:47:13,362 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
- 2025-07-20 15:47:13,390 - sglang - INFO - [2025-07-20 15:47:13] Exception in TokenizerManager:
- 2025-07-20 15:47:13,390 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:13,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
- 2025-07-20 15:47:13,390 - sglang - INFO - process_result = image_processor(image)
- 2025-07-20 15:47:13,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
- 2025-07-20 15:47:13,390 - sglang - INFO - return self.preprocess(images, **kwargs)
- 2025-07-20 15:47:13,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
- 2025-07-20 15:47:13,390 - sglang - INFO - patches, image_grid_thw = self._preprocess(
- 2025-07-20 15:47:13,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
- 2025-07-20 15:47:13,391 - sglang - INFO - resized_height, resized_width = smart_resize(
- 2025-07-20 15:47:13,391 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 15:47:13,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
- 2025-07-20 15:47:13,391 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
- 2025-07-20 15:47:13,391 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
- 2025-07-20 15:47:13,391 - sglang - INFO -
- 2025-07-20 15:47:13,394 - sglang - INFO - [2025-07-20 15:47:13] ERROR: Exception in ASGI application
- 2025-07-20 15:47:13,395 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
- 2025-07-20 15:47:13,395 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
- 2025-07-20 15:47:13,395 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
- 2025-07-20 15:47:13,395 - sglang - INFO - return await self.app(scope, receive, send)
- 2025-07-20 15:47:13,395 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
- 2025-07-20 15:47:13,395 - sglang - INFO - await super().__call__(scope, receive, send)
- 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
- 2025-07-20 15:47:13,395 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
- 2025-07-20 15:47:13,395 - sglang - INFO - raise exc
- 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
- 2025-07-20 15:47:13,396 - sglang - INFO - await self.app(scope, receive, _send)
- 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
- 2025-07-20 15:47:13,396 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
- 2025-07-20 15:47:13,396 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
- 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:13,396 - sglang - INFO - raise exc
- 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:13,396 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
- 2025-07-20 15:47:13,396 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
- 2025-07-20 15:47:13,396 - sglang - INFO - await route.handle(scope, receive, send)
- 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
- 2025-07-20 15:47:13,396 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
- 2025-07-20 15:47:13,397 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
- 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:13,397 - sglang - INFO - raise exc
- 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:13,397 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
- 2025-07-20 15:47:13,397 - sglang - INFO - response = await f(request)
- 2025-07-20 15:47:13,397 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
- 2025-07-20 15:47:13,397 - sglang - INFO - raw_response = await run_endpoint_function(
- 2025-07-20 15:47:13,397 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
- 2025-07-20 15:47:13,397 - sglang - INFO - return await dependant.call(**values)
- 2025-07-20 15:47:13,397 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
- 2025-07-20 15:47:13,398 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
- 2025-07-20 15:47:13,398 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
- 2025-07-20 15:47:13,398 - sglang - INFO - ret = await tokenizer_manager.generate_request(
- 2025-07-20 15:47:13,398 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
- 2025-07-20 15:47:13,398 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
- 2025-07-20 15:47:13,398 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
- 2025-07-20 15:47:13,398 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
- 2025-07-20 15:47:13,398 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
- 2025-07-20 15:47:13,398 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
- 2025-07-20 15:47:13,399 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,399 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
- 2025-07-20 15:47:13,399 - __main__ - WARNING - ValueError on attempt 6 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
- 2025-07-20 15:47:13,758 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
- 2025-07-20 15:47:13,766 - sglang - INFO - [2025-07-20 15:47:13 TP0] Prefill batch. #new-seq: 1, #new-token: 1665, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 8, #queue-req: 519
- 2025-07-20 15:47:13,766 - __main__ - INFO - sglang running req: 8 queue req: 519
- 2025-07-20 15:47:13,786 - sglang - INFO - [2025-07-20 15:47:13] Exception in TokenizerManager:
- 2025-07-20 15:47:13,786 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:13,786 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
- 2025-07-20 15:47:13,786 - sglang - INFO - process_result = image_processor(image)
- 2025-07-20 15:47:13,786 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,786 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
- 2025-07-20 15:47:13,786 - sglang - INFO - return self.preprocess(images, **kwargs)
- 2025-07-20 15:47:13,786 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,786 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
- 2025-07-20 15:47:13,786 - sglang - INFO - patches, image_grid_thw = self._preprocess(
- 2025-07-20 15:47:13,786 - sglang - INFO - ^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,786 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
- 2025-07-20 15:47:13,786 - sglang - INFO - resized_height, resized_width = smart_resize(
- 2025-07-20 15:47:13,787 - sglang - INFO - ^^^^^^^^^^^^^
- 2025-07-20 15:47:13,787 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
- 2025-07-20 15:47:13,787 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
- 2025-07-20 15:47:13,787 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
- 2025-07-20 15:47:13,787 - sglang - INFO -
- 2025-07-20 15:47:13,791 - sglang - INFO - [2025-07-20 15:47:13] ERROR: Exception in ASGI application
- 2025-07-20 15:47:13,791 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:47:13,791 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
- 2025-07-20 15:47:13,791 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
- 2025-07-20 15:47:13,791 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,791 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
- 2025-07-20 15:47:13,791 - sglang - INFO - return await self.app(scope, receive, send)
- 2025-07-20 15:47:13,791 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,791 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
- 2025-07-20 15:47:13,791 - sglang - INFO - await super().__call__(scope, receive, send)
- 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
- 2025-07-20 15:47:13,792 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
- 2025-07-20 15:47:13,792 - sglang - INFO - raise exc
- 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
- 2025-07-20 15:47:13,792 - sglang - INFO - await self.app(scope, receive, _send)
- 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
- 2025-07-20 15:47:13,792 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
- 2025-07-20 15:47:13,792 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
- 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:13,792 - sglang - INFO - raise exc
- 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:13,792 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
- 2025-07-20 15:47:13,793 - sglang - INFO - await self.middleware_stack(scope, receive, send)
- 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
- 2025-07-20 15:47:13,793 - sglang - INFO - await route.handle(scope, receive, send)
- 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
- 2025-07-20 15:47:13,793 - sglang - INFO - await self.app(scope, receive, send)
- 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
- 2025-07-20 15:47:13,793 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
- 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
- 2025-07-20 15:47:13,793 - sglang - INFO - raise exc
- 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
- 2025-07-20 15:47:13,793 - sglang - INFO - await app(scope, receive, sender)
- 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
- 2025-07-20 15:47:13,793 - sglang - INFO - response = await f(request)
- 2025-07-20 15:47:13,793 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
- 2025-07-20 15:47:13,794 - sglang - INFO - raw_response = await run_endpoint_function(
- 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
- 2025-07-20 15:47:13,794 - sglang - INFO - return await dependant.call(**values)
- 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
- 2025-07-20 15:47:13,794 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
- 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
- 2025-07-20 15:47:13,794 - sglang - INFO - ret = await tokenizer_manager.generate_request(
- 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
- 2025-07-20 15:47:13,794 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
- 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
- 2025-07-20 15:47:13,794 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
- 2025-07-20 15:47:13,795 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,795 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
- 2025-07-20 15:47:13,795 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
- 2025-07-20 15:47:13,795 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:47:13,795 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
- 2025-07-20 15:47:13,795 - __main__ - WARNING - ValueError on attempt 7 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
- 2025-07-20 15:47:13,796 - __main__ - ERROR - Failed to process tests/gnarly_pdfs/skinnypage.pdf-2 after 8 attempts.
- 2025-07-20 15:47:14,614 - sglang - INFO - [2025-07-20 15:47:14 TP0] Decode batch. #running-req: 9, #token: 27427, token usage: 0.72, gen throughput (token/s): 213.85, #queue-req: 519
- 2025-07-20 15:47:14,615 - __main__ - INFO - sglang running req: 9 queue req: 519
- 2025-07-20 15:47:14,745 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:47:14,746 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 30.85 137.88
- finished_output_tokens 11.83 52.86
- sglang_input_tokens 904.76 888.36
- sglang_output_tokens 259.71 263.92
- 2025-07-20 15:47:14,746 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 1 | 2 | 529
- 2025-07-20 15:47:14,907 - sglang - INFO - [2025-07-20 15:47:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2652, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 8, #queue-req: 518
- 2025-07-20 15:47:14,907 - __main__ - INFO - sglang running req: 8 queue req: 518
- 2025-07-20 15:47:16,391 - sglang - INFO - [2025-07-20 15:47:16 TP0] Decode batch. #running-req: 9, #token: 28177, token usage: 0.74, gen throughput (token/s): 202.06, #queue-req: 518
- 2025-07-20 15:47:16,391 - __main__ - INFO - sglang running req: 9 queue req: 518
- 2025-07-20 15:47:16,430 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
- 2025-07-20 15:47:16,788 - __main__ - WARNING - ValueError on attempt 1 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-07-20 15:47:17,385 - sglang - INFO - [2025-07-20 15:47:17 TP0] Decode batch. #running-req: 9, #token: 28537, token usage: 0.75, gen throughput (token/s): 362.07, #queue-req: 518
- 2025-07-20 15:47:17,386 - __main__ - INFO - sglang running req: 9 queue req: 518
- 2025-07-20 15:47:18,401 - sglang - INFO - [2025-07-20 15:47:18 TP0] Decode batch. #running-req: 9, #token: 28897, token usage: 0.76, gen throughput (token/s): 354.31, #queue-req: 518
- 2025-07-20 15:47:18,402 - __main__ - INFO - sglang running req: 9 queue req: 518
- 2025-07-20 15:47:19,510 - sglang - INFO - [2025-07-20 15:47:19 TP0] Decode batch. #running-req: 9, #token: 29257, token usage: 0.77, gen throughput (token/s): 324.66, #queue-req: 518
- 2025-07-20 15:47:19,511 - __main__ - INFO - sglang running req: 9 queue req: 518
- 2025-07-20 15:47:20,500 - sglang - INFO - [2025-07-20 15:47:20 TP0] Decode batch. #running-req: 9, #token: 29617, token usage: 0.78, gen throughput (token/s): 363.90, #queue-req: 518
- 2025-07-20 15:47:20,500 - __main__ - INFO - sglang running req: 9 queue req: 518
- 2025-07-20 15:47:21,480 - sglang - INFO - [2025-07-20 15:47:21 TP0] Decode batch. #running-req: 9, #token: 29977, token usage: 0.79, gen throughput (token/s): 367.29, #queue-req: 518
- 2025-07-20 15:47:21,480 - __main__ - INFO - sglang running req: 9 queue req: 518
- 2025-07-20 15:47:21,784 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
- 2025-07-20 15:47:22,060 - __main__ - WARNING - ValueError on attempt 2 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-07-20 15:47:22,124 - __main__ - WARNING - JSON decode error on attempt 2 for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 15:47:22,142 - sglang - INFO - [2025-07-20 15:47:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2744, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 8, #queue-req: 517
- 2025-07-20 15:47:22,142 - __main__ - INFO - sglang running req: 8 queue req: 517
- 2025-07-20 15:47:22,402 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12
- 2025-07-20 15:47:23,301 - sglang - INFO - [2025-07-20 15:47:23 TP0] Decode batch. #running-req: 9, #token: 29055, token usage: 0.76, gen throughput (token/s): 197.12, #queue-req: 518
- 2025-07-20 15:47:23,301 - __main__ - INFO - sglang running req: 9 queue req: 518
- 2025-07-20 15:47:24,101 - sglang - INFO - [2025-07-20 15:47:24 TP0] Prefill batch. #new-seq: 1, #new-token: 1677, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 8, #queue-req: 517
- 2025-07-20 15:47:24,101 - __main__ - INFO - sglang running req: 8 queue req: 517
- 2025-07-20 15:47:24,747 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:47:24,747 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 30.62 137.88
- finished_output_tokens 11.74 52.86
- sglang_input_tokens 902.88 869.90
- sglang_output_tokens 259.10 259.87
- 2025-07-20 15:47:24,748 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 1 | 4 | 529
- 2025-07-20 15:47:24,937 - sglang - INFO - [2025-07-20 15:47:24 TP0] Decode batch. #running-req: 9, #token: 29106, token usage: 0.77, gen throughput (token/s): 219.44, #queue-req: 517
- 2025-07-20 15:47:24,937 - __main__ - INFO - sglang running req: 9 queue req: 517
- 2025-07-20 15:47:25,318 - __main__ - WARNING - JSON decode error on attempt 2 for scripts/data/11445200MB2D6222364440125017008.pdf-13: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 15:47:25,334 - sglang - INFO - [2025-07-20 15:47:25 TP0] Prefill batch. #new-seq: 2, #new-token: 5740, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 8, #queue-req: 515
- 2025-07-20 15:47:25,334 - __main__ - INFO - sglang running req: 8 queue req: 515
- 2025-07-20 15:47:25,589 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-13
- 2025-07-20 15:47:27,163 - sglang - INFO - [2025-07-20 15:47:27 TP0] Prefill batch. #new-seq: 1, #new-token: 3205, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 515
- 2025-07-20 15:47:27,163 - __main__ - INFO - sglang running req: 9 queue req: 515
- 2025-07-20 15:47:27,285 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
- 2025-07-20 15:47:27,533 - __main__ - WARNING - ValueError on attempt 3 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-07-20 15:47:28,698 - sglang - INFO - [2025-07-20 15:47:28 TP0] Decode batch. #running-req: 10, #token: 32321, token usage: 0.85, gen throughput (token/s): 101.58, #queue-req: 515
- 2025-07-20 15:47:28,698 - __main__ - INFO - sglang running req: 10 queue req: 515
- 2025-07-20 15:47:29,689 - sglang - INFO - [2025-07-20 15:47:29 TP0] Decode batch. #running-req: 10, #token: 32721, token usage: 0.86, gen throughput (token/s): 403.49, #queue-req: 515
- 2025-07-20 15:47:29,689 - __main__ - INFO - sglang running req: 10 queue req: 515
- 2025-07-20 15:47:30,211 - sglang - INFO - [2025-07-20 15:47:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2912, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 514
- 2025-07-20 15:47:30,211 - __main__ - INFO - sglang running req: 9 queue req: 514
- 2025-07-20 15:47:31,546 - sglang - INFO - [2025-07-20 15:47:31 TP0] Decode batch. #running-req: 10, #token: 31594, token usage: 0.83, gen throughput (token/s): 214.81, #queue-req: 514
- 2025-07-20 15:47:31,547 - __main__ - INFO - sglang running req: 10 queue req: 514
- 2025-07-20 15:47:32,494 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
- 2025-07-20 15:47:32,667 - sglang - INFO - [2025-07-20 15:47:32 TP0] Decode batch. #running-req: 10, #token: 31994, token usage: 0.84, gen throughput (token/s): 357.05, #queue-req: 514
- 2025-07-20 15:47:32,667 - __main__ - INFO - sglang running req: 10 queue req: 514
- 2025-07-20 15:47:32,992 - __main__ - WARNING - ValueError on attempt 4 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-07-20 15:47:33,735 - sglang - INFO - [2025-07-20 15:47:33 TP0] Decode batch. #running-req: 10, #token: 32394, token usage: 0.85, gen throughput (token/s): 374.52, #queue-req: 514
- 2025-07-20 15:47:33,735 - __main__ - INFO - sglang running req: 10 queue req: 514
- 2025-07-20 15:47:34,749 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:47:34,749 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 30.39 137.88
- finished_output_tokens 11.65 52.86
- sglang_input_tokens 903.53 859.63
- sglang_output_tokens 259.07 258.80
- 2025-07-20 15:47:34,749 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 1 | 7 | 529
- 2025-07-20 15:47:34,749 - sglang - INFO - [2025-07-20 15:47:34 TP0] Decode batch. #running-req: 9, #token: 30869, token usage: 0.81, gen throughput (token/s): 388.23, #queue-req: 514
- 2025-07-20 15:47:34,750 - __main__ - INFO - sglang running req: 9 queue req: 514
- 2025-07-20 15:47:35,732 - sglang - INFO - [2025-07-20 15:47:35 TP0] Decode batch. #running-req: 9, #token: 31229, token usage: 0.82, gen throughput (token/s): 366.41, #queue-req: 514
- 2025-07-20 15:47:35,732 - __main__ - INFO - sglang running req: 9 queue req: 514
- 2025-07-20 15:47:36,517 - sglang - INFO - [2025-07-20 15:47:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2586, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 8, #queue-req: 513
- 2025-07-20 15:47:36,518 - __main__ - INFO - sglang running req: 8 queue req: 513
- 2025-07-20 15:47:37,533 - sglang - INFO - [2025-07-20 15:47:37 TP0] Decode batch. #running-req: 9, #token: 29582, token usage: 0.78, gen throughput (token/s): 199.39, #queue-req: 513
- 2025-07-20 15:47:37,533 - __main__ - INFO - sglang running req: 9 queue req: 513
- 2025-07-20 15:47:37,828 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
- 2025-07-20 15:47:38,074 - __main__ - WARNING - ValueError on attempt 5 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-07-20 15:47:38,519 - sglang - INFO - [2025-07-20 15:47:38 TP0] Decode batch. #running-req: 9, #token: 29942, token usage: 0.79, gen throughput (token/s): 365.13, #queue-req: 513
- 2025-07-20 15:47:38,519 - __main__ - INFO - sglang running req: 9 queue req: 513
- 2025-07-20 15:47:39,571 - sglang - INFO - [2025-07-20 15:47:39 TP0] Decode batch. #running-req: 9, #token: 30302, token usage: 0.80, gen throughput (token/s): 342.16, #queue-req: 513
- 2025-07-20 15:47:39,571 - __main__ - INFO - sglang running req: 9 queue req: 513
- 2025-07-20 15:47:39,794 - sglang - INFO - [2025-07-20 15:47:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2147, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 8, #queue-req: 512
- 2025-07-20 15:47:39,794 - __main__ - INFO - sglang running req: 8 queue req: 512
- 2025-07-20 15:47:41,477 - sglang - INFO - [2025-07-20 15:47:41 TP0] Decode batch. #running-req: 9, #token: 28236, token usage: 0.74, gen throughput (token/s): 188.38, #queue-req: 512
- 2025-07-20 15:47:41,477 - __main__ - INFO - sglang running req: 9 queue req: 512
- 2025-07-20 15:47:42,135 - sglang - INFO - [2025-07-20 15:47:42 TP0] Prefill batch. #new-seq: 1, #new-token: 3094, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 8, #queue-req: 511
- 2025-07-20 15:47:42,135 - __main__ - INFO - sglang running req: 8 queue req: 511
- 2025-07-20 15:47:43,397 - sglang - INFO - [2025-07-20 15:47:43 TP0] Decode batch. #running-req: 9, #token: 28425, token usage: 0.75, gen throughput (token/s): 186.91, #queue-req: 511
- 2025-07-20 15:47:43,398 - __main__ - INFO - sglang running req: 9 queue req: 511
- 2025-07-20 15:47:43,614 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
- 2025-07-20 15:47:43,949 - __main__ - WARNING - ValueError on attempt 6 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-07-20 15:47:44,382 - sglang - INFO - [2025-07-20 15:47:44 TP0] Decode batch. #running-req: 9, #token: 28785, token usage: 0.76, gen throughput (token/s): 365.76, #queue-req: 511
- 2025-07-20 15:47:44,382 - __main__ - INFO - sglang running req: 9 queue req: 511
- 2025-07-20 15:47:44,750 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:47:44,750 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 30.17 137.88
- finished_output_tokens 11.57 52.86
- sglang_input_tokens 904.02 870.38
- sglang_output_tokens 259.17 262.51
- 2025-07-20 15:47:44,750 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 1 | 10 | 529
- 2025-07-20 15:47:45,371 - sglang - INFO - [2025-07-20 15:47:45 TP0] Decode batch. #running-req: 9, #token: 29145, token usage: 0.77, gen throughput (token/s): 363.94, #queue-req: 511
- 2025-07-20 15:47:45,371 - __main__ - INFO - sglang running req: 9 queue req: 511
- 2025-07-20 15:47:46,355 - sglang - INFO - [2025-07-20 15:47:46 TP0] Decode batch. #running-req: 9, #token: 29505, token usage: 0.78, gen throughput (token/s): 365.85, #queue-req: 511
- 2025-07-20 15:47:46,355 - __main__ - INFO - sglang running req: 9 queue req: 511
- 2025-07-20 15:47:47,338 - sglang - INFO - [2025-07-20 15:47:47 TP0] Decode batch. #running-req: 9, #token: 29865, token usage: 0.79, gen throughput (token/s): 366.14, #queue-req: 511
- 2025-07-20 15:47:47,338 - __main__ - INFO - sglang running req: 9 queue req: 511
- 2025-07-20 15:47:48,319 - sglang - INFO - [2025-07-20 15:47:48 TP0] Decode batch. #running-req: 9, #token: 30225, token usage: 0.80, gen throughput (token/s): 366.86, #queue-req: 511
- 2025-07-20 15:47:48,320 - __main__ - INFO - sglang running req: 9 queue req: 511
- 2025-07-20 15:47:48,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
- 2025-07-20 15:47:48,734 - __main__ - WARNING - ValueError on attempt 7 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-07-20 15:47:48,735 - __main__ - ERROR - Failed to process tests/gnarly_pdfs/map1.pdf-1 after 8 attempts.
- 2025-07-20 15:47:49,112 - __main__ - ERROR - Document tests/gnarly_pdfs/map1.pdf has 1 fallback pages out of 1 exceeding max_page_error_rate of 0.004, discarding document.
- 2025-07-20 15:47:49,113 - sglang - INFO - [2025-07-20 15:47:48 TP0] Prefill batch. #new-seq: 1, #new-token: 3020, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 8, #queue-req: 510
- 2025-07-20 15:47:49,113 - __main__ - INFO - sglang running req: 8 queue req: 510
- 2025-07-20 15:47:50,207 - sglang - INFO - [2025-07-20 15:47:50 TP0] Decode batch. #running-req: 9, #token: 29711, token usage: 0.78, gen throughput (token/s): 190.20, #queue-req: 510
- 2025-07-20 15:47:50,207 - __main__ - INFO - sglang running req: 9 queue req: 510
- 2025-07-20 15:47:50,304 - sglang - INFO - [2025-07-20 15:47:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2297, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 8, #queue-req: 509
- 2025-07-20 15:47:50,304 - __main__ - INFO - sglang running req: 8 queue req: 509
- 2025-07-20 15:47:51,308 - sglang - INFO - [2025-07-20 15:47:51 TP0] Prefill batch. #new-seq: 2, #new-token: 4138, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 8, #queue-req: 507
- 2025-07-20 15:47:51,309 - __main__ - INFO - sglang running req: 8 queue req: 507
- 2025-07-20 15:47:53,095 - sglang - INFO - [2025-07-20 15:47:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2902, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 506
- 2025-07-20 15:47:53,095 - __main__ - INFO - sglang running req: 9 queue req: 506
- 2025-07-20 15:47:54,208 - sglang - INFO - [2025-07-20 15:47:54 TP0] Decode batch. #running-req: 10, #token: 28558, token usage: 0.75, gen throughput (token/s): 95.72, #queue-req: 506
- 2025-07-20 15:47:54,208 - __main__ - INFO - sglang running req: 10 queue req: 506
- 2025-07-20 15:47:54,429 - sglang - INFO - [2025-07-20 15:47:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2889, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 505
- 2025-07-20 15:47:54,430 - __main__ - INFO - sglang running req: 9 queue req: 505
- 2025-07-20 15:47:54,751 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:47:54,751 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 29.95 137.88
- finished_output_tokens 11.48 52.86
- sglang_input_tokens 907.75 880.16
- sglang_output_tokens 259.92 263.96
- 2025-07-20 15:47:54,751 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 2 | 15 | 529
- 2025-07-20 15:47:56,021 - sglang - INFO - [2025-07-20 15:47:56 TP0] Decode batch. #running-req: 10, #token: 28804, token usage: 0.76, gen throughput (token/s): 220.13, #queue-req: 505
- 2025-07-20 15:47:56,021 - __main__ - INFO - sglang running req: 10 queue req: 505
- 2025-07-20 15:47:57,004 - sglang - INFO - [2025-07-20 15:47:57 TP0] Decode batch. #running-req: 10, #token: 29204, token usage: 0.77, gen throughput (token/s): 406.94, #queue-req: 505
- 2025-07-20 15:47:57,004 - __main__ - INFO - sglang running req: 10 queue req: 505
- 2025-07-20 15:47:57,423 - sglang - INFO - [2025-07-20 15:47:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2650, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 504
- 2025-07-20 15:47:57,423 - __main__ - INFO - sglang running req: 9 queue req: 504
- 2025-07-20 15:47:58,772 - sglang - INFO - [2025-07-20 15:47:58 TP0] Decode batch. #running-req: 10, #token: 29658, token usage: 0.78, gen throughput (token/s): 225.56, #queue-req: 504
- 2025-07-20 15:47:58,773 - __main__ - INFO - sglang running req: 10 queue req: 504
- 2025-07-20 15:47:58,944 - sglang - INFO - [2025-07-20 15:47:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2834, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 503
- 2025-07-20 15:47:58,945 - __main__ - INFO - sglang running req: 9 queue req: 503
- 2025-07-20 15:48:00,615 - sglang - INFO - [2025-07-20 15:48:00 TP0] Decode batch. #running-req: 10, #token: 29194, token usage: 0.77, gen throughput (token/s): 216.57, #queue-req: 503
- 2025-07-20 15:48:00,615 - __main__ - INFO - sglang running req: 10 queue req: 503
- 2025-07-20 15:48:01,599 - sglang - INFO - [2025-07-20 15:48:01 TP0] Decode batch. #running-req: 10, #token: 29594, token usage: 0.78, gen throughput (token/s): 406.33, #queue-req: 503
- 2025-07-20 15:48:01,600 - __main__ - INFO - sglang running req: 10 queue req: 503
- 2025-07-20 15:48:02,583 - sglang - INFO - [2025-07-20 15:48:02 TP0] Decode batch. #running-req: 10, #token: 29994, token usage: 0.79, gen throughput (token/s): 406.46, #queue-req: 503
- 2025-07-20 15:48:02,584 - __main__ - INFO - sglang running req: 10 queue req: 503
- 2025-07-20 15:48:03,397 - sglang - INFO - [2025-07-20 15:48:03 TP0] Prefill batch. #new-seq: 1, #new-token: 3545, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 502
- 2025-07-20 15:48:03,397 - __main__ - INFO - sglang running req: 9 queue req: 502
- 2025-07-20 15:48:04,574 - sglang - INFO - [2025-07-20 15:48:04 TP0] Decode batch. #running-req: 10, #token: 31867, token usage: 0.84, gen throughput (token/s): 200.43, #queue-req: 502
- 2025-07-20 15:48:04,574 - __main__ - INFO - sglang running req: 10 queue req: 502
- 2025-07-20 15:48:04,752 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:48:04,753 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 29.74 137.88
- finished_output_tokens 11.40 52.86
- sglang_input_tokens 906.13 863.87
- sglang_output_tokens 259.15 257.14
- 2025-07-20 15:48:04,753 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 2 | 18 | 529
- 2025-07-20 15:48:05,563 - sglang - INFO - [2025-07-20 15:48:05 TP0] Decode batch. #running-req: 10, #token: 32267, token usage: 0.85, gen throughput (token/s): 404.40, #queue-req: 502
- 2025-07-20 15:48:05,563 - __main__ - INFO - sglang running req: 10 queue req: 502
- 2025-07-20 15:48:06,280 - sglang - INFO - [2025-07-20 15:48:06 TP0] Prefill batch. #new-seq: 1, #new-token: 1258, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 501
- 2025-07-20 15:48:06,280 - __main__ - INFO - sglang running req: 9 queue req: 501
- 2025-07-20 15:48:07,091 - sglang - INFO - [2025-07-20 15:48:07 TP0] Decode batch. #running-req: 10, #token: 30019, token usage: 0.79, gen throughput (token/s): 261.12, #queue-req: 501
- 2025-07-20 15:48:07,092 - __main__ - INFO - sglang running req: 10 queue req: 501
- 2025-07-20 15:48:07,668 - sglang - INFO - [2025-07-20 15:48:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2419, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 500
- 2025-07-20 15:48:07,668 - __main__ - INFO - sglang running req: 9 queue req: 500
- 2025-07-20 15:48:08,688 - sglang - INFO - [2025-07-20 15:48:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2514, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 499
- 2025-07-20 15:48:08,689 - __main__ - INFO - sglang running req: 9 queue req: 499
- 2025-07-20 15:48:09,650 - sglang - INFO - [2025-07-20 15:48:09 TP0] Decode batch. #running-req: 10, #token: 30279, token usage: 0.80, gen throughput (token/s): 155.53, #queue-req: 499
- 2025-07-20 15:48:09,651 - __main__ - INFO - sglang running req: 10 queue req: 499
- 2025-07-20 15:48:10,637 - sglang - INFO - [2025-07-20 15:48:10 TP0] Decode batch. #running-req: 10, #token: 30679, token usage: 0.81, gen throughput (token/s): 405.59, #queue-req: 499
- 2025-07-20 15:48:10,637 - __main__ - INFO - sglang running req: 10 queue req: 499
- 2025-07-20 15:48:11,623 - sglang - INFO - [2025-07-20 15:48:11 TP0] Decode batch. #running-req: 10, #token: 31079, token usage: 0.82, gen throughput (token/s): 405.54, #queue-req: 499
- 2025-07-20 15:48:11,623 - __main__ - INFO - sglang running req: 10 queue req: 499
- 2025-07-20 15:48:12,611 - sglang - INFO - [2025-07-20 15:48:12 TP0] Decode batch. #running-req: 10, #token: 31479, token usage: 0.83, gen throughput (token/s): 404.73, #queue-req: 499
- 2025-07-20 15:48:12,611 - __main__ - INFO - sglang running req: 10 queue req: 499
- 2025-07-20 15:48:13,604 - sglang - INFO - [2025-07-20 15:48:13 TP0] Decode batch. #running-req: 10, #token: 31879, token usage: 0.84, gen throughput (token/s): 402.98, #queue-req: 499
- 2025-07-20 15:48:13,604 - __main__ - INFO - sglang running req: 10 queue req: 499
- 2025-07-20 15:48:14,147 - sglang - INFO - [2025-07-20 15:48:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2365, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 498
- 2025-07-20 15:48:14,148 - __main__ - INFO - sglang running req: 9 queue req: 498
- 2025-07-20 15:48:14,754 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:48:14,754 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 29.53 137.88
- finished_output_tokens 11.32 52.86
- sglang_input_tokens 906.42 855.53
- sglang_output_tokens 259.08 254.82
- 2025-07-20 15:48:14,754 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 2 | 22 | 529
- 2025-07-20 15:48:14,978 - sglang - INFO - [2025-07-20 15:48:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 497
- 2025-07-20 15:48:14,978 - __main__ - INFO - sglang running req: 9 queue req: 497
- 2025-07-20 15:48:16,188 - sglang - INFO - [2025-07-20 15:48:16 TP0] Decode batch. #running-req: 10, #token: 31196, token usage: 0.82, gen throughput (token/s): 154.00, #queue-req: 497
- 2025-07-20 15:48:16,189 - __main__ - INFO - sglang running req: 10 queue req: 497
- 2025-07-20 15:48:17,184 - sglang - INFO - [2025-07-20 15:48:17 TP0] Decode batch. #running-req: 10, #token: 31596, token usage: 0.83, gen throughput (token/s): 401.78, #queue-req: 497
- 2025-07-20 15:48:17,184 - __main__ - INFO - sglang running req: 10 queue req: 497
- 2025-07-20 15:48:18,181 - sglang - INFO - [2025-07-20 15:48:18 TP0] Decode batch. #running-req: 10, #token: 31996, token usage: 0.84, gen throughput (token/s): 401.23, #queue-req: 497
- 2025-07-20 15:48:18,181 - __main__ - INFO - sglang running req: 10 queue req: 497
- 2025-07-20 15:48:18,256 - sglang - INFO - [2025-07-20 15:48:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2877, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 496
- 2025-07-20 15:48:18,256 - __main__ - INFO - sglang running req: 9 queue req: 496
- 2025-07-20 15:48:19,841 - sglang - INFO - [2025-07-20 15:48:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2407, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 495
- 2025-07-20 15:48:19,842 - __main__ - INFO - sglang running req: 9 queue req: 495
- 2025-07-20 15:48:20,807 - sglang - INFO - [2025-07-20 15:48:20 TP0] Decode batch. #running-req: 10, #token: 30299, token usage: 0.80, gen throughput (token/s): 151.57, #queue-req: 495
- 2025-07-20 15:48:20,807 - __main__ - INFO - sglang running req: 10 queue req: 495
- 2025-07-20 15:48:21,791 - sglang - INFO - [2025-07-20 15:48:21 TP0] Decode batch. #running-req: 10, #token: 30699, token usage: 0.81, gen throughput (token/s): 406.49, #queue-req: 495
- 2025-07-20 15:48:21,791 - __main__ - INFO - sglang running req: 10 queue req: 495
- 2025-07-20 15:48:22,782 - sglang - INFO - [2025-07-20 15:48:22 TP0] Decode batch. #running-req: 10, #token: 31099, token usage: 0.82, gen throughput (token/s): 403.58, #queue-req: 495
- 2025-07-20 15:48:22,782 - __main__ - INFO - sglang running req: 10 queue req: 495
- 2025-07-20 15:48:22,906 - sglang - INFO - [2025-07-20 15:48:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2703, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 494
- 2025-07-20 15:48:22,906 - __main__ - INFO - sglang running req: 9 queue req: 494
- 2025-07-20 15:48:24,603 - sglang - INFO - [2025-07-20 15:48:24 TP0] Decode batch. #running-req: 10, #token: 30690, token usage: 0.81, gen throughput (token/s): 219.08, #queue-req: 494
- 2025-07-20 15:48:24,603 - __main__ - INFO - sglang running req: 10 queue req: 494
- 2025-07-20 15:48:24,756 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:48:24,756 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 29.32 137.88
- finished_output_tokens 11.24 52.86
- sglang_input_tokens 907.82 860.97
- sglang_output_tokens 259.20 255.92
- 2025-07-20 15:48:24,756 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 2 | 26 | 529
- 2025-07-20 15:48:25,589 - sglang - INFO - [2025-07-20 15:48:25 TP0] Decode batch. #running-req: 10, #token: 31090, token usage: 0.82, gen throughput (token/s): 405.84, #queue-req: 494
- 2025-07-20 15:48:25,589 - __main__ - INFO - sglang running req: 10 queue req: 494
- 2025-07-20 15:48:26,576 - sglang - INFO - [2025-07-20 15:48:26 TP0] Decode batch. #running-req: 10, #token: 31490, token usage: 0.83, gen throughput (token/s): 405.25, #queue-req: 494
- 2025-07-20 15:48:26,576 - __main__ - INFO - sglang running req: 10 queue req: 494
- 2025-07-20 15:48:27,095 - sglang - INFO - [2025-07-20 15:48:27 TP0] Prefill batch. #new-seq: 1, #new-token: 3260, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 493
- 2025-07-20 15:48:27,095 - __main__ - INFO - sglang running req: 9 queue req: 493
- 2025-07-20 15:48:28,503 - sglang - INFO - [2025-07-20 15:48:28 TP0] Decode batch. #running-req: 9, #token: 29092, token usage: 0.77, gen throughput (token/s): 205.51, #queue-req: 493
- 2025-07-20 15:48:28,503 - __main__ - INFO - sglang running req: 9 queue req: 493
- 2025-07-20 15:48:29,477 - sglang - INFO - [2025-07-20 15:48:29 TP0] Decode batch. #running-req: 9, #token: 29452, token usage: 0.78, gen throughput (token/s): 369.34, #queue-req: 493
- 2025-07-20 15:48:29,478 - __main__ - INFO - sglang running req: 9 queue req: 493
- 2025-07-20 15:48:30,519 - sglang - INFO - [2025-07-20 15:48:30 TP0] Decode batch. #running-req: 9, #token: 29812, token usage: 0.78, gen throughput (token/s): 345.47, #queue-req: 493
- 2025-07-20 15:48:30,520 - __main__ - INFO - sglang running req: 9 queue req: 493
- 2025-07-20 15:48:31,509 - sglang - INFO - [2025-07-20 15:48:31 TP0] Decode batch. #running-req: 9, #token: 30172, token usage: 0.79, gen throughput (token/s): 363.61, #queue-req: 493
- 2025-07-20 15:48:31,510 - __main__ - INFO - sglang running req: 9 queue req: 493
- 2025-07-20 15:48:31,631 - sglang - INFO - [2025-07-20 15:48:31 TP0] Prefill batch. #new-seq: 1, #new-token: 3887, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 8, #queue-req: 492
- 2025-07-20 15:48:31,632 - __main__ - INFO - sglang running req: 8 queue req: 492
- 2025-07-20 15:48:33,025 - sglang - INFO - [2025-07-20 15:48:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2977, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 8, #queue-req: 491
- 2025-07-20 15:48:33,025 - __main__ - INFO - sglang running req: 8 queue req: 491
- 2025-07-20 15:48:34,466 - sglang - INFO - [2025-07-20 15:48:34 TP0] Decode batch. #running-req: 9, #token: 30158, token usage: 0.79, gen throughput (token/s): 121.10, #queue-req: 491
- 2025-07-20 15:48:34,466 - __main__ - INFO - sglang running req: 9 queue req: 491
- 2025-07-20 15:48:34,757 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:48:34,757 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 29.11 137.88
- finished_output_tokens 11.16 52.86
- sglang_input_tokens 909.07 861.92
- sglang_output_tokens 259.10 256.07
- 2025-07-20 15:48:34,757 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 2 | 30 | 529
- 2025-07-20 15:48:35,103 - sglang - INFO - [2025-07-20 15:48:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2152, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 8, #queue-req: 490
- 2025-07-20 15:48:35,104 - __main__ - INFO - sglang running req: 8 queue req: 490
- 2025-07-20 15:48:36,154 - sglang - INFO - [2025-07-20 15:48:36 TP0] Decode batch. #running-req: 9, #token: 29666, token usage: 0.78, gen throughput (token/s): 212.67, #queue-req: 490
- 2025-07-20 15:48:36,154 - __main__ - INFO - sglang running req: 9 queue req: 490
- 2025-07-20 15:48:37,131 - sglang - INFO - [2025-07-20 15:48:37 TP0] Decode batch. #running-req: 9, #token: 30026, token usage: 0.79, gen throughput (token/s): 368.24, #queue-req: 490
- 2025-07-20 15:48:37,132 - __main__ - INFO - sglang running req: 9 queue req: 490
- 2025-07-20 15:48:37,841 - sglang - INFO - [2025-07-20 15:48:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2571, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 8, #queue-req: 489
- 2025-07-20 15:48:37,841 - __main__ - INFO - sglang running req: 8 queue req: 489
- 2025-07-20 15:48:38,958 - sglang - INFO - [2025-07-20 15:48:38 TP0] Decode batch. #running-req: 9, #token: 28848, token usage: 0.76, gen throughput (token/s): 196.57, #queue-req: 489
- 2025-07-20 15:48:38,958 - __main__ - INFO - sglang running req: 9 queue req: 489
- 2025-07-20 15:48:39,006 - sglang - INFO - [2025-07-20 15:48:39 TP0] Prefill batch. #new-seq: 2, #new-token: 3412, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 8, #queue-req: 487
- 2025-07-20 15:48:39,006 - __main__ - INFO - sglang running req: 8 queue req: 487
- 2025-07-20 15:48:40,664 - sglang - INFO - [2025-07-20 15:48:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2399, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 486
- 2025-07-20 15:48:40,664 - __main__ - INFO - sglang running req: 9 queue req: 486
- 2025-07-20 15:48:41,961 - sglang - INFO - [2025-07-20 15:48:41 TP0] Decode batch. #running-req: 9, #token: 24834, token usage: 0.65, gen throughput (token/s): 131.52, #queue-req: 486
- 2025-07-20 15:48:41,961 - __main__ - INFO - sglang running req: 9 queue req: 486
- 2025-07-20 15:48:41,961 - sglang - INFO - [2025-07-20 15:48:41 TP0] Prefill batch. #new-seq: 1, #new-token: 3680, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 485
- 2025-07-20 15:48:41,961 - __main__ - INFO - sglang running req: 9 queue req: 485
- 2025-07-20 15:48:43,582 - sglang - INFO - [2025-07-20 15:48:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2449, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 484
- 2025-07-20 15:48:43,582 - __main__ - INFO - sglang running req: 9 queue req: 484
- 2025-07-20 15:48:44,619 - sglang - INFO - [2025-07-20 15:48:44 TP0] Prefill batch. #new-seq: 1, #new-token: 3605, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 483
- 2025-07-20 15:48:44,619 - __main__ - INFO - sglang running req: 9 queue req: 483
- 2025-07-20 15:48:44,758 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:48:44,758 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 28.91 137.88
- finished_output_tokens 11.08 52.86
- sglang_input_tokens 914.85 875.84
- sglang_output_tokens 260.53 261.63
- 2025-07-20 15:48:44,758 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 2 | 37 | 529
- 2025-07-20 15:48:45,792 - sglang - INFO - [2025-07-20 15:48:45 TP0] Decode batch. #running-req: 10, #token: 30298, token usage: 0.80, gen throughput (token/s): 103.89, #queue-req: 483
- 2025-07-20 15:48:45,792 - __main__ - INFO - sglang running req: 10 queue req: 483
- 2025-07-20 15:48:46,779 - sglang - INFO - [2025-07-20 15:48:46 TP0] Decode batch. #running-req: 10, #token: 30698, token usage: 0.81, gen throughput (token/s): 405.36, #queue-req: 483
- 2025-07-20 15:48:46,779 - __main__ - INFO - sglang running req: 10 queue req: 483
- 2025-07-20 15:48:47,766 - sglang - INFO - [2025-07-20 15:48:47 TP0] Decode batch. #running-req: 10, #token: 31098, token usage: 0.82, gen throughput (token/s): 405.24, #queue-req: 483
- 2025-07-20 15:48:47,766 - __main__ - INFO - sglang running req: 10 queue req: 483
- 2025-07-20 15:48:48,755 - sglang - INFO - [2025-07-20 15:48:48 TP0] Decode batch. #running-req: 10, #token: 31498, token usage: 0.83, gen throughput (token/s): 404.35, #queue-req: 483
- 2025-07-20 15:48:48,756 - __main__ - INFO - sglang running req: 10 queue req: 483
- 2025-07-20 15:48:49,746 - sglang - INFO - [2025-07-20 15:48:49 TP0] Decode batch. #running-req: 10, #token: 31898, token usage: 0.84, gen throughput (token/s): 403.72, #queue-req: 483
- 2025-07-20 15:48:49,746 - __main__ - INFO - sglang running req: 10 queue req: 483
- 2025-07-20 15:48:50,737 - sglang - INFO - [2025-07-20 15:48:50 TP0] Decode batch. #running-req: 10, #token: 32298, token usage: 0.85, gen throughput (token/s): 403.84, #queue-req: 483
- 2025-07-20 15:48:50,737 - __main__ - INFO - sglang running req: 10 queue req: 483
- 2025-07-20 15:48:51,725 - sglang - INFO - [2025-07-20 15:48:51 TP0] Decode batch. #running-req: 10, #token: 32698, token usage: 0.86, gen throughput (token/s): 404.52, #queue-req: 483
- 2025-07-20 15:48:51,727 - __main__ - INFO - sglang running req: 10 queue req: 483
- 2025-07-20 15:48:52,717 - sglang - INFO - [2025-07-20 15:48:52 TP0] Decode batch. #running-req: 10, #token: 33098, token usage: 0.87, gen throughput (token/s): 403.49, #queue-req: 483
- 2025-07-20 15:48:52,717 - __main__ - INFO - sglang running req: 10 queue req: 483
- 2025-07-20 15:48:53,613 - sglang - INFO - [2025-07-20 15:48:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2289, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 9, #queue-req: 482
- 2025-07-20 15:48:53,613 - __main__ - INFO - sglang running req: 9 queue req: 482
- 2025-07-20 15:48:54,440 - sglang - INFO - [2025-07-20 15:48:54 TP0] Decode batch. #running-req: 10, #token: 32272, token usage: 0.85, gen throughput (token/s): 231.56, #queue-req: 482
- 2025-07-20 15:48:54,440 - __main__ - INFO - sglang running req: 10 queue req: 482
- 2025-07-20 15:48:54,760 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:48:54,760 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 28.71 137.88
- finished_output_tokens 11.01 52.86
- sglang_input_tokens 910.56 853.52
- sglang_output_tokens 259.10 257.10
- 2025-07-20 15:48:54,760 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 2 | 38 | 529
- 2025-07-20 15:48:55,431 - sglang - INFO - [2025-07-20 15:48:55 TP0] Decode batch. #running-req: 10, #token: 32672, token usage: 0.86, gen throughput (token/s): 403.38, #queue-req: 482
- 2025-07-20 15:48:55,432 - __main__ - INFO - sglang running req: 10 queue req: 482
- 2025-07-20 15:48:56,294 - sglang - INFO - [2025-07-20 15:48:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2634, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 8, #queue-req: 481
- 2025-07-20 15:48:56,294 - __main__ - INFO - sglang running req: 8 queue req: 481
- 2025-07-20 15:48:57,243 - sglang - INFO - [2025-07-20 15:48:57 TP0] Decode batch. #running-req: 9, #token: 30491, token usage: 0.80, gen throughput (token/s): 202.10, #queue-req: 481
- 2025-07-20 15:48:57,243 - __main__ - INFO - sglang running req: 9 queue req: 481
- 2025-07-20 15:48:57,586 - sglang - INFO - [2025-07-20 15:48:57 TP0] Prefill batch. #new-seq: 1, #new-token: 1150, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 8, #queue-req: 480
- 2025-07-20 15:48:57,586 - __main__ - INFO - sglang running req: 8 queue req: 480
- 2025-07-20 15:48:58,728 - sglang - INFO - [2025-07-20 15:48:58 TP0] Decode batch. #running-req: 9, #token: 27948, token usage: 0.74, gen throughput (token/s): 241.60, #queue-req: 480
- 2025-07-20 15:48:58,729 - __main__ - INFO - sglang running req: 9 queue req: 480
- 2025-07-20 15:48:59,092 - sglang - INFO - [2025-07-20 15:48:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2815, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 8, #queue-req: 479
- 2025-07-20 15:48:59,092 - __main__ - INFO - sglang running req: 8 queue req: 479
- 2025-07-20 15:49:00,551 - sglang - INFO - [2025-07-20 15:49:00 TP0] Decode batch. #running-req: 9, #token: 29932, token usage: 0.79, gen throughput (token/s): 196.98, #queue-req: 479
- 2025-07-20 15:49:00,551 - __main__ - INFO - sglang running req: 9 queue req: 479
- 2025-07-20 15:49:01,536 - sglang - INFO - [2025-07-20 15:49:01 TP0] Decode batch. #running-req: 9, #token: 30292, token usage: 0.80, gen throughput (token/s): 365.35, #queue-req: 479
- 2025-07-20 15:49:01,537 - __main__ - INFO - sglang running req: 9 queue req: 479
- 2025-07-20 15:49:02,321 - sglang - INFO - [2025-07-20 15:49:02 TP0] Prefill batch. #new-seq: 1, #new-token: 4293, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 8, #queue-req: 478
- 2025-07-20 15:49:02,322 - __main__ - INFO - sglang running req: 8 queue req: 478
- 2025-07-20 15:49:03,682 - sglang - INFO - [2025-07-20 15:49:03 TP0] Decode batch. #running-req: 9, #token: 31731, token usage: 0.84, gen throughput (token/s): 167.28, #queue-req: 478
- 2025-07-20 15:49:03,683 - __main__ - INFO - sglang running req: 9 queue req: 478
- 2025-07-20 15:49:04,672 - sglang - INFO - [2025-07-20 15:49:04 TP0] Decode batch. #running-req: 9, #token: 32091, token usage: 0.84, gen throughput (token/s): 363.89, #queue-req: 478
- 2025-07-20 15:49:04,672 - __main__ - INFO - sglang running req: 9 queue req: 478
- 2025-07-20 15:49:04,761 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:49:04,762 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 28.51 137.88
- finished_output_tokens 10.93 52.86
- sglang_input_tokens 911.97 854.97
- sglang_output_tokens 259.03 254.68
- 2025-07-20 15:49:04,762 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 2 | 43 | 529
- 2025-07-20 15:49:05,660 - sglang - INFO - [2025-07-20 15:49:05 TP0] Decode batch. #running-req: 9, #token: 29351, token usage: 0.77, gen throughput (token/s): 364.33, #queue-req: 478
- 2025-07-20 15:49:05,660 - __main__ - INFO - sglang running req: 9 queue req: 478
- 2025-07-20 15:49:06,590 - sglang - INFO - [2025-07-20 15:49:06 TP0] Decode batch. #running-req: 8, #token: 29671, token usage: 0.78, gen throughput (token/s): 343.76, #queue-req: 478
- 2025-07-20 15:49:06,591 - __main__ - INFO - sglang running req: 8 queue req: 478
- 2025-07-20 15:49:06,777 - sglang - INFO - [2025-07-20 15:49:06 TP0] Prefill batch. #new-seq: 1, #new-token: 6873, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 7, #queue-req: 477
- 2025-07-20 15:49:06,778 - __main__ - INFO - sglang running req: 7 queue req: 477
- 2025-07-20 15:49:08,736 - sglang - INFO - [2025-07-20 15:49:08 TP0] Prefill batch. #new-seq: 1, #new-token: 4350, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 7, #queue-req: 476
- 2025-07-20 15:49:08,736 - __main__ - INFO - sglang running req: 7 queue req: 476
- 2025-07-20 15:49:10,105 - sglang - INFO - [2025-07-20 15:49:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2543, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 475
- 2025-07-20 15:49:10,105 - __main__ - INFO - sglang running req: 7 queue req: 475
- 2025-07-20 15:49:11,179 - sglang - INFO - [2025-07-20 15:49:11 TP0] Decode batch. #running-req: 8, #token: 30209, token usage: 0.80, gen throughput (token/s): 69.08, #queue-req: 475
- 2025-07-20 15:49:11,180 - __main__ - INFO - sglang running req: 8 queue req: 475
- 2025-07-20 15:49:12,045 - sglang - INFO - [2025-07-20 15:49:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2926, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 7, #queue-req: 474
- 2025-07-20 15:49:12,045 - __main__ - INFO - sglang running req: 7 queue req: 474
- 2025-07-20 15:49:12,987 - sglang - INFO - [2025-07-20 15:49:12 TP0] Decode batch. #running-req: 8, #token: 30233, token usage: 0.80, gen throughput (token/s): 176.49, #queue-req: 474
- 2025-07-20 15:49:12,987 - __main__ - INFO - sglang running req: 8 queue req: 474
- 2025-07-20 15:49:13,920 - sglang - INFO - [2025-07-20 15:49:13 TP0] Decode batch. #running-req: 8, #token: 30553, token usage: 0.80, gen throughput (token/s): 342.84, #queue-req: 474
- 2025-07-20 15:49:13,920 - __main__ - INFO - sglang running req: 8 queue req: 474
- 2025-07-20 15:49:14,763 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:49:14,764 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 28.31 137.88
- finished_output_tokens 10.85 52.86
- sglang_input_tokens 916.69 884.77
- sglang_output_tokens 259.89 262.48
- 2025-07-20 15:49:14,764 - __main__ - INFO -
- Worker ID | errored | finished | started
- ----------+---------+----------+--------
- 0 | 0 | 497 | 500
- 1 | 0 | 10 | 10
- 2 | 0 | 5 | 5
- 3 | 2 | 48 | 529
- 2025-07-20 15:49:14,854 - sglang - INFO - [2025-07-20 15:49:14 TP0] Decode batch. #running-req: 8, #token: 30873, token usage: 0.81, gen throughput (token/s): 342.70, #queue-req: 474
- 2025-07-20 15:49:14,854 - __main__ - INFO - sglang running req: 8 queue req: 474
- 2025-07-20 15:49:15,791 - sglang - INFO - [2025-07-20 15:49:15 TP0] Decode batch. #running-req: 8, #token: 31193, token usage: 0.82, gen throughput (token/s): 341.60, #queue-req: 474
- 2025-07-20 15:49:15,791 - __main__ - INFO - sglang running req: 8 queue req: 474
- 2025-07-20 15:49:16,400 - sglang - INFO - [2025-07-20 15:49:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2657, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 7, #queue-req: 473
- 2025-07-20 15:49:16,400 - __main__ - INFO - sglang running req: 7 queue req: 473
- 2025-07-20 15:49:17,552 - sglang - INFO - [2025-07-20 15:49:17 TP0] Decode batch. #running-req: 8, #token: 30904, token usage: 0.81, gen throughput (token/s): 181.06, #queue-req: 473
- 2025-07-20 15:49:17,553 - __main__ - INFO - sglang running req: 8 queue req: 473
- 2025-07-20 15:49:18,184 - sglang - INFO - [2025-07-20 15:49:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2659, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 7, #queue-req: 472
- 2025-07-20 15:49:18,184 - __main__ - INFO - sglang running req: 7 queue req: 472
- 2025-07-20 15:49:19,113 - __main__ - INFO - Process page scripts/data/11445200MB2D6222364440125017008.pdf-13 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page scripts/data/11445224007035644H44421110A0001.pdf-3 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/guidebook_failed_pages.pdf-3 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-3 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-17 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-1 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-27 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-15 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-33 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-13 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-4 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-23 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-40 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-34 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-8 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-20 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-35 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-7 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-25 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-2 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-31 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-22 cancelled
- 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-36 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-21 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-37 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-14 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-39 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-19 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-32 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-10 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-18 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-28 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-11 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-24 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-26 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-29 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-6 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-16 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-9 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-30 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-5 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-38 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-12 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-11 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-25 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-19 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-10 cancelled
- 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-26 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-24 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-18 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-4 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-12 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-14 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-2 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-27 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-16 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-5 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-3 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-23 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-13 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-7 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-22 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-6 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-1 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-8 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-15 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-21 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-9 cancelled
- 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-20 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-17 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-46 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-47 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-33 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-48 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-38 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-39 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-12 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-42 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-26 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-43 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-44 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-45 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/handwriting_bad_ocr.pdf-2 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/handwriting_bad_ocr.pdf-1 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-5 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-2 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-1 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-4 cancelled
- 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-3 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-12 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-23 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-13 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-24 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-14 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-25 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-3 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-1 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-15 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-17 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-8 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-26 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-2 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-16 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-18 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-19 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-5 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-22 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-9 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-20 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-6 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-10 cancelled
- 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-21 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-7 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-11 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-1 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-4 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-2 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-3 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-1 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-8 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-61 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-18 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-40 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-30 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-51 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-9 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-62 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-19 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-41 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-31 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-52 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-10 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-63 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-20 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-42 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-32 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-53 cancelled
- 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-11 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-64 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-21 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-43 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-33 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-54 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-12 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-65 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-22 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-44 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-2 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-34 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-55 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-13 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-66 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-23 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-45 cancelled
- 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-3 cancelled
- 2025-07-20 15:49:19,134 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-35 cancelled
- 2025-07-20 15:49:19,134 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-56 cancelled
- 2025-07-20 15:49:19,134 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-14 cancelled
- 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-67 cancelled
- 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-24 cancelled
- 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-46 cancelled
- 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-4 cancelled
- 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-36 cancelled
- 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-50 cancelled
- 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-57 cancelled
- 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-15 cancelled
- 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-68 cancelled
- 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-25 cancelled
- 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-47 cancelled
- 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-5 cancelled
- 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-37 cancelled
- 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-58 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-16 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-27 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-48 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-6 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-38 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-59 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-17 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-28 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-49 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-7 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-39 cancelled
- 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-60 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-26 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-29 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-5 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-8 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-2 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-6 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-9 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-4 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-1 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-7 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-3 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-6 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-14 cancelled
- 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-1 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-9 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-4 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-12 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-7 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-15 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-16 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-2 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-10 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-5 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-13 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-8 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-3 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-11 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/small_page_size.pdf-1 cancelled
- 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-3 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-7 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-14 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-25 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-1 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-11 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-27 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-10 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-28 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-5 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-8 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-29 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-12 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-13 cancelled
- 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-9 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-15 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-17 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-19 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-4 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-26 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-21 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-2 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-20 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-22 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-6 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-16 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-23 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-18 cancelled
- 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-24 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/olmo-page-1.pdf-1 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-2 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-8 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-4 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-6 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-3 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-1 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-9 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-5 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-7 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/some_ocr1.pdf-1 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/dolma-page-1.pdf-1 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-10 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-23 cancelled
- 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-39 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-27 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-50 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-1 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-11 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-24 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-40 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-28 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-51 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-15 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-12 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-41 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-29 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-52 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-2 cancelled
- 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-16 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-13 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-42 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-30 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-53 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-3 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-17 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-43 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-31 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-54 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-4 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-26 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-14 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-44 cancelled
- 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-32 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-36 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-5 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-18 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-45 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-33 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-6 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-19 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-46 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-34 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-7 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-20 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-47 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-35 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-8 cancelled
- 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-48 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-25 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-9 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-22 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-49 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-4 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-2 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-5 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-3 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-6 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-1 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/newspaper.pdf-1 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-2 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-10 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-5 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-13 cancelled
- 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-12 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-8 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-3 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-11 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-6 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-14 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-1 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-9 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-4 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-7 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-4 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-10 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-6 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-2 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-8 cancelled
- 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-9 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-7 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-3 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-5 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-1 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/skinnypage.pdf-1 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-6 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-1 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-4 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-7 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-2 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-5 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-8 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-3 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-3 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-6 cancelled
- 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-1 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-4 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-7 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-2 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-5 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-2 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-4 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-8 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-3 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-6 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-5 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-9 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-1 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-7 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-8 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-5 cancelled
- 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-3 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-77 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-43 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-68 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-78 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-26 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-61 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-76 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-36 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-59 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-80 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-16 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-64 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-81 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-46 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-71 cancelled
- 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-82 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-21 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-65 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-83 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-33 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-63 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-84 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-30 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-69 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-79 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-31 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-67 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-73 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-15 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-74 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-57 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-70 cancelled
- 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-75 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-19 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-60 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-72 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-18 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-53 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-55 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-23 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-45 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-27 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-52 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-94 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-14 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-66 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-28 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-9 cancelled
- 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-58 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-90 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-20 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-56 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-17 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-51 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-89 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-44 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-86 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-25 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-62 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-39 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-87 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-12 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-40 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-88 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-22 cancelled
- 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-96 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-54 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-6 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-38 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-2 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-105 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-37 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-5 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-106 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-35 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-10 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-102 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-42 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-99 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-34 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-103 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-47 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-104 cancelled
- 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-50 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-85 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-7 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-101 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-24 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-4 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-92 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-48 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-11 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-98 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-41 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-97 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-29 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-1 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-91 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-3 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-100 cancelled
- 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-32 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-93 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-49 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-8 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-95 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-13 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-5 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-8 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-3 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-10 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-6 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-1 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-9 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-4 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-7 cancelled
- 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-2 cancelled
- 2025-07-20 15:49:19,157 - sglang - INFO - Process Process-2:
- 2025-07-20 15:49:19,157 - sglang - INFO - Process Process-1:
- 2025-07-20 15:49:19,157 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:49:19,157 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
- 2025-07-20 15:49:19,157 - sglang - INFO - self.run()
- 2025-07-20 15:49:19,157 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 108, in run
- 2025-07-20 15:49:19,157 - sglang - INFO - self._target(*self._args, **self._kwargs)
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1784, in run_scheduler_process
- 2025-07-20 15:49:19,158 - sglang - INFO - scheduler.event_loop_normal()
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
- 2025-07-20 15:49:19,158 - sglang - INFO - return func(*args, **kwargs)
- 2025-07-20 15:49:19,158 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 478, in event_loop_normal
- 2025-07-20 15:49:19,158 - sglang - INFO - self.process_batch_result(batch, result)
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1101, in process_batch_result
- 2025-07-20 15:49:19,158 - sglang - INFO - self.process_batch_result_decode(batch, result)
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1228, in process_batch_result_decode
- 2025-07-20 15:49:19,158 - sglang - INFO - next_token_ids = next_token_ids.tolist()
- 2025-07-20 15:49:19,158 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:49:19,158 - sglang - INFO - KeyboardInterrupt
- 2025-07-20 15:49:19,158 - sglang - INFO - Traceback (most recent call last):
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
- 2025-07-20 15:49:19,158 - sglang - INFO - self.run()
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 108, in run
- 2025-07-20 15:49:19,158 - sglang - INFO - self._target(*self._args, **self._kwargs)
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/detokenizer_manager.py", line 240, in run_detokenizer_process
- 2025-07-20 15:49:19,158 - sglang - INFO - manager.event_loop()
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/detokenizer_manager.py", line 113, in event_loop
- 2025-07-20 15:49:19,158 - sglang - INFO - recv_obj = self.recv_from_scheduler.recv_pyobj()
- 2025-07-20 15:49:19,158 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/zmq/sugar/socket.py", line 989, in recv_pyobj
- 2025-07-20 15:49:19,158 - sglang - INFO - msg = self.recv(flags)
- 2025-07-20 15:49:19,158 - sglang - INFO - ^^^^^^^^^^^^^^^^
- 2025-07-20 15:49:19,158 - sglang - INFO - File "_zmq.py", line 1147, in zmq.backend.cython._zmq.Socket.recv
- 2025-07-20 15:49:19,158 - sglang - INFO - File "_zmq.py", line 1182, in zmq.backend.cython._zmq.Socket.recv
- 2025-07-20 15:49:19,158 - sglang - INFO - File "_zmq.py", line 1337, in zmq.backend.cython._zmq._recv_copy
- 2025-07-20 15:49:19,158 - sglang - INFO - File "_zmq.py", line 169, in zmq.backend.cython._zmq._check_rc
- 2025-07-20 15:49:19,158 - sglang - INFO - KeyboardInterrupt
- 2025-07-20 15:49:19,164 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 15:50:09,151 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 15:50:09,152 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 15:50:09,152 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 15:50:09,158 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 15:50:09,372 - __main__ - INFO - Starting pipeline with PID 599566
- 2025-07-20 15:50:09,372 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 15:50:14,457 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 15:50:17,827 - sglang - INFO - [2025-07-20 15:50:17] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30025, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=378345866, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:50:17,828 - __main__ - INFO - [2025-07-20 15:50:17] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30025, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=378345866, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:50:18,989 - sglang - INFO - [2025-07-20 15:50:18] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:50:18,990 - __main__ - INFO - [2025-07-20 15:50:18] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:50:20,550 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 15:50:24,963 - sglang - INFO - [2025-07-20 15:50:24 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:50:24,963 - __main__ - INFO - [2025-07-20 15:50:24 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:50:24,965 - sglang - INFO - [2025-07-20 15:50:24 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:50:24,965 - __main__ - INFO - [2025-07-20 15:50:24 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:50:24,966 - sglang - INFO - [2025-07-20 15:50:24 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:50:24,966 - __main__ - INFO - [2025-07-20 15:50:24 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:50:24,966 - sglang - INFO - [2025-07-20 15:50:24 TP0] Init torch distributed begin.
- 2025-07-20 15:50:24,966 - __main__ - INFO - [2025-07-20 15:50:24 TP0] Init torch distributed begin.
- 2025-07-20 15:50:26,630 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 15:50:30,629 - sglang - INFO - [2025-07-20 15:50:30 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:50:30,629 - __main__ - INFO - [2025-07-20 15:50:30 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:50:31,373 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:50:31,374 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:50:32,710 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 15:50:38,837 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 15:50:40,857 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:28, 9.48s/it]
- 2025-07-20 15:50:40,857 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:28, 9.48s/it]
- 2025-07-20 15:50:44,915 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 15:50:50,576 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:19<00:19, 9.62s/it]
- 2025-07-20 15:50:50,576 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:19<00:19, 9.62s/it]
- 2025-07-20 15:50:50,994 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 15:50:57,073 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 15:51:02,104 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:30<00:10, 10.49s/it]
- 2025-07-20 15:51:02,104 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:30<00:10, 10.49s/it]
- 2025-07-20 15:51:03,151 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 15:51:05,867 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:34<00:00, 7.84s/it]
- 2025-07-20 15:51:05,867 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:34<00:00, 7.84s/it]
- 2025-07-20 15:51:05,867 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:34<00:00, 8.62s/it]
- 2025-07-20 15:51:05,867 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:34<00:00, 8.62s/it]
- 2025-07-20 15:51:05,867 - sglang - INFO -
- 2025-07-20 15:51:05,867 - __main__ - INFO -
- 2025-07-20 15:51:05,930 - sglang - INFO - [2025-07-20 15:51:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:51:05,930 - __main__ - INFO - [2025-07-20 15:51:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:51:05,937 - sglang - INFO - [2025-07-20 15:51:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:51:05,937 - __main__ - INFO - [2025-07-20 15:51:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:51:05,938 - sglang - INFO - [2025-07-20 15:51:05 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:51:05,938 - __main__ - INFO - [2025-07-20 15:51:05 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:51:06,148 - sglang - INFO - [2025-07-20 15:51:06 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:51:06,148 - __main__ - INFO - [2025-07-20 15:51:06 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:51:08,325 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.28s/it]
50%|█████ | 2/4 [00:01<00:01, 1.44it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.93it/s]
100%|██████████| 4/4 [00:02<00:00, 2.31it/s]
100%|██████████| 4/4 [00:02<00:00, 1.84it/s]
- 2025-07-20 15:51:08,325 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.28s/it]
50%|█████ | 2/4 [00:01<00:01, 1.44it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.93it/s]
100%|██████████| 4/4 [00:02<00:00, 2.31it/s]
100%|██████████| 4/4 [00:02<00:00, 1.84it/s]
- 2025-07-20 15:51:08,325 - sglang - INFO - [2025-07-20 15:51:08 TP0] Capture cuda graph end. Time elapsed: 2.18 s
- 2025-07-20 15:51:08,325 - __main__ - INFO - [2025-07-20 15:51:08 TP0] Capture cuda graph end. Time elapsed: 2.18 s
- 2025-07-20 15:51:09,104 - sglang - INFO - [2025-07-20 15:51:09 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:51:09,104 - __main__ - INFO - [2025-07-20 15:51:09 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:51:09,230 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 15:51:09,231 - sglang - INFO - [2025-07-20 15:51:09] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30025): address already in use
- 2025-07-20 15:51:09,231 - __main__ - INFO - [2025-07-20 15:51:09] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30025): address already in use
- 2025-07-20 15:51:15,310 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 15:51:21,390 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 15:51:27,427 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 15:51:33,506 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 15:51:39,584 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 15:51:45,663 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 15:51:51,742 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 15:51:57,848 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 15:52:03,922 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 15:52:09,999 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 15:52:16,134 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 15:52:16,544 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 15:53:17,272 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 15:53:17,272 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 15:53:17,272 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 15:53:17,275 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 15:53:17,480 - __main__ - INFO - Starting pipeline with PID 600445
- 2025-07-20 15:53:17,480 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 15:53:17,585 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 15:53:18,617 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 15:53:19,669 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 15:53:20,741 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 15:53:21,812 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 15:53:22,888 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 15:53:23,955 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 15:53:24,205 - sglang - INFO - [2025-07-20 15:53:24] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=901973505, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:53:24,206 - __main__ - INFO - [2025-07-20 15:53:24] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=901973505, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:53:25,042 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 15:53:25,272 - sglang - INFO - [2025-07-20 15:53:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:53:25,273 - __main__ - INFO - [2025-07-20 15:53:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:53:26,113 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 15:53:27,180 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 15:53:28,246 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 15:53:29,447 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 15:53:30,535 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 15:53:31,117 - sglang - INFO - [2025-07-20 15:53:31 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:53:31,117 - __main__ - INFO - [2025-07-20 15:53:31 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:53:31,119 - sglang - INFO - [2025-07-20 15:53:31 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:53:31,119 - __main__ - INFO - [2025-07-20 15:53:31 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:53:31,119 - sglang - INFO - [2025-07-20 15:53:31 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:53:31,119 - __main__ - INFO - [2025-07-20 15:53:31 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:53:31,119 - sglang - INFO - [2025-07-20 15:53:31 TP0] Init torch distributed begin.
- 2025-07-20 15:53:31,119 - __main__ - INFO - [2025-07-20 15:53:31 TP0] Init torch distributed begin.
- 2025-07-20 15:53:31,612 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 15:53:32,675 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 15:53:33,741 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 15:53:34,812 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 15:53:35,887 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 15:53:36,705 - sglang - INFO - [2025-07-20 15:53:36 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:53:36,705 - __main__ - INFO - [2025-07-20 15:53:36 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:53:36,923 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 15:53:37,279 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:53:37,279 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:53:37,976 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 15:53:38,259 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.02it/s]
- 2025-07-20 15:53:38,259 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.02it/s]
- 2025-07-20 15:53:39,039 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 15:53:39,509 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.14s/it]
- 2025-07-20 15:53:39,509 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.14s/it]
- 2025-07-20 15:53:40,091 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 15:53:40,530 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.08s/it]
- 2025-07-20 15:53:40,530 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.08s/it]
- 2025-07-20 15:53:40,927 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.23it/s]
- 2025-07-20 15:53:40,927 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.23it/s]
- 2025-07-20 15:53:40,927 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.10it/s]
- 2025-07-20 15:53:40,927 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.10it/s]
- 2025-07-20 15:53:40,927 - sglang - INFO -
- 2025-07-20 15:53:40,927 - __main__ - INFO -
- 2025-07-20 15:53:40,978 - sglang - INFO - [2025-07-20 15:53:40 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:53:40,978 - __main__ - INFO - [2025-07-20 15:53:40 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 15:53:40,984 - sglang - INFO - [2025-07-20 15:53:40 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:53:40,984 - __main__ - INFO - [2025-07-20 15:53:40 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 15:53:40,984 - sglang - INFO - [2025-07-20 15:53:40 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:53:40,984 - __main__ - INFO - [2025-07-20 15:53:40 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 15:53:41,132 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 15:53:41,153 - sglang - INFO - [2025-07-20 15:53:41 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:53:41,153 - __main__ - INFO - [2025-07-20 15:53:41 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 15:53:42,177 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 15:53:43,016 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.02s/it]
50%|█████ | 2/4 [00:01<00:01, 1.71it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s]
100%|██████████| 4/4 [00:01<00:00, 2.63it/s]
100%|██████████| 4/4 [00:01<00:00, 2.15it/s]
- 2025-07-20 15:53:43,016 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.02s/it]
50%|█████ | 2/4 [00:01<00:01, 1.71it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s]
100%|██████████| 4/4 [00:01<00:00, 2.63it/s]
100%|██████████| 4/4 [00:01<00:00, 2.15it/s]
- 2025-07-20 15:53:43,016 - sglang - INFO - [2025-07-20 15:53:43 TP0] Capture cuda graph end. Time elapsed: 1.86 s
- 2025-07-20 15:53:43,016 - __main__ - INFO - [2025-07-20 15:53:43 TP0] Capture cuda graph end. Time elapsed: 1.86 s
- 2025-07-20 15:53:43,218 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 15:53:43,703 - sglang - INFO - [2025-07-20 15:53:43 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:53:43,704 - __main__ - INFO - [2025-07-20 15:53:43 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 15:53:44,277 - __main__ - INFO - sglang server is ready.
- 2025-07-20 15:53:44,277 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 15:53:44,278 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:53:44,278 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 15:53:44,278 - __main__ - INFO - Worker 0 processing work item 91107f3e53da42365e4111879440c8b71d98ac54
- 2025-07-20 15:53:44,278 - __main__ - INFO - Created all tasks for 91107f3e53da42365e4111879440c8b71d98ac54
- 2025-07-20 15:53:44,283 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/horribleocr.pdf in worker 0
- 2025-07-20 15:53:44,790 - sglang - INFO - [2025-07-20 15:53:44 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 15:53:44,790 - __main__ - INFO - [2025-07-20 15:53:44 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 15:53:44,791 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 15:53:45,773 - sglang - INFO - [2025-07-20 15:53:45] The server is fired up and ready to roll!
- 2025-07-20 15:53:45,773 - __main__ - INFO - [2025-07-20 15:53:45] The server is fired up and ready to roll!
- 2025-07-20 15:53:50,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/horribleocr.pdf-1
- 2025-07-20 15:53:54,279 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:53:54,334 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:53:54,334 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-07-20 15:54:04,335 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:54:04,340 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:54:04,340 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-07-20 15:54:14,341 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 15:54:14,342 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 15:54:14,342 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-07-20 15:59:14,110 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 15:59:14,110 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
- 2025-07-20 15:59:14,110 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 15:59:14,116 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
- 2025-07-20 15:59:14,297 - __main__ - INFO - Starting pipeline with PID 602377
- 2025-07-20 15:59:14,297 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 15:59:14,391 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 15:59:15,421 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 15:59:16,455 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 15:59:17,506 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 15:59:18,558 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 15:59:19,643 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 15:59:20,706 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 15:59:21,736 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 15:59:22,783 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 15:59:23,486 - sglang - INFO - [2025-07-20 15:59:23] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=536173719, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:59:23,486 - __main__ - INFO - [2025-07-20 15:59:23] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=536173719, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 15:59:23,876 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 15:59:24,549 - sglang - INFO - [2025-07-20 15:59:24] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:59:24,549 - __main__ - INFO - [2025-07-20 15:59:24] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 15:59:24,955 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 15:59:26,040 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 15:59:27,100 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 15:59:28,159 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 15:59:29,204 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 15:59:30,274 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 15:59:30,705 - sglang - INFO - [2025-07-20 15:59:30 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:59:30,705 - __main__ - INFO - [2025-07-20 15:59:30 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 15:59:30,707 - sglang - INFO - [2025-07-20 15:59:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:59:30,707 - __main__ - INFO - [2025-07-20 15:59:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 15:59:30,707 - sglang - INFO - [2025-07-20 15:59:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:59:30,707 - __main__ - INFO - [2025-07-20 15:59:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 15:59:30,708 - sglang - INFO - [2025-07-20 15:59:30 TP0] Init torch distributed begin.
- 2025-07-20 15:59:30,708 - __main__ - INFO - [2025-07-20 15:59:30 TP0] Init torch distributed begin.
- 2025-07-20 15:59:31,348 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 15:59:32,384 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 15:59:33,440 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 15:59:34,493 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 15:59:35,573 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 15:59:36,200 - sglang - INFO - [2025-07-20 15:59:36 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:59:36,200 - __main__ - INFO - [2025-07-20 15:59:36 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 15:59:36,649 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 15:59:37,318 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:59:37,318 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 15:59:37,706 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 15:59:38,774 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 15:59:39,799 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 15:59:40,844 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 15:59:41,884 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 15:59:42,944 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 15:59:43,989 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 15:59:45,030 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 15:59:46,091 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 15:59:47,175 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 15:59:48,244 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 15:59:49,295 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 15:59:50,339 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 15:59:50,473 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.15s/it]
- 2025-07-20 15:59:50,473 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.15s/it]
- 2025-07-20 15:59:51,389 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 15:59:52,446 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 15:59:53,489 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 15:59:54,520 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 15:59:55,561 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 15:59:56,607 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 15:59:57,649 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 15:59:58,700 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 15:59:59,756 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 16:00:00,800 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 16:00:01,838 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 16:00:02,881 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 16:00:03,817 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.27s/it]
- 2025-07-20 16:00:03,817 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.27s/it]
- 2025-07-20 16:00:03,938 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 16:00:04,995 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 16:00:06,040 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 16:00:07,082 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 16:00:08,149 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-20 16:00:09,213 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-20 16:00:10,281 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-20 16:00:11,341 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-20 16:00:12,383 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-20 16:00:13,440 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-20 16:00:14,504 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-20 16:00:15,574 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-20 16:00:16,647 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-20 16:00:17,136 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.29s/it]
- 2025-07-20 16:00:17,136 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.29s/it]
- 2025-07-20 16:00:17,699 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-20 16:00:18,757 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-20 16:00:19,812 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-20 16:00:20,864 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-20 16:00:21,907 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 9.91s/it]
- 2025-07-20 16:00:21,907 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 9.91s/it]
- 2025-07-20 16:00:21,907 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 11.14s/it]
- 2025-07-20 16:00:21,907 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 11.14s/it]
- 2025-07-20 16:00:21,907 - sglang - INFO -
- 2025-07-20 16:00:21,907 - __main__ - INFO -
- 2025-07-20 16:00:21,909 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-20 16:00:21,966 - sglang - INFO - [2025-07-20 16:00:21 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:00:21,966 - __main__ - INFO - [2025-07-20 16:00:21 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:00:21,978 - sglang - INFO - [2025-07-20 16:00:21 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:00:21,978 - __main__ - INFO - [2025-07-20 16:00:21 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:00:21,978 - sglang - INFO - [2025-07-20 16:00:21 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:00:21,978 - __main__ - INFO - [2025-07-20 16:00:21 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:00:22,362 - sglang - INFO - [2025-07-20 16:00:22 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:00:22,362 - __main__ - INFO - [2025-07-20 16:00:22 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:00:22,999 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-07-20 16:00:24,106 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
- 2025-07-20 16:00:24,997 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.61s/it]
50%|█████ | 2/4 [00:01<00:01, 1.14it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.60it/s]
100%|██████████| 4/4 [00:02<00:00, 1.96it/s]
100%|██████████| 4/4 [00:02<00:00, 1.52it/s]
- 2025-07-20 16:00:24,997 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.61s/it]
50%|█████ | 2/4 [00:01<00:01, 1.14it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.60it/s]
100%|██████████| 4/4 [00:02<00:00, 1.96it/s]
100%|██████████| 4/4 [00:02<00:00, 1.52it/s]
- 2025-07-20 16:00:24,997 - sglang - INFO - [2025-07-20 16:00:24 TP0] Capture cuda graph end. Time elapsed: 2.64 s
- 2025-07-20 16:00:24,997 - __main__ - INFO - [2025-07-20 16:00:24 TP0] Capture cuda graph end. Time elapsed: 2.64 s
- 2025-07-20 16:00:25,198 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
- 2025-07-20 16:00:25,894 - sglang - INFO - [2025-07-20 16:00:25 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:00:25,894 - __main__ - INFO - [2025-07-20 16:00:25 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:00:26,332 - __main__ - INFO - sglang server is ready.
- 2025-07-20 16:00:26,333 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 16:00:26,333 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:00:26,333 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 16:00:26,333 - __main__ - INFO - Worker 0 processing work item 91107f3e53da42365e4111879440c8b71d98ac54
- 2025-07-20 16:00:26,333 - __main__ - INFO - Created all tasks for 91107f3e53da42365e4111879440c8b71d98ac54
- 2025-07-20 16:00:26,340 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/horribleocr.pdf in worker 0
- 2025-07-20 16:00:27,007 - sglang - INFO - [2025-07-20 16:00:27 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:00:27,007 - __main__ - INFO - [2025-07-20 16:00:27 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:00:27,008 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 16:00:28,270 - sglang - INFO - [2025-07-20 16:00:28] The server is fired up and ready to roll!
- 2025-07-20 16:00:28,270 - __main__ - INFO - [2025-07-20 16:00:28] The server is fired up and ready to roll!
- 2025-07-20 16:00:32,982 - __main__ - INFO - Built page query for tests/gnarly_pdfs/horribleocr.pdf-1
- 2025-07-20 16:00:36,336 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:00:36,336 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:00:36,336 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-07-20 16:00:46,337 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:00:46,341 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:00:46,341 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-07-20 16:00:56,344 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:00:56,344 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:00:56,344 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-07-20 16:00:58,657 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-07-20 16:00:58,658 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-07-20 16:00:58,658 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-07-20 16:01:00,171 - sglang - INFO - [2025-07-20 16:01:00 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:01:00,171 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 16:02:40,891 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:02:40,951 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:02:40,952 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-07-20 16:08:42,791 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 16:08:42,792 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
- 2025-07-20 16:08:42,792 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
- 2025-07-20 16:08:42,793 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
- 2025-07-20 16:08:42,793 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
- 2025-07-20 16:08:42,794 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
- 2025-07-20 16:08:42,794 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
- 2025-07-20 16:08:42,794 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
- 2025-07-20 16:08:42,795 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
- 2025-07-20 16:08:42,795 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
- 2025-07-20 16:08:42,795 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
- 2025-07-20 16:08:42,796 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
- 2025-07-20 16:08:42,796 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
- 2025-07-20 16:08:42,796 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
- 2025-07-20 16:08:42,797 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
- 2025-07-20 16:08:42,797 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
- 2025-07-20 16:08:42,797 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
- 2025-07-20 16:08:42,797 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
- 2025-07-20 16:08:42,798 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
- 2025-07-20 16:08:42,798 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
- 2025-07-20 16:08:42,798 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
- 2025-07-20 16:08:42,799 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
- 2025-07-20 16:08:42,799 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
- 2025-07-20 16:08:42,800 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
- 2025-07-20 16:08:42,800 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
- 2025-07-20 16:08:42,800 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
- 2025-07-20 16:08:42,800 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
- 2025-07-20 16:08:42,801 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
- 2025-07-20 16:08:42,801 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
- 2025-07-20 16:08:42,801 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
- 2025-07-20 16:08:42,802 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
- 2025-07-20 16:08:42,802 - __main__ - INFO - Found 30 total pdf paths to add
- 2025-07-20 16:08:42,895 - __main__ - INFO - Calculated items_per_group: 65 based on average pages per PDF: 7.60
- 2025-07-20 16:08:43,116 - __main__ - INFO - Starting pipeline with PID 604527
- 2025-07-20 16:08:43,116 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 16:08:43,205 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 16:08:44,236 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 16:08:45,273 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 16:08:46,325 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 16:08:47,376 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 16:08:48,442 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 16:08:49,509 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 16:08:50,577 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 16:08:51,644 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 16:08:51,682 - sglang - INFO - [2025-07-20 16:08:51] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=19970587, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:08:51,682 - __main__ - INFO - [2025-07-20 16:08:51] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=19970587, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:08:52,707 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 16:08:52,763 - sglang - INFO - [2025-07-20 16:08:52] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:08:52,763 - __main__ - INFO - [2025-07-20 16:08:52] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:08:53,739 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 16:08:54,800 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 16:08:55,868 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 16:08:57,085 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 16:08:58,164 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 16:08:58,652 - sglang - INFO - [2025-07-20 16:08:58 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:08:58,652 - __main__ - INFO - [2025-07-20 16:08:58 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:08:58,654 - sglang - INFO - [2025-07-20 16:08:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:08:58,654 - __main__ - INFO - [2025-07-20 16:08:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:08:58,654 - sglang - INFO - [2025-07-20 16:08:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:08:58,655 - __main__ - INFO - [2025-07-20 16:08:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:08:58,655 - sglang - INFO - [2025-07-20 16:08:58 TP0] Init torch distributed begin.
- 2025-07-20 16:08:58,655 - __main__ - INFO - [2025-07-20 16:08:58 TP0] Init torch distributed begin.
- 2025-07-20 16:08:59,241 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 16:09:00,296 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 16:09:01,363 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 16:09:02,421 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 16:09:03,488 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 16:09:04,254 - sglang - INFO - [2025-07-20 16:09:04 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:09:04,254 - __main__ - INFO - [2025-07-20 16:09:04 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:09:04,565 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 16:09:04,934 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:09:04,934 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:09:05,642 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 16:09:06,709 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 16:09:07,777 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 16:09:08,845 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 16:09:09,906 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 16:09:10,977 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 16:09:12,050 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 16:09:13,118 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 16:09:14,186 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 16:09:15,250 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 16:09:16,310 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 16:09:17,362 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 16:09:17,696 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.76s/it]
- 2025-07-20 16:09:17,696 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.76s/it]
- 2025-07-20 16:09:17,885 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 16:09:38,253 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
- 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
- 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
- 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
- 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
- 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
- 2025-07-20 16:09:38,255 - __main__ - INFO - Found 30 total pdf paths to add
- 2025-07-20 16:09:38,306 - __main__ - INFO - Calculated items_per_group: 65 based on average pages per PDF: 7.60
- 2025-07-20 16:09:38,492 - __main__ - INFO - Starting pipeline with PID 605324
- 2025-07-20 16:09:38,492 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 16:09:38,558 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 16:09:39,588 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 16:09:40,623 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 16:09:41,668 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 16:09:42,730 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 16:09:43,797 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 16:09:44,867 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 16:09:45,079 - sglang - INFO - [2025-07-20 16:09:45] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=829163176, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:09:45,079 - __main__ - INFO - [2025-07-20 16:09:45] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=829163176, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:09:45,967 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 16:09:46,043 - sglang - INFO - [2025-07-20 16:09:46] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:09:46,043 - __main__ - INFO - [2025-07-20 16:09:46] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:09:47,043 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 16:09:48,112 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 16:09:49,182 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 16:09:50,238 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 16:09:51,283 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 16:09:52,356 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 16:09:52,586 - sglang - INFO - [2025-07-20 16:09:52 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:09:52,587 - __main__ - INFO - [2025-07-20 16:09:52 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:09:52,589 - sglang - INFO - [2025-07-20 16:09:52 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:09:52,589 - __main__ - INFO - [2025-07-20 16:09:52 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:09:52,590 - sglang - INFO - [2025-07-20 16:09:52 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:09:52,590 - __main__ - INFO - [2025-07-20 16:09:52 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:09:52,590 - sglang - INFO - [2025-07-20 16:09:52 TP0] Init torch distributed begin.
- 2025-07-20 16:09:52,590 - __main__ - INFO - [2025-07-20 16:09:52 TP0] Init torch distributed begin.
- 2025-07-20 16:09:53,433 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 16:09:54,500 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 16:09:55,568 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 16:09:56,612 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 16:09:57,663 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 16:09:58,130 - sglang - INFO - [2025-07-20 16:09:58 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:09:58,130 - __main__ - INFO - [2025-07-20 16:09:58 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:09:58,738 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:09:58,738 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:09:58,739 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 16:09:59,480 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.26it/s]
- 2025-07-20 16:09:59,480 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.26it/s]
- 2025-07-20 16:09:59,815 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 16:10:00,882 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 16:10:01,911 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 16:10:02,974 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 16:10:04,041 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 16:10:05,113 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 16:10:06,180 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 16:10:07,247 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 16:10:08,314 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 16:10:09,381 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 16:10:10,461 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 16:10:11,524 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 16:10:12,226 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:13<00:15, 7.83s/it]
- 2025-07-20 16:10:12,226 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:13<00:15, 7.83s/it]
- 2025-07-20 16:10:12,600 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 16:10:13,667 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 16:10:14,722 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 16:10:15,789 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 16:10:16,859 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 16:10:17,927 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 16:10:18,994 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 16:10:20,066 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 16:10:21,133 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 16:10:22,205 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 16:10:23,272 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 16:10:24,339 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 16:10:24,897 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:26<00:10, 10.04s/it]
- 2025-07-20 16:10:24,897 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:26<00:10, 10.04s/it]
- 2025-07-20 16:10:25,415 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 16:10:26,482 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 16:10:27,549 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 16:10:28,616 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 16:10:29,296 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:30<00:00, 7.81s/it]
- 2025-07-20 16:10:29,296 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:30<00:00, 7.81s/it]
- 2025-07-20 16:10:29,296 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:30<00:00, 7.65s/it]
- 2025-07-20 16:10:29,296 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:30<00:00, 7.65s/it]
- 2025-07-20 16:10:29,296 - sglang - INFO -
- 2025-07-20 16:10:29,296 - __main__ - INFO -
- 2025-07-20 16:10:29,377 - sglang - INFO - [2025-07-20 16:10:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:10:29,377 - __main__ - INFO - [2025-07-20 16:10:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:10:29,390 - sglang - INFO - [2025-07-20 16:10:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:10:29,391 - __main__ - INFO - [2025-07-20 16:10:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:10:29,391 - sglang - INFO - [2025-07-20 16:10:29 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:10:29,391 - __main__ - INFO - [2025-07-20 16:10:29 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:10:29,605 - sglang - INFO - [2025-07-20 16:10:29 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:10:29,605 - __main__ - INFO - [2025-07-20 16:10:29 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:10:29,692 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 16:10:30,746 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 16:10:31,822 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 16:10:31,851 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.38s/it]
50%|█████ | 2/4 [00:01<00:01, 1.36it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.89it/s]
100%|██████████| 4/4 [00:02<00:00, 2.29it/s]
100%|██████████| 4/4 [00:02<00:00, 1.78it/s]
- 2025-07-20 16:10:31,851 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.38s/it]
50%|█████ | 2/4 [00:01<00:01, 1.36it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.89it/s]
100%|██████████| 4/4 [00:02<00:00, 2.29it/s]
100%|██████████| 4/4 [00:02<00:00, 1.78it/s]
- 2025-07-20 16:10:31,851 - sglang - INFO - [2025-07-20 16:10:31 TP0] Capture cuda graph end. Time elapsed: 2.25 s
- 2025-07-20 16:10:31,851 - __main__ - INFO - [2025-07-20 16:10:31 TP0] Capture cuda graph end. Time elapsed: 2.25 s
- 2025-07-20 16:10:32,610 - sglang - INFO - [2025-07-20 16:10:32 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:10:32,610 - __main__ - INFO - [2025-07-20 16:10:32 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:10:32,906 - __main__ - INFO - sglang server is ready.
- 2025-07-20 16:10:32,906 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 16:10:32,906 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:10:32,906 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 16:10:32,907 - __main__ - INFO - Worker 0 processing work item 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
- 2025-07-20 16:10:32,907 - __main__ - INFO - Created all tasks for 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
- 2025-07-20 16:10:32,920 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
- 2025-07-20 16:10:32,923 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106004000.pdf in worker 0
- 2025-07-20 16:10:32,925 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106008000.pdf in worker 0
- 2025-07-20 16:10:32,927 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106011000.pdf in worker 0
- 2025-07-20 16:10:32,930 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106013001.pdf in worker 0
- 2025-07-20 16:10:32,932 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013002.pdf in worker 0
- 2025-07-20 16:10:32,934 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
- 2025-07-20 16:10:32,936 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
- 2025-07-20 16:10:32,939 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
- 2025-07-20 16:10:32,942 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
- 2025-07-20 16:10:32,944 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
- 2025-07-20 16:10:32,946 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900602.pdf in worker 0
- 2025-07-20 16:10:32,949 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
- 2025-07-20 16:10:32,951 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900701.pdf in worker 0
- 2025-07-20 16:10:32,953 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900702.pdf in worker 0
- 2025-07-20 16:10:32,955 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029008.pdf in worker 0
- 2025-07-20 16:10:32,957 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900901.pdf in worker 0
- 2025-07-20 16:10:32,959 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900902.pdf in worker 0
- 2025-07-20 16:10:32,961 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901001.pdf in worker 0
- 2025-07-20 16:10:32,962 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901002.pdf in worker 0
- 2025-07-20 16:10:32,964 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010603501801.pdf in worker 0
- 2025-07-20 16:10:32,966 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106041000.pdf in worker 0
- 2025-07-20 16:10:32,968 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010604200101.pdf in worker 0
- 2025-07-20 16:10:32,970 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG344010604300102.pdf in worker 0
- 2025-07-20 16:10:32,972 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
- 2025-07-20 16:10:32,975 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
- 2025-07-20 16:10:32,976 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
- 2025-07-20 16:10:32,978 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
- 2025-07-20 16:10:33,053 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
- 2025-07-20 16:10:33,057 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
- 2025-07-20 16:10:33,743 - sglang - INFO - [2025-07-20 16:10:33 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:10:33,743 - __main__ - INFO - [2025-07-20 16:10:33 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:10:33,744 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 16:10:39,640 - sglang - INFO - [2025-07-20 16:10:39] The server is fired up and ready to roll!
- 2025-07-20 16:10:39,640 - __main__ - INFO - [2025-07-20 16:10:39] The server is fired up and ready to roll!
- 2025-07-20 16:10:42,934 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:10:42,934 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:10:42,935 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 228
- 2025-07-20 16:10:52,419 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
- 2025-07-20 16:10:52,435 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
- 2025-07-20 16:10:52,439 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
- 2025-07-20 16:10:52,451 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
- 2025-07-20 16:10:52,453 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
- 2025-07-20 16:10:52,463 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
- 2025-07-20 16:10:52,464 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
- 2025-07-20 16:10:52,481 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
- 2025-07-20 16:10:52,482 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
- 2025-07-20 16:10:52,489 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
- 2025-07-20 16:10:52,492 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-4
- 2025-07-20 16:10:52,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-2
- 2025-07-20 16:10:52,543 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-3
- 2025-07-20 16:10:52,547 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-5
- 2025-07-20 16:10:52,550 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-5
- 2025-07-20 16:10:52,553 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-7
- 2025-07-20 16:10:52,557 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-6
- 2025-07-20 16:10:52,563 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
- 2025-07-20 16:10:52,570 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-3
- 2025-07-20 16:10:52,572 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-2
- 2025-07-20 16:10:52,579 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-1
- 2025-07-20 16:10:52,579 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-4
- 2025-07-20 16:10:52,637 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-2
- 2025-07-20 16:10:52,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-4
- 2025-07-20 16:10:52,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-6
- 2025-07-20 16:10:52,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-3
- 2025-07-20 16:10:52,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-5
- 2025-07-20 16:10:52,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-2
- 2025-07-20 16:10:52,653 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-6
- 2025-07-20 16:10:52,654 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-3
- 2025-07-20 16:10:52,661 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-2
- 2025-07-20 16:10:52,663 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-4
- 2025-07-20 16:10:52,663 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-1
- 2025-07-20 16:10:52,664 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-3
- 2025-07-20 16:10:52,671 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-4
- 2025-07-20 16:10:52,734 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
- 2025-07-20 16:10:52,735 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-1
- 2025-07-20 16:10:52,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
- 2025-07-20 16:10:52,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
- 2025-07-20 16:10:52,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-5
- 2025-07-20 16:10:52,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
- 2025-07-20 16:10:52,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
- 2025-07-20 16:10:52,752 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
- 2025-07-20 16:10:52,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
- 2025-07-20 16:10:52,754 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-1
- 2025-07-20 16:10:52,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
- 2025-07-20 16:10:52,764 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
- 2025-07-20 16:10:52,839 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
- 2025-07-20 16:10:52,840 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
- 2025-07-20 16:10:52,841 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
- 2025-07-20 16:10:52,842 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
- 2025-07-20 16:10:52,844 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
- 2025-07-20 16:10:52,845 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
- 2025-07-20 16:10:52,845 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
- 2025-07-20 16:10:52,846 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-1
- 2025-07-20 16:10:52,847 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
- 2025-07-20 16:10:52,848 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
- 2025-07-20 16:10:52,848 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
- 2025-07-20 16:10:52,936 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
- 2025-07-20 16:10:52,936 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
- 2025-07-20 16:10:52,937 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
- 2025-07-20 16:10:52,944 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:10:52,944 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:10:52,945 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 228
- 2025-07-20 16:10:52,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
- 2025-07-20 16:10:52,946 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-6
- 2025-07-20 16:10:52,946 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
- 2025-07-20 16:10:52,947 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
- 2025-07-20 16:10:52,948 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
- 2025-07-20 16:10:52,949 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
- 2025-07-20 16:10:52,957 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
- 2025-07-20 16:10:52,958 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
- 2025-07-20 16:10:53,035 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
- 2025-07-20 16:10:53,036 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-5
- 2025-07-20 16:10:53,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-1
- 2025-07-20 16:10:53,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
- 2025-07-20 16:10:53,055 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-2
- 2025-07-20 16:10:53,056 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-6
- 2025-07-20 16:10:53,058 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-3
- 2025-07-20 16:10:53,059 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-5
- 2025-07-20 16:10:53,133 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
- 2025-07-20 16:10:53,134 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-4
- 2025-07-20 16:10:53,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
- 2025-07-20 16:10:53,145 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-6
- 2025-07-20 16:10:53,158 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
- 2025-07-20 16:10:53,159 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-2
- 2025-07-20 16:10:53,234 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
- 2025-07-20 16:10:53,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-3
- 2025-07-20 16:10:53,237 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-1
- 2025-07-20 16:10:53,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-6
- 2025-07-20 16:10:53,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
- 2025-07-20 16:10:53,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-7
- 2025-07-20 16:10:53,258 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
- 2025-07-20 16:10:53,259 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-2
- 2025-07-20 16:10:53,260 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-4
- 2025-07-20 16:10:53,261 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-2
- 2025-07-20 16:10:53,335 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
- 2025-07-20 16:10:53,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-1
- 2025-07-20 16:10:53,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-2
- 2025-07-20 16:10:53,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-3
- 2025-07-20 16:10:53,342 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-3
- 2025-07-20 16:10:53,350 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-3
- 2025-07-20 16:10:53,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
- 2025-07-20 16:10:53,442 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-6
- 2025-07-20 16:10:53,443 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-6
- 2025-07-20 16:10:53,443 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-2
- 2025-07-20 16:10:53,445 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
- 2025-07-20 16:10:53,453 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-6
- 2025-07-20 16:10:53,454 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-5
- 2025-07-20 16:10:53,455 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-6
- 2025-07-20 16:10:53,456 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-5
- 2025-07-20 16:10:53,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-2
- 2025-07-20 16:10:53,539 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-3
- 2025-07-20 16:10:53,541 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-1
- 2025-07-20 16:10:53,544 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
- 2025-07-20 16:10:53,545 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-5
- 2025-07-20 16:10:53,547 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-1
- 2025-07-20 16:10:53,549 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-3
- 2025-07-20 16:10:53,550 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-6
- 2025-07-20 16:10:53,551 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-6
- 2025-07-20 16:10:53,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
- 2025-07-20 16:10:53,648 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-3
- 2025-07-20 16:10:53,650 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-5
- 2025-07-20 16:10:53,652 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-2
- 2025-07-20 16:10:53,653 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-4
- 2025-07-20 16:10:53,654 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-3
- 2025-07-20 16:10:53,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-4
- 2025-07-20 16:10:53,744 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-3
- 2025-07-20 16:10:53,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-4
- 2025-07-20 16:10:53,746 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-2
- 2025-07-20 16:10:53,757 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-1
- 2025-07-20 16:10:53,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-8
- 2025-07-20 16:10:53,836 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-5
- 2025-07-20 16:10:53,838 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-1
- 2025-07-20 16:10:53,838 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-6
- 2025-07-20 16:10:53,840 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-4
- 2025-07-20 16:10:53,841 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-8
- 2025-07-20 16:10:53,842 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-4
- 2025-07-20 16:10:53,844 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-5
- 2025-07-20 16:10:53,846 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-7
- 2025-07-20 16:10:53,847 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-2
- 2025-07-20 16:10:53,940 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-1
- 2025-07-20 16:10:53,941 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-5
- 2025-07-20 16:10:53,942 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-3
- 2025-07-20 16:10:53,944 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
- 2025-07-20 16:10:53,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-2
- 2025-07-20 16:10:53,947 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-7
- 2025-07-20 16:10:53,947 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-10
- 2025-07-20 16:10:53,949 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-5
- 2025-07-20 16:10:53,950 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
- 2025-07-20 16:10:54,034 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-5
- 2025-07-20 16:10:54,035 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
- 2025-07-20 16:10:54,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-6
- 2025-07-20 16:10:54,040 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-7
- 2025-07-20 16:10:54,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-3
- 2025-07-20 16:10:54,133 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
- 2025-07-20 16:10:54,134 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-4
- 2025-07-20 16:10:54,137 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
- 2025-07-20 16:10:54,239 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-4
- 2025-07-20 16:10:54,240 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
- 2025-07-20 16:10:54,243 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
- 2025-07-20 16:10:54,245 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
- 2025-07-20 16:10:54,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
- 2025-07-20 16:10:54,253 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
- 2025-07-20 16:10:54,346 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
- 2025-07-20 16:10:54,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-4
- 2025-07-20 16:10:54,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
- 2025-07-20 16:10:54,349 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
- 2025-07-20 16:10:54,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
- 2025-07-20 16:10:54,353 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
- 2025-07-20 16:10:54,356 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-5
- 2025-07-20 16:10:54,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
- 2025-07-20 16:10:54,360 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
- 2025-07-20 16:10:54,362 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
- 2025-07-20 16:10:54,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-1
- 2025-07-20 16:10:54,366 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
- 2025-07-20 16:10:54,367 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-8
- 2025-07-20 16:10:54,369 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-4
- 2025-07-20 16:10:54,370 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-4
- 2025-07-20 16:10:54,436 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-1
- 2025-07-20 16:10:54,438 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
- 2025-07-20 16:10:54,439 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
- 2025-07-20 16:10:54,440 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
- 2025-07-20 16:10:54,442 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
- 2025-07-20 16:10:54,457 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
- 2025-07-20 16:10:54,460 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-9
- 2025-07-20 16:10:54,460 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
- 2025-07-20 16:10:54,461 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
- 2025-07-20 16:10:54,461 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
- 2025-07-20 16:10:54,461 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
- 2025-07-20 16:10:54,463 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
- 2025-07-20 16:10:54,465 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
- 2025-07-20 16:10:54,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 16:10:54,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-2
- 2025-07-20 16:10:54,540 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
- 2025-07-20 16:10:54,541 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
- 2025-07-20 16:10:54,541 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
- 2025-07-20 16:10:54,543 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-1
- 2025-07-20 16:10:54,546 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
- 2025-07-20 16:10:54,547 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
- 2025-07-20 16:10:54,550 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-5
- 2025-07-20 16:10:54,554 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
- 2025-07-20 16:10:54,554 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
- 2025-07-20 16:10:54,557 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
- 2025-07-20 16:10:54,560 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
- 2025-07-20 16:10:54,562 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
- 2025-07-20 16:10:54,661 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
- 2025-07-20 16:10:54,666 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
- 2025-07-20 16:10:54,668 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
- 2025-07-20 16:10:54,735 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
- 2025-07-20 16:10:54,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
- 2025-07-20 16:10:54,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
- 2025-07-20 16:10:54,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
- 2025-07-20 16:10:54,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
- 2025-07-20 16:10:54,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
- 2025-07-20 16:10:54,741 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
- 2025-07-20 16:10:54,741 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
- 2025-07-20 16:10:54,744 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
- 2025-07-20 16:10:54,746 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
- 2025-07-20 16:10:54,747 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-1
- 2025-07-20 16:10:54,747 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
- 2025-07-20 16:10:54,750 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
- 2025-07-20 16:10:54,752 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
- 2025-07-20 16:10:54,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
- 2025-07-20 16:10:54,772 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
- 2025-07-20 16:10:54,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
- 2025-07-20 16:10:54,854 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
- 2025-07-20 16:10:54,854 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
- 2025-07-20 16:10:54,858 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
- 2025-07-20 16:10:54,858 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
- 2025-07-20 16:10:54,858 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
- 2025-07-20 16:11:02,945 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:11:02,949 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:11:02,952 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 228
- 2025-07-20 16:11:09,843 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-07-20 16:13:35,181 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 16:13:35,182 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
- 2025-07-20 16:13:35,182 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
- 2025-07-20 16:13:35,182 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
- 2025-07-20 16:13:35,183 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
- 2025-07-20 16:13:35,183 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
- 2025-07-20 16:13:35,183 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
- 2025-07-20 16:13:35,184 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
- 2025-07-20 16:13:35,184 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
- 2025-07-20 16:13:35,185 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
- 2025-07-20 16:13:35,185 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
- 2025-07-20 16:13:35,185 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
- 2025-07-20 16:13:35,186 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
- 2025-07-20 16:13:35,186 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
- 2025-07-20 16:13:35,186 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
- 2025-07-20 16:13:35,187 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
- 2025-07-20 16:13:35,187 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
- 2025-07-20 16:13:35,187 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
- 2025-07-20 16:13:35,188 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
- 2025-07-20 16:13:35,188 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
- 2025-07-20 16:13:35,188 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
- 2025-07-20 16:13:35,189 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
- 2025-07-20 16:13:35,189 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
- 2025-07-20 16:13:35,190 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
- 2025-07-20 16:13:35,190 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
- 2025-07-20 16:13:35,190 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
- 2025-07-20 16:13:35,191 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
- 2025-07-20 16:13:35,191 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
- 2025-07-20 16:13:35,191 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
- 2025-07-20 16:13:35,192 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
- 2025-07-20 16:13:35,192 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
- 2025-07-20 16:13:35,192 - __main__ - INFO - Found 30 total pdf paths to add
- 2025-07-20 16:13:35,292 - __main__ - INFO - Calculated items_per_group: 65 based on average pages per PDF: 7.60
- 2025-07-20 16:13:35,458 - __main__ - INFO - Starting pipeline with PID 609195
- 2025-07-20 16:13:35,459 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 16:13:35,572 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 16:13:36,598 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 16:13:37,641 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 16:13:38,699 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 16:13:39,765 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 16:13:40,832 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 16:13:41,861 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 16:13:42,920 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 16:13:43,777 - sglang - INFO - [2025-07-20 16:13:43] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=144398080, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:13:43,777 - __main__ - INFO - [2025-07-20 16:13:43] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=144398080, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:13:44,053 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 16:13:44,669 - sglang - INFO - [2025-07-20 16:13:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:13:44,669 - __main__ - INFO - [2025-07-20 16:13:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:13:45,097 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 16:13:46,160 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 16:13:47,196 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 16:13:48,259 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 16:13:49,395 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 16:13:50,462 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 16:13:50,913 - sglang - INFO - [2025-07-20 16:13:50 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:13:50,913 - __main__ - INFO - [2025-07-20 16:13:50 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:13:50,915 - sglang - INFO - [2025-07-20 16:13:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:13:50,915 - __main__ - INFO - [2025-07-20 16:13:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:13:50,915 - sglang - INFO - [2025-07-20 16:13:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:13:50,915 - __main__ - INFO - [2025-07-20 16:13:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:13:50,916 - sglang - INFO - [2025-07-20 16:13:50 TP0] Init torch distributed begin.
- 2025-07-20 16:13:50,916 - __main__ - INFO - [2025-07-20 16:13:50 TP0] Init torch distributed begin.
- 2025-07-20 16:13:51,546 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 16:13:52,607 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 16:13:53,674 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 16:13:54,746 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 16:13:55,800 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 16:13:56,515 - sglang - INFO - [2025-07-20 16:13:56 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:13:56,515 - __main__ - INFO - [2025-07-20 16:13:56 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:13:56,854 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 16:13:57,220 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:13:57,221 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:13:57,906 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 16:13:58,974 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 16:14:00,041 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 16:14:01,108 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 16:14:02,175 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 16:14:03,243 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 16:14:04,311 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 16:14:05,379 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 16:14:06,408 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 16:14:07,460 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 16:14:08,524 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 16:14:09,594 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 16:14:10,327 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.11s/it]
- 2025-07-20 16:14:10,327 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.11s/it]
- 2025-07-20 16:14:10,664 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 16:14:11,716 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 16:14:12,782 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 16:14:13,850 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 16:14:14,917 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 16:14:15,947 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 16:14:17,004 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 16:14:18,072 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 16:14:19,139 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 16:14:20,207 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 16:14:21,251 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 16:14:22,315 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 16:14:23,383 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 16:14:23,546 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.17s/it]
- 2025-07-20 16:14:23,546 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.17s/it]
- 2025-07-20 16:14:24,460 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 16:14:25,528 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 16:14:26,595 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 16:14:27,662 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 16:14:28,725 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 16:14:29,784 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-20 16:14:30,850 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-20 16:14:31,916 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-20 16:14:32,983 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-20 16:14:34,052 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-20 16:14:35,119 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-20 16:14:36,187 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-20 16:14:36,446 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.05s/it]
- 2025-07-20 16:14:36,446 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.05s/it]
- 2025-07-20 16:14:37,263 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-20 16:14:38,330 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-20 16:14:39,398 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-20 16:14:40,465 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-20 16:14:41,003 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.70s/it]
- 2025-07-20 16:14:41,003 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.70s/it]
- 2025-07-20 16:14:41,004 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
- 2025-07-20 16:14:41,004 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
- 2025-07-20 16:14:41,004 - sglang - INFO -
- 2025-07-20 16:14:41,004 - __main__ - INFO -
- 2025-07-20 16:14:41,099 - sglang - INFO - [2025-07-20 16:14:41 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:14:41,099 - __main__ - INFO - [2025-07-20 16:14:41 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:14:41,111 - sglang - INFO - [2025-07-20 16:14:41 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:14:41,111 - __main__ - INFO - [2025-07-20 16:14:41 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:14:41,111 - sglang - INFO - [2025-07-20 16:14:41 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:14:41,111 - __main__ - INFO - [2025-07-20 16:14:41 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:14:41,330 - sglang - INFO - [2025-07-20 16:14:41 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:14:41,330 - __main__ - INFO - [2025-07-20 16:14:41 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:14:41,542 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-20 16:14:42,609 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-20 16:14:43,465 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.32s/it]
50%|█████ | 2/4 [00:01<00:01, 1.43it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.98it/s]
100%|██████████| 4/4 [00:02<00:00, 2.42it/s]
100%|██████████| 4/4 [00:02<00:00, 1.88it/s]
- 2025-07-20 16:14:43,465 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.32s/it]
50%|█████ | 2/4 [00:01<00:01, 1.43it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.98it/s]
100%|██████████| 4/4 [00:02<00:00, 2.42it/s]
100%|██████████| 4/4 [00:02<00:00, 1.88it/s]
- 2025-07-20 16:14:43,465 - sglang - INFO - [2025-07-20 16:14:43 TP0] Capture cuda graph end. Time elapsed: 2.13 s
- 2025-07-20 16:14:43,465 - __main__ - INFO - [2025-07-20 16:14:43 TP0] Capture cuda graph end. Time elapsed: 2.13 s
- 2025-07-20 16:14:43,686 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-20 16:14:44,203 - sglang - INFO - [2025-07-20 16:14:44 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:14:44,203 - __main__ - INFO - [2025-07-20 16:14:44 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:14:44,783 - __main__ - INFO - sglang server is ready.
- 2025-07-20 16:14:44,783 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 16:14:44,783 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:14:44,784 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 16:14:44,784 - __main__ - INFO - Worker 0 processing work item 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
- 2025-07-20 16:14:44,784 - __main__ - INFO - Created all tasks for 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
- 2025-07-20 16:14:44,796 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
- 2025-07-20 16:14:44,799 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106004000.pdf in worker 0
- 2025-07-20 16:14:44,801 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106008000.pdf in worker 0
- 2025-07-20 16:14:44,803 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106011000.pdf in worker 0
- 2025-07-20 16:14:44,805 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106013001.pdf in worker 0
- 2025-07-20 16:14:44,807 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013002.pdf in worker 0
- 2025-07-20 16:14:44,809 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
- 2025-07-20 16:14:44,812 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
- 2025-07-20 16:14:44,814 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
- 2025-07-20 16:14:44,816 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
- 2025-07-20 16:14:44,818 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
- 2025-07-20 16:14:44,820 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
- 2025-07-20 16:14:44,823 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900701.pdf in worker 0
- 2025-07-20 16:14:44,825 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900602.pdf in worker 0
- 2025-07-20 16:14:44,827 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900702.pdf in worker 0
- 2025-07-20 16:14:44,829 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029008.pdf in worker 0
- 2025-07-20 16:14:44,831 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900901.pdf in worker 0
- 2025-07-20 16:14:44,833 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900902.pdf in worker 0
- 2025-07-20 16:14:44,834 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901001.pdf in worker 0
- 2025-07-20 16:14:44,836 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901002.pdf in worker 0
- 2025-07-20 16:14:44,838 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106041000.pdf in worker 0
- 2025-07-20 16:14:44,840 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010603501801.pdf in worker 0
- 2025-07-20 16:14:44,842 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010604200101.pdf in worker 0
- 2025-07-20 16:14:44,844 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG344010604300102.pdf in worker 0
- 2025-07-20 16:14:44,846 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
- 2025-07-20 16:14:44,848 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
- 2025-07-20 16:14:44,851 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
- 2025-07-20 16:14:44,852 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
- 2025-07-20 16:14:44,898 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
- 2025-07-20 16:14:44,932 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
- 2025-07-20 16:14:45,344 - sglang - INFO - [2025-07-20 16:14:45 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:14:45,344 - __main__ - INFO - [2025-07-20 16:14:45 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:14:45,344 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 16:14:50,937 - sglang - INFO - [2025-07-20 16:14:50] The server is fired up and ready to roll!
- 2025-07-20 16:14:50,937 - __main__ - INFO - [2025-07-20 16:14:50] The server is fired up and ready to roll!
- 2025-07-20 16:14:54,784 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:14:54,784 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:14:54,784 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 228
- 2025-07-20 16:15:04,786 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:15:04,787 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:15:04,787 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 228
- 2025-07-20 16:15:05,007 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
- 2025-07-20 16:15:05,010 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
- 2025-07-20 16:15:05,025 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
- 2025-07-20 16:15:05,037 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
- 2025-07-20 16:15:05,040 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
- 2025-07-20 16:15:05,046 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
- 2025-07-20 16:15:05,064 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
- 2025-07-20 16:15:05,068 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
- 2025-07-20 16:15:05,080 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
- 2025-07-20 16:15:05,089 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
- 2025-07-20 16:15:05,137 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
- 2025-07-20 16:15:05,146 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-3
- 2025-07-20 16:15:05,148 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-2
- 2025-07-20 16:15:05,149 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-5
- 2025-07-20 16:15:05,150 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-7
- 2025-07-20 16:15:05,151 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-5
- 2025-07-20 16:15:05,152 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-4
- 2025-07-20 16:15:05,158 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-1
- 2025-07-20 16:15:05,158 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-6
- 2025-07-20 16:15:05,180 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-6
- 2025-07-20 16:15:05,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-3
- 2025-07-20 16:15:05,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-4
- 2025-07-20 16:15:05,237 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-3
- 2025-07-20 16:15:05,238 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-2
- 2025-07-20 16:15:05,264 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-2
- 2025-07-20 16:15:05,266 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-2
- 2025-07-20 16:15:05,267 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-3
- 2025-07-20 16:15:05,268 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-4
- 2025-07-20 16:15:05,269 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-5
- 2025-07-20 16:15:05,285 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-4
- 2025-07-20 16:15:05,285 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-1
- 2025-07-20 16:15:05,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-6
- 2025-07-20 16:15:05,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-3
- 2025-07-20 16:15:05,350 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-4
- 2025-07-20 16:15:05,353 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
- 2025-07-20 16:15:05,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-1
- 2025-07-20 16:15:05,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-2
- 2025-07-20 16:15:05,366 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-5
- 2025-07-20 16:15:05,375 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
- 2025-07-20 16:15:05,377 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-1
- 2025-07-20 16:15:05,378 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-1
- 2025-07-20 16:15:05,381 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
- 2025-07-20 16:15:05,433 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-5
- 2025-07-20 16:15:05,445 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
- 2025-07-20 16:15:05,455 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
- 2025-07-20 16:15:05,458 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
- 2025-07-20 16:15:05,462 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
- 2025-07-20 16:15:05,464 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
- 2025-07-20 16:15:05,469 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
- 2025-07-20 16:15:05,482 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
- 2025-07-20 16:15:05,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
- 2025-07-20 16:15:05,562 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
- 2025-07-20 16:15:05,565 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
- 2025-07-20 16:15:05,634 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
- 2025-07-20 16:15:05,637 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
- 2025-07-20 16:15:05,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
- 2025-07-20 16:15:05,648 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
- 2025-07-20 16:15:05,656 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
- 2025-07-20 16:15:05,657 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
- 2025-07-20 16:15:05,734 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
- 2025-07-20 16:15:05,734 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
- 2025-07-20 16:15:05,735 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
- 2025-07-20 16:15:05,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
- 2025-07-20 16:15:05,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
- 2025-07-20 16:15:05,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
- 2025-07-20 16:15:05,750 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
- 2025-07-20 16:15:05,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
- 2025-07-20 16:15:05,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
- 2025-07-20 16:15:05,756 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
- 2025-07-20 16:15:05,757 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
- 2025-07-20 16:15:05,758 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
- 2025-07-20 16:15:05,763 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-6
- 2025-07-20 16:15:05,763 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
- 2025-07-20 16:15:05,838 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
- 2025-07-20 16:15:05,838 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-2
- 2025-07-20 16:15:05,839 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
- 2025-07-20 16:15:05,851 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
- 2025-07-20 16:15:05,853 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
- 2025-07-20 16:15:05,853 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-6
- 2025-07-20 16:15:05,943 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
- 2025-07-20 16:15:05,943 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-2
- 2025-07-20 16:15:05,946 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-6
- 2025-07-20 16:15:05,956 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-5
- 2025-07-20 16:15:05,957 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-3
- 2025-07-20 16:15:05,959 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
- 2025-07-20 16:15:06,036 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-1
- 2025-07-20 16:15:06,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-3
- 2025-07-20 16:15:06,039 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
- 2025-07-20 16:15:06,040 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-5
- 2025-07-20 16:15:06,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-2
- 2025-07-20 16:15:06,044 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-7
- 2025-07-20 16:15:06,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
- 2025-07-20 16:15:06,137 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
- 2025-07-20 16:15:06,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-4
- 2025-07-20 16:15:06,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-3
- 2025-07-20 16:15:06,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-6
- 2025-07-20 16:15:06,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-3
- 2025-07-20 16:15:06,149 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-1
- 2025-07-20 16:15:06,152 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-4
- 2025-07-20 16:15:06,152 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-2
- 2025-07-20 16:15:06,153 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-3
- 2025-07-20 16:15:06,155 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-5
- 2025-07-20 16:15:06,233 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-1
- 2025-07-20 16:15:06,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-2
- 2025-07-20 16:15:06,237 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-4
- 2025-07-20 16:15:06,246 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-3
- 2025-07-20 16:15:06,247 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-6
- 2025-07-20 16:15:06,247 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-6
- 2025-07-20 16:15:06,251 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
- 2025-07-20 16:15:06,255 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-5
- 2025-07-20 16:15:06,333 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-1
- 2025-07-20 16:15:06,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-6
- 2025-07-20 16:15:06,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-2
- 2025-07-20 16:15:06,354 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-5
- 2025-07-20 16:15:06,355 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-2
- 2025-07-20 16:15:06,433 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-1
- 2025-07-20 16:15:06,435 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-3
- 2025-07-20 16:15:06,436 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-4
- 2025-07-20 16:15:06,437 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-1
- 2025-07-20 16:15:06,440 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-3
- 2025-07-20 16:15:06,441 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-2
- 2025-07-20 16:15:06,445 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-4
- 2025-07-20 16:15:06,448 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-2
- 2025-07-20 16:15:06,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-1
- 2025-07-20 16:15:06,539 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-4
- 2025-07-20 16:15:06,735 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-5
- 2025-07-20 16:15:06,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
- 2025-07-20 16:15:06,744 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-3
- 2025-07-20 16:15:06,748 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-3
- 2025-07-20 16:15:06,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-6
- 2025-07-20 16:15:06,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-6
- 2025-07-20 16:15:06,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-6
- 2025-07-20 16:15:06,833 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-1
- 2025-07-20 16:15:06,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-4
- 2025-07-20 16:15:06,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-4
- 2025-07-20 16:15:07,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-2
- 2025-07-20 16:15:07,039 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-8
- 2025-07-20 16:15:07,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-3
- 2025-07-20 16:15:07,044 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-5
- 2025-07-20 16:15:07,047 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-7
- 2025-07-20 16:15:07,049 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-1
- 2025-07-20 16:15:07,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-2
- 2025-07-20 16:15:07,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-5
- 2025-07-20 16:15:07,134 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-7
- 2025-07-20 16:15:07,139 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-5
- 2025-07-20 16:15:07,139 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-5
- 2025-07-20 16:15:07,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-4
- 2025-07-20 16:15:07,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-8
- 2025-07-20 16:15:07,143 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-6
- 2025-07-20 16:15:07,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
- 2025-07-20 16:15:07,146 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-2
- 2025-07-20 16:15:07,146 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-10
- 2025-07-20 16:15:07,147 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-7
- 2025-07-20 16:15:07,153 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
- 2025-07-20 16:15:07,197 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
- 2025-07-20 16:15:07,197 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
- 2025-07-20 16:15:07,198 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-8
- 2025-07-20 16:15:07,198 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
- 2025-07-20 16:15:07,198 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
- 2025-07-20 16:15:07,199 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
- 2025-07-20 16:15:07,199 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-6
- 2025-07-20 16:15:07,200 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
- 2025-07-20 16:15:07,200 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
- 2025-07-20 16:15:07,200 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-5
- 2025-07-20 16:15:07,234 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
- 2025-07-20 16:15:07,234 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
- 2025-07-20 16:15:07,235 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
- 2025-07-20 16:15:07,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
- 2025-07-20 16:15:07,247 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
- 2025-07-20 16:15:07,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
- 2025-07-20 16:15:07,249 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
- 2025-07-20 16:15:07,249 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
- 2025-07-20 16:15:07,252 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-5
- 2025-07-20 16:15:07,254 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
- 2025-07-20 16:15:07,258 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
- 2025-07-20 16:15:07,261 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-4
- 2025-07-20 16:15:07,334 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
- 2025-07-20 16:15:07,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-4
- 2025-07-20 16:15:07,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
- 2025-07-20 16:15:07,339 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
- 2025-07-20 16:15:07,339 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
- 2025-07-20 16:15:07,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
- 2025-07-20 16:15:07,342 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
- 2025-07-20 16:15:07,342 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
- 2025-07-20 16:15:07,343 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
- 2025-07-20 16:15:07,346 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
- 2025-07-20 16:15:07,348 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
- 2025-07-20 16:15:07,449 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-4
- 2025-07-20 16:15:07,449 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
- 2025-07-20 16:15:07,449 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
- 2025-07-20 16:15:07,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
- 2025-07-20 16:15:07,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-1
- 2025-07-20 16:15:07,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
- 2025-07-20 16:15:07,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
- 2025-07-20 16:15:07,452 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-1
- 2025-07-20 16:15:07,453 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-1
- 2025-07-20 16:15:07,454 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
- 2025-07-20 16:15:07,456 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
- 2025-07-20 16:15:07,456 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 16:15:07,458 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-3
- 2025-07-20 16:15:07,459 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
- 2025-07-20 16:15:07,460 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
- 2025-07-20 16:15:07,462 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
- 2025-07-20 16:15:07,463 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
- 2025-07-20 16:15:07,536 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
- 2025-07-20 16:15:07,537 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
- 2025-07-20 16:15:07,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
- 2025-07-20 16:15:07,539 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
- 2025-07-20 16:15:07,540 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
- 2025-07-20 16:15:07,542 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
- 2025-07-20 16:15:07,543 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
- 2025-07-20 16:15:07,544 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
- 2025-07-20 16:15:07,545 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
- 2025-07-20 16:15:07,546 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
- 2025-07-20 16:15:07,548 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
- 2025-07-20 16:15:07,556 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
- 2025-07-20 16:15:07,557 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
- 2025-07-20 16:15:07,643 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
- 2025-07-20 16:15:07,644 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
- 2025-07-20 16:15:07,645 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
- 2025-07-20 16:15:07,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
- 2025-07-20 16:15:07,647 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
- 2025-07-20 16:15:07,649 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-9
- 2025-07-20 16:15:07,649 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
- 2025-07-20 16:15:07,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
- 2025-07-20 16:15:07,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
- 2025-07-20 16:15:07,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
- 2025-07-20 16:15:08,055 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
- 2025-07-20 16:15:14,788 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:15:14,788 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:15:14,832 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 228
- 2025-07-20 16:18:28,967 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 16:18:28,968 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
- 2025-07-20 16:18:28,968 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
- 2025-07-20 16:18:28,968 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
- 2025-07-20 16:18:28,969 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
- 2025-07-20 16:18:28,969 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
- 2025-07-20 16:18:28,970 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
- 2025-07-20 16:18:28,970 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
- 2025-07-20 16:18:28,970 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
- 2025-07-20 16:18:28,971 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
- 2025-07-20 16:18:28,971 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
- 2025-07-20 16:18:28,971 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
- 2025-07-20 16:18:28,972 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
- 2025-07-20 16:18:28,972 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
- 2025-07-20 16:18:28,972 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
- 2025-07-20 16:18:28,972 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
- 2025-07-20 16:18:28,973 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
- 2025-07-20 16:18:28,973 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
- 2025-07-20 16:18:28,973 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
- 2025-07-20 16:18:28,974 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
- 2025-07-20 16:18:28,974 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
- 2025-07-20 16:18:28,974 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
- 2025-07-20 16:18:28,975 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
- 2025-07-20 16:18:28,975 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
- 2025-07-20 16:18:28,975 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
- 2025-07-20 16:18:28,976 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
- 2025-07-20 16:18:28,976 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
- 2025-07-20 16:18:28,976 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
- 2025-07-20 16:18:28,977 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
- 2025-07-20 16:18:28,977 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
- 2025-07-20 16:18:28,977 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
- 2025-07-20 16:18:28,977 - __main__ - INFO - Found 30 total pdf paths to add
- 2025-07-20 16:18:29,059 - __main__ - INFO - Calculated items_per_group: 65 based on average pages per PDF: 7.60
- 2025-07-20 16:18:29,244 - __main__ - INFO - Starting pipeline with PID 613024
- 2025-07-20 16:18:29,244 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 16:18:29,324 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 16:18:30,353 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 16:18:31,464 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 16:18:32,575 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 16:18:33,606 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 16:18:34,646 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 16:18:35,785 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 16:18:36,849 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 16:18:37,680 - sglang - INFO - [2025-07-20 16:18:37] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=192343309, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:18:37,680 - __main__ - INFO - [2025-07-20 16:18:37] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=192343309, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:18:38,005 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 16:18:38,814 - sglang - INFO - [2025-07-20 16:18:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:18:38,814 - __main__ - INFO - [2025-07-20 16:18:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:18:39,081 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 16:18:40,153 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 16:18:41,222 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 16:18:42,289 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 16:18:43,365 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 16:18:44,417 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 16:18:44,888 - sglang - INFO - [2025-07-20 16:18:44 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:18:44,888 - __main__ - INFO - [2025-07-20 16:18:44 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:18:44,891 - sglang - INFO - [2025-07-20 16:18:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:18:44,891 - __main__ - INFO - [2025-07-20 16:18:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:18:44,891 - sglang - INFO - [2025-07-20 16:18:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:18:44,891 - __main__ - INFO - [2025-07-20 16:18:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:18:44,891 - sglang - INFO - [2025-07-20 16:18:44 TP0] Init torch distributed begin.
- 2025-07-20 16:18:44,892 - __main__ - INFO - [2025-07-20 16:18:44 TP0] Init torch distributed begin.
- 2025-07-20 16:18:45,506 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 16:18:46,579 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 16:18:47,612 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 16:18:48,660 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 16:18:49,698 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 16:18:50,309 - sglang - INFO - [2025-07-20 16:18:50 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:18:50,309 - __main__ - INFO - [2025-07-20 16:18:50 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:18:50,770 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 16:18:51,075 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:18:51,075 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:18:51,844 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 16:18:52,912 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 16:18:53,980 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 16:18:55,049 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 16:18:56,105 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 16:18:57,153 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 16:18:58,218 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 16:18:59,285 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 16:19:00,356 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 16:19:01,420 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 16:19:02,454 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 16:19:03,513 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 16:19:04,081 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.01s/it]
- 2025-07-20 16:19:04,082 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.01s/it]
- 2025-07-20 16:19:04,589 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 16:19:05,678 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 16:19:06,741 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 16:19:07,809 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 16:19:08,876 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 16:19:09,923 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 16:19:10,988 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 16:19:12,056 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 16:19:13,126 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 16:19:14,196 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 16:19:15,260 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 16:19:16,329 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 16:19:17,395 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.18s/it]
- 2025-07-20 16:19:17,396 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.18s/it]
- 2025-07-20 16:19:17,397 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 16:19:18,470 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 16:19:19,525 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 16:19:20,596 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 16:19:21,663 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 16:19:22,730 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 16:19:23,798 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-20 16:19:24,865 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-20 16:19:25,937 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-20 16:19:27,006 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-20 16:19:28,074 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-20 16:19:29,142 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-20 16:19:30,212 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-20 16:19:30,327 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.07s/it]
- 2025-07-20 16:19:30,328 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.07s/it]
- 2025-07-20 16:19:31,290 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-20 16:19:32,359 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-20 16:19:33,430 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-20 16:19:34,496 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-20 16:19:34,890 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.71s/it]
- 2025-07-20 16:19:34,890 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.71s/it]
- 2025-07-20 16:19:34,890 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
- 2025-07-20 16:19:34,890 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
- 2025-07-20 16:19:34,891 - sglang - INFO -
- 2025-07-20 16:19:34,891 - __main__ - INFO -
- 2025-07-20 16:19:34,982 - sglang - INFO - [2025-07-20 16:19:34 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:19:34,982 - __main__ - INFO - [2025-07-20 16:19:34 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:19:34,994 - sglang - INFO - [2025-07-20 16:19:34 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:19:34,995 - __main__ - INFO - [2025-07-20 16:19:34 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:19:34,995 - sglang - INFO - [2025-07-20 16:19:34 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:19:34,995 - __main__ - INFO - [2025-07-20 16:19:34 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:19:35,237 - sglang - INFO - [2025-07-20 16:19:35 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:19:35,237 - __main__ - INFO - [2025-07-20 16:19:35 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:19:35,532 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-20 16:19:36,582 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-20 16:19:37,572 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.52s/it]
50%|█████ | 2/4 [00:01<00:01, 1.26it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.81it/s]
100%|██████████| 4/4 [00:02<00:00, 2.28it/s]
100%|██████████| 4/4 [00:02<00:00, 1.72it/s]
- 2025-07-20 16:19:37,573 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.52s/it]
50%|█████ | 2/4 [00:01<00:01, 1.26it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.81it/s]
100%|██████████| 4/4 [00:02<00:00, 2.28it/s]
100%|██████████| 4/4 [00:02<00:00, 1.72it/s]
- 2025-07-20 16:19:37,573 - sglang - INFO - [2025-07-20 16:19:37 TP0] Capture cuda graph end. Time elapsed: 2.34 s
- 2025-07-20 16:19:37,573 - __main__ - INFO - [2025-07-20 16:19:37 TP0] Capture cuda graph end. Time elapsed: 2.34 s
- 2025-07-20 16:19:37,658 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-20 16:19:38,318 - sglang - INFO - [2025-07-20 16:19:38 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:19:38,318 - __main__ - INFO - [2025-07-20 16:19:38 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:19:38,758 - __main__ - INFO - sglang server is ready.
- 2025-07-20 16:19:38,758 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 16:19:38,758 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:19:38,758 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 16:19:38,758 - __main__ - INFO - Worker 0 processing work item 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
- 2025-07-20 16:19:38,759 - __main__ - INFO - Created all tasks for 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
- 2025-07-20 16:19:38,772 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
- 2025-07-20 16:19:38,774 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106004000.pdf in worker 0
- 2025-07-20 16:19:38,777 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106011000.pdf in worker 0
- 2025-07-20 16:19:38,779 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106008000.pdf in worker 0
- 2025-07-20 16:19:38,781 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106013001.pdf in worker 0
- 2025-07-20 16:19:38,783 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013002.pdf in worker 0
- 2025-07-20 16:19:38,785 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
- 2025-07-20 16:19:38,787 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
- 2025-07-20 16:19:38,790 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
- 2025-07-20 16:19:38,792 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
- 2025-07-20 16:19:38,794 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
- 2025-07-20 16:19:38,796 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900602.pdf in worker 0
- 2025-07-20 16:19:38,798 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
- 2025-07-20 16:19:38,801 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900702.pdf in worker 0
- 2025-07-20 16:19:38,803 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029008.pdf in worker 0
- 2025-07-20 16:19:38,805 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900701.pdf in worker 0
- 2025-07-20 16:19:38,806 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900901.pdf in worker 0
- 2025-07-20 16:19:38,808 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900902.pdf in worker 0
- 2025-07-20 16:19:38,810 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901001.pdf in worker 0
- 2025-07-20 16:19:38,812 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901002.pdf in worker 0
- 2025-07-20 16:19:38,813 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010603501801.pdf in worker 0
- 2025-07-20 16:19:38,815 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106041000.pdf in worker 0
- 2025-07-20 16:19:38,817 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010604200101.pdf in worker 0
- 2025-07-20 16:19:38,819 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG344010604300102.pdf in worker 0
- 2025-07-20 16:19:38,821 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
- 2025-07-20 16:19:38,823 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
- 2025-07-20 16:19:38,825 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
- 2025-07-20 16:19:38,827 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
- 2025-07-20 16:19:38,936 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
- 2025-07-20 16:19:38,940 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
- 2025-07-20 16:19:39,442 - sglang - INFO - [2025-07-20 16:19:39 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:19:39,442 - __main__ - INFO - [2025-07-20 16:19:39 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:19:39,442 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 16:19:48,040 - sglang - INFO - [2025-07-20 16:19:48] The server is fired up and ready to roll!
- 2025-07-20 16:19:48,041 - __main__ - INFO - [2025-07-20 16:19:48] The server is fired up and ready to roll!
- 2025-07-20 16:19:48,833 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:19:48,833 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:19:48,834 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 228
- 2025-07-20 16:19:58,820 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
- 2025-07-20 16:19:58,828 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
- 2025-07-20 16:19:58,834 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
- 2025-07-20 16:19:58,834 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:19:58,835 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:19:58,835 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 228
- 2025-07-20 16:19:58,848 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
- 2025-07-20 16:19:58,857 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
- 2025-07-20 16:19:58,859 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
- 2025-07-20 16:19:58,859 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
- 2025-07-20 16:19:58,878 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
- 2025-07-20 16:19:58,883 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
- 2025-07-20 16:19:58,884 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
- 2025-07-20 16:19:58,939 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-4
- 2025-07-20 16:19:58,939 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-6
- 2025-07-20 16:19:58,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-3
- 2025-07-20 16:19:58,950 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-7
- 2025-07-20 16:19:58,952 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-2
- 2025-07-20 16:19:58,954 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-5
- 2025-07-20 16:19:58,955 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-4
- 2025-07-20 16:19:58,963 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-5
- 2025-07-20 16:19:58,964 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-6
- 2025-07-20 16:19:58,965 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-1
- 2025-07-20 16:19:58,970 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-5
- 2025-07-20 16:19:58,974 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
- 2025-07-20 16:19:58,979 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-5
- 2025-07-20 16:19:59,035 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-1
- 2025-07-20 16:19:59,049 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-2
- 2025-07-20 16:19:59,049 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-3
- 2025-07-20 16:19:59,053 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-2
- 2025-07-20 16:19:59,053 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-3
- 2025-07-20 16:19:59,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-1
- 2025-07-20 16:19:59,056 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-3
- 2025-07-20 16:19:59,057 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-2
- 2025-07-20 16:19:59,058 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
- 2025-07-20 16:19:59,059 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-4
- 2025-07-20 16:19:59,060 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-2
- 2025-07-20 16:19:59,061 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-4
- 2025-07-20 16:19:59,063 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-4
- 2025-07-20 16:19:59,064 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-6
- 2025-07-20 16:19:59,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
- 2025-07-20 16:19:59,145 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
- 2025-07-20 16:19:59,145 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-3
- 2025-07-20 16:19:59,233 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-1
- 2025-07-20 16:19:59,234 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
- 2025-07-20 16:19:59,235 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
- 2025-07-20 16:19:59,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
- 2025-07-20 16:19:59,244 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-5
- 2025-07-20 16:19:59,245 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
- 2025-07-20 16:19:59,246 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
- 2025-07-20 16:19:59,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
- 2025-07-20 16:19:59,251 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
- 2025-07-20 16:19:59,252 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
- 2025-07-20 16:19:59,257 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
- 2025-07-20 16:19:59,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
- 2025-07-20 16:19:59,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
- 2025-07-20 16:19:59,343 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
- 2025-07-20 16:19:59,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
- 2025-07-20 16:19:59,348 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
- 2025-07-20 16:19:59,349 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
- 2025-07-20 16:19:59,349 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
- 2025-07-20 16:19:59,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
- 2025-07-20 16:19:59,360 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
- 2025-07-20 16:19:59,435 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
- 2025-07-20 16:19:59,436 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-6
- 2025-07-20 16:19:59,437 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
- 2025-07-20 16:19:59,439 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
- 2025-07-20 16:19:59,440 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
- 2025-07-20 16:19:59,441 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
- 2025-07-20 16:19:59,442 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
- 2025-07-20 16:19:59,443 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
- 2025-07-20 16:19:59,444 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
- 2025-07-20 16:19:59,448 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
- 2025-07-20 16:19:59,533 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-3
- 2025-07-20 16:19:59,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
- 2025-07-20 16:19:59,537 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
- 2025-07-20 16:19:59,636 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-5
- 2025-07-20 16:19:59,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
- 2025-07-20 16:19:59,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-3
- 2025-07-20 16:19:59,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
- 2025-07-20 16:19:59,641 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-2
- 2025-07-20 16:19:59,642 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
- 2025-07-20 16:19:59,643 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
- 2025-07-20 16:19:59,645 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-2
- 2025-07-20 16:19:59,648 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
- 2025-07-20 16:19:59,650 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
- 2025-07-20 16:19:59,652 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-4
- 2025-07-20 16:19:59,737 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-6
- 2025-07-20 16:19:59,737 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-7
- 2025-07-20 16:19:59,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-1
- 2025-07-20 16:19:59,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-5
- 2025-07-20 16:19:59,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-6
- 2025-07-20 16:19:59,746 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-2
- 2025-07-20 16:19:59,747 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-2
- 2025-07-20 16:19:59,750 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-4
- 2025-07-20 16:19:59,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-2
- 2025-07-20 16:19:59,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-3
- 2025-07-20 16:19:59,754 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-6
- 2025-07-20 16:19:59,756 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-6
- 2025-07-20 16:19:59,756 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
- 2025-07-20 16:19:59,758 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-5
- 2025-07-20 16:19:59,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-4
- 2025-07-20 16:19:59,837 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-4
- 2025-07-20 16:19:59,852 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-6
- 2025-07-20 16:19:59,853 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-5
- 2025-07-20 16:19:59,856 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-2
- 2025-07-20 16:19:59,935 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-6
- 2025-07-20 16:20:00,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
- 2025-07-20 16:20:00,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-3
- 2025-07-20 16:20:00,042 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-1
- 2025-07-20 16:20:00,044 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-2
- 2025-07-20 16:20:00,051 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-3
- 2025-07-20 16:20:00,051 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-6
- 2025-07-20 16:20:00,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-6
- 2025-07-20 16:20:00,053 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-3
- 2025-07-20 16:20:00,055 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-3
- 2025-07-20 16:20:00,135 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
- 2025-07-20 16:20:00,136 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-1
- 2025-07-20 16:20:00,138 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-1
- 2025-07-20 16:20:00,140 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
- 2025-07-20 16:20:00,141 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-3
- 2025-07-20 16:20:00,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-1
- 2025-07-20 16:20:00,143 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-5
- 2025-07-20 16:20:00,333 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-6
- 2025-07-20 16:20:00,334 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-5
- 2025-07-20 16:20:00,334 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-3
- 2025-07-20 16:20:00,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-4
- 2025-07-20 16:20:00,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-5
- 2025-07-20 16:20:00,338 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-4
- 2025-07-20 16:20:00,339 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-2
- 2025-07-20 16:20:00,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
- 2025-07-20 16:20:00,349 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-6
- 2025-07-20 16:20:00,350 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-4
- 2025-07-20 16:20:00,352 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-3
- 2025-07-20 16:20:00,353 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-4
- 2025-07-20 16:20:00,550 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-5
- 2025-07-20 16:20:00,552 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-1
- 2025-07-20 16:20:00,635 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-3
- 2025-07-20 16:20:00,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-8
- 2025-07-20 16:20:00,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-4
- 2025-07-20 16:20:00,743 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-7
- 2025-07-20 16:20:00,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-2
- 2025-07-20 16:20:00,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-8
- 2025-07-20 16:20:00,789 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-5
- 2025-07-20 16:20:00,789 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-5
- 2025-07-20 16:20:00,790 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
- 2025-07-20 16:20:00,834 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
- 2025-07-20 16:20:00,836 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-10
- 2025-07-20 16:20:00,837 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-7
- 2025-07-20 16:20:00,840 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-5
- 2025-07-20 16:20:00,842 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-7
- 2025-07-20 16:20:00,843 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-2
- 2025-07-20 16:20:00,944 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-8
- 2025-07-20 16:20:00,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-9
- 2025-07-20 16:20:00,946 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
- 2025-07-20 16:20:00,948 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-6
- 2025-07-20 16:20:00,950 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-1
- 2025-07-20 16:20:00,952 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-4
- 2025-07-20 16:20:00,954 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-1
- 2025-07-20 16:20:00,958 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-2
- 2025-07-20 16:20:00,961 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
- 2025-07-20 16:20:00,964 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
- 2025-07-20 16:20:00,966 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
- 2025-07-20 16:20:00,966 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-1
- 2025-07-20 16:20:00,968 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-1
- 2025-07-20 16:20:01,034 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
- 2025-07-20 16:20:01,035 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
- 2025-07-20 16:20:01,036 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
- 2025-07-20 16:20:01,036 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
- 2025-07-20 16:20:01,039 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-4
- 2025-07-20 16:20:01,047 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
- 2025-07-20 16:20:01,048 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
- 2025-07-20 16:20:01,050 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-5
- 2025-07-20 16:20:01,051 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 16:20:01,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
- 2025-07-20 16:20:01,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
- 2025-07-20 16:20:01,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
- 2025-07-20 16:20:01,056 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
- 2025-07-20 16:20:01,056 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
- 2025-07-20 16:20:01,058 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-4
- 2025-07-20 16:20:01,060 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
- 2025-07-20 16:20:01,062 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
- 2025-07-20 16:20:01,064 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
- 2025-07-20 16:20:01,066 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-1
- 2025-07-20 16:20:01,068 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
- 2025-07-20 16:20:01,135 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
- 2025-07-20 16:20:01,137 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
- 2025-07-20 16:20:01,138 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
- 2025-07-20 16:20:01,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
- 2025-07-20 16:20:01,149 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
- 2025-07-20 16:20:01,151 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-1
- 2025-07-20 16:20:01,153 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-1
- 2025-07-20 16:20:01,155 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
- 2025-07-20 16:20:01,157 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
- 2025-07-20 16:20:01,159 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
- 2025-07-20 16:20:01,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
- 2025-07-20 16:20:01,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
- 2025-07-20 16:20:01,249 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
- 2025-07-20 16:20:01,249 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
- 2025-07-20 16:20:01,250 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
- 2025-07-20 16:20:01,250 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
- 2025-07-20 16:20:01,253 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
- 2025-07-20 16:20:01,257 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
- 2025-07-20 16:20:01,260 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
- 2025-07-20 16:20:01,261 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
- 2025-07-20 16:20:01,265 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
- 2025-07-20 16:20:01,266 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
- 2025-07-20 16:20:01,267 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
- 2025-07-20 16:20:01,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
- 2025-07-20 16:20:01,340 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
- 2025-07-20 16:20:01,342 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
- 2025-07-20 16:20:01,343 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
- 2025-07-20 16:20:01,346 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
- 2025-07-20 16:20:01,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
- 2025-07-20 16:20:01,353 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
- 2025-07-20 16:20:01,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-2
- 2025-07-20 16:20:01,360 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
- 2025-07-20 16:20:01,361 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
- 2025-07-20 16:20:01,363 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
- 2025-07-20 16:20:01,466 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-3
- 2025-07-20 16:20:01,536 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
- 2025-07-20 16:20:01,536 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
- 2025-07-20 16:20:01,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
- 2025-07-20 16:20:01,543 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
- 2025-07-20 16:20:01,546 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
- 2025-07-20 16:20:01,547 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
- 2025-07-20 16:20:01,551 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
- 2025-07-20 16:20:01,553 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
- 2025-07-20 16:20:01,557 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
- 2025-07-20 16:20:01,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
- 2025-07-20 16:20:01,641 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
- 2025-07-20 16:20:08,836 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 16:20:08,836 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:20:08,838 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 228
- 2025-07-20 16:33:56,631 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 16:33:56,632 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
- 2025-07-20 16:33:56,632 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
- 2025-07-20 16:33:56,633 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
- 2025-07-20 16:33:56,633 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
- 2025-07-20 16:33:56,633 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
- 2025-07-20 16:33:56,634 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
- 2025-07-20 16:33:56,634 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
- 2025-07-20 16:33:56,635 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
- 2025-07-20 16:33:56,635 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
- 2025-07-20 16:33:56,635 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
- 2025-07-20 16:33:56,636 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
- 2025-07-20 16:33:56,636 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
- 2025-07-20 16:33:56,636 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
- 2025-07-20 16:33:56,637 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
- 2025-07-20 16:33:56,637 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
- 2025-07-20 16:33:56,637 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
- 2025-07-20 16:33:56,638 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
- 2025-07-20 16:33:56,638 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
- 2025-07-20 16:33:56,638 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
- 2025-07-20 16:33:56,639 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
- 2025-07-20 16:33:56,639 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
- 2025-07-20 16:33:56,639 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
- 2025-07-20 16:33:56,640 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
- 2025-07-20 16:33:56,640 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
- 2025-07-20 16:33:56,640 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
- 2025-07-20 16:33:56,641 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
- 2025-07-20 16:33:56,641 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
- 2025-07-20 16:33:56,641 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
- 2025-07-20 16:33:56,641 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
- 2025-07-20 16:33:56,642 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
- 2025-07-20 16:33:56,642 - __main__ - INFO - Found 30 total pdf paths to add
- 2025-07-20 16:33:56,712 - __main__ - INFO - Calculated items_per_group: 6 based on average pages per PDF: 7.60
- 2025-07-20 16:33:56,920 - __main__ - INFO - Starting pipeline with PID 617074
- 2025-07-20 16:33:56,920 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 16:33:57,002 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 16:33:58,034 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 16:33:59,069 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 16:34:00,132 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 16:34:01,200 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 16:34:02,267 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 16:34:03,336 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 16:34:04,405 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 16:34:05,473 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 16:34:05,746 - sglang - INFO - [2025-07-20 16:34:05] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=464142597, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:34:05,746 - __main__ - INFO - [2025-07-20 16:34:05] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=464142597, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:34:06,569 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 16:34:07,018 - sglang - INFO - [2025-07-20 16:34:07] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:34:07,018 - __main__ - INFO - [2025-07-20 16:34:07] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:34:07,647 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 16:34:08,718 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 16:34:09,773 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 16:34:10,849 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 16:34:11,914 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 16:34:12,987 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 16:34:13,404 - sglang - INFO - [2025-07-20 16:34:13 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:34:13,404 - __main__ - INFO - [2025-07-20 16:34:13 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:34:13,406 - sglang - INFO - [2025-07-20 16:34:13 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:34:13,406 - __main__ - INFO - [2025-07-20 16:34:13 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:34:13,407 - sglang - INFO - [2025-07-20 16:34:13 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:34:13,407 - __main__ - INFO - [2025-07-20 16:34:13 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:34:13,407 - sglang - INFO - [2025-07-20 16:34:13 TP0] Init torch distributed begin.
- 2025-07-20 16:34:13,407 - __main__ - INFO - [2025-07-20 16:34:13 TP0] Init torch distributed begin.
- 2025-07-20 16:34:14,050 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 16:34:15,103 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 16:34:16,148 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 16:34:17,212 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 16:34:18,281 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 16:34:19,042 - sglang - INFO - [2025-07-20 16:34:19 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:34:19,042 - __main__ - INFO - [2025-07-20 16:34:19 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:34:19,359 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 16:34:19,831 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:34:19,831 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:34:20,438 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 16:34:21,508 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 16:34:22,578 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 16:34:23,610 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 16:34:24,659 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 16:34:25,724 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 16:34:26,790 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 16:34:27,860 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 16:34:28,930 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 16:34:29,999 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 16:34:31,065 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 16:34:32,127 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 16:34:32,771 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.94s/it]
- 2025-07-20 16:34:32,771 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.94s/it]
- 2025-07-20 16:34:33,192 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 16:34:34,257 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 16:34:35,330 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 16:34:36,398 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 16:34:37,467 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 16:34:38,538 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 16:34:39,570 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 16:34:40,633 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 16:34:41,699 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 16:34:42,764 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 16:34:43,819 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 16:34:44,924 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 16:34:45,782 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:25<00:25, 12.98s/it]
- 2025-07-20 16:34:45,783 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:25<00:25, 12.98s/it]
- 2025-07-20 16:34:45,974 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 16:34:47,016 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 16:34:48,071 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 16:34:49,137 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 16:34:50,199 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 16:34:51,254 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-20 16:34:52,309 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-20 16:34:53,372 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-20 16:34:54,437 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-20 16:34:55,516 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-20 16:34:56,582 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-20 16:34:57,647 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-20 16:34:58,522 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:38<00:12, 12.87s/it]
- 2025-07-20 16:34:58,523 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:38<00:12, 12.87s/it]
- 2025-07-20 16:34:58,725 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-20 16:34:59,793 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-20 16:35:00,858 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-20 16:35:01,926 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-20 16:35:02,990 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.55s/it]
- 2025-07-20 16:35:02,990 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.55s/it]
- 2025-07-20 16:35:02,990 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.78s/it]
- 2025-07-20 16:35:02,990 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.78s/it]
- 2025-07-20 16:35:02,990 - sglang - INFO -
- 2025-07-20 16:35:02,990 - __main__ - INFO -
- 2025-07-20 16:35:02,992 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-20 16:35:03,051 - sglang - INFO - [2025-07-20 16:35:03 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:35:03,051 - __main__ - INFO - [2025-07-20 16:35:03 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:35:03,064 - sglang - INFO - [2025-07-20 16:35:03 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:35:03,064 - __main__ - INFO - [2025-07-20 16:35:03 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:35:03,064 - sglang - INFO - [2025-07-20 16:35:03 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:35:03,064 - __main__ - INFO - [2025-07-20 16:35:03 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:35:03,298 - sglang - INFO - [2025-07-20 16:35:03 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:35:03,298 - __main__ - INFO - [2025-07-20 16:35:03 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:35:04,070 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-20 16:35:05,114 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-20 16:35:05,737 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.51s/it]
50%|█████ | 2/4 [00:01<00:01, 1.23it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.74it/s]
100%|██████████| 4/4 [00:02<00:00, 2.13it/s]
100%|██████████| 4/4 [00:02<00:00, 1.64it/s]
- 2025-07-20 16:35:05,737 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.51s/it]
50%|█████ | 2/4 [00:01<00:01, 1.23it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.74it/s]
100%|██████████| 4/4 [00:02<00:00, 2.13it/s]
100%|██████████| 4/4 [00:02<00:00, 1.64it/s]
- 2025-07-20 16:35:05,737 - sglang - INFO - [2025-07-20 16:35:05 TP0] Capture cuda graph end. Time elapsed: 2.44 s
- 2025-07-20 16:35:05,737 - __main__ - INFO - [2025-07-20 16:35:05 TP0] Capture cuda graph end. Time elapsed: 2.44 s
- 2025-07-20 16:35:06,193 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-07-20 16:35:06,546 - sglang - INFO - [2025-07-20 16:35:06 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:35:06,546 - __main__ - INFO - [2025-07-20 16:35:06 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:35:07,291 - __main__ - INFO - sglang server is ready.
- 2025-07-20 16:35:07,291 - __main__ - INFO - Queue remaining: 5
- 2025-07-20 16:35:07,291 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:35:07,291 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 16:35:07,292 - __main__ - INFO - Worker 0 processing work item edf9b7fc807863af5dce0a8b6f28c0cb86ca7661
- 2025-07-20 16:35:07,292 - __main__ - INFO - Created all tasks for edf9b7fc807863af5dce0a8b6f28c0cb86ca7661
- 2025-07-20 16:35:07,297 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
- 2025-07-20 16:35:07,301 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
- 2025-07-20 16:35:07,304 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
- 2025-07-20 16:35:07,335 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
- 2025-07-20 16:35:07,338 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
- 2025-07-20 16:35:07,341 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
- 2025-07-20 16:35:07,641 - sglang - INFO - [2025-07-20 16:35:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:35:07,642 - __main__ - INFO - [2025-07-20 16:35:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:35:07,642 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 16:35:13,233 - sglang - INFO - [2025-07-20 16:35:13] The server is fired up and ready to roll!
- 2025-07-20 16:35:13,233 - __main__ - INFO - [2025-07-20 16:35:13] The server is fired up and ready to roll!
- 2025-07-20 16:35:17,294 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 16:35:17,294 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:35:17,294 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 43
- 2025-07-20 16:35:25,173 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
- 2025-07-20 16:35:25,175 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
- 2025-07-20 16:35:25,189 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
- 2025-07-20 16:35:25,202 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
- 2025-07-20 16:35:25,210 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
- 2025-07-20 16:35:25,223 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
- 2025-07-20 16:35:25,240 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
- 2025-07-20 16:35:25,253 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
- 2025-07-20 16:35:25,258 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
- 2025-07-20 16:35:25,269 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
- 2025-07-20 16:35:25,280 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
- 2025-07-20 16:35:25,286 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
- 2025-07-20 16:35:25,286 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
- 2025-07-20 16:35:25,286 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
- 2025-07-20 16:35:25,289 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
- 2025-07-20 16:35:25,293 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
- 2025-07-20 16:35:25,307 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
- 2025-07-20 16:35:25,335 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
- 2025-07-20 16:35:25,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
- 2025-07-20 16:35:25,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
- 2025-07-20 16:35:25,338 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
- 2025-07-20 16:35:25,340 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
- 2025-07-20 16:35:25,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
- 2025-07-20 16:35:25,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
- 2025-07-20 16:35:25,370 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
- 2025-07-20 16:35:25,370 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
- 2025-07-20 16:35:25,372 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
- 2025-07-20 16:35:25,372 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
- 2025-07-20 16:35:25,377 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
- 2025-07-20 16:35:25,378 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
- 2025-07-20 16:35:25,388 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
- 2025-07-20 16:35:25,396 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
- 2025-07-20 16:35:25,433 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
- 2025-07-20 16:35:25,454 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
- 2025-07-20 16:35:25,459 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
- 2025-07-20 16:35:25,466 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
- 2025-07-20 16:35:25,567 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
- 2025-07-20 16:35:25,578 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
- 2025-07-20 16:35:25,635 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
- 2025-07-20 16:35:25,648 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
- 2025-07-20 16:35:25,669 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
- 2025-07-20 16:35:25,683 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
- 2025-07-20 16:35:25,686 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
- 2025-07-20 16:35:27,333 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 16:35:27,333 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:35:27,335 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 43
- 2025-07-20 16:35:37,336 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 16:35:37,337 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:35:37,337 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 43
- 2025-07-20 16:36:06,835 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 16:36:06,836 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 16:36:06,836 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:36:06,836 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 43
- 2025-07-20 16:36:06,838 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-4 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-2 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-5 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-3 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-1 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-7 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-9 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-2 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-5 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-8 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-3 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-6 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-1 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-4 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-4 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-9 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-5 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-3 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-6 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-10 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-2 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-1 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-7 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-8 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-5 cancelled
- 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-6 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-2 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-1 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-3 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-4 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-4 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-5 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-2 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-1 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-3 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-6 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-6 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-1 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-4 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-7 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-2 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-5 cancelled
- 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-3 cancelled
- 2025-07-20 16:36:06,842 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 16:36:44,402 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 16:36:44,403 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
- 2025-07-20 16:36:44,403 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
- 2025-07-20 16:36:44,404 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
- 2025-07-20 16:36:44,405 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
- 2025-07-20 16:36:44,405 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
- 2025-07-20 16:36:44,405 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
- 2025-07-20 16:36:44,406 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
- 2025-07-20 16:36:44,406 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
- 2025-07-20 16:36:44,407 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
- 2025-07-20 16:36:44,407 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
- 2025-07-20 16:36:44,407 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
- 2025-07-20 16:36:44,408 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
- 2025-07-20 16:36:44,408 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
- 2025-07-20 16:36:44,408 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
- 2025-07-20 16:36:44,408 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
- 2025-07-20 16:36:44,409 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
- 2025-07-20 16:36:44,409 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
- 2025-07-20 16:36:44,409 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
- 2025-07-20 16:36:44,409 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
- 2025-07-20 16:36:44,410 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
- 2025-07-20 16:36:44,410 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
- 2025-07-20 16:36:44,411 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
- 2025-07-20 16:36:44,411 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
- 2025-07-20 16:36:44,411 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
- 2025-07-20 16:36:44,411 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
- 2025-07-20 16:36:44,412 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
- 2025-07-20 16:36:44,412 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
- 2025-07-20 16:36:44,412 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
- 2025-07-20 16:36:44,412 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
- 2025-07-20 16:36:44,413 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
- 2025-07-20 16:36:44,413 - __main__ - INFO - Found 30 total pdf paths to add
- 2025-07-20 16:36:44,485 - __main__ - INFO - Calculated items_per_group: 1 based on average pages per PDF: 7.60
- 2025-07-20 16:36:44,676 - __main__ - INFO - Starting pipeline with PID 620226
- 2025-07-20 16:36:44,676 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 16:36:44,756 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 16:36:45,806 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 16:36:46,849 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 16:36:47,908 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 16:36:48,971 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 16:36:50,035 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 16:36:51,102 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 16:36:52,170 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 16:36:53,236 - sglang - INFO - [2025-07-20 16:36:53] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=701089678, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:36:53,236 - __main__ - INFO - [2025-07-20 16:36:53] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=701089678, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:36:53,237 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 16:36:54,297 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 16:36:54,375 - sglang - INFO - [2025-07-20 16:36:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:36:54,375 - __main__ - INFO - [2025-07-20 16:36:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:36:55,373 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 16:36:56,440 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 16:36:57,508 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 16:36:58,590 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 16:36:59,670 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 16:37:00,747 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 16:37:00,886 - sglang - INFO - [2025-07-20 16:37:00 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:37:00,886 - __main__ - INFO - [2025-07-20 16:37:00 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:37:00,888 - sglang - INFO - [2025-07-20 16:37:00 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:37:00,888 - __main__ - INFO - [2025-07-20 16:37:00 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:37:00,888 - sglang - INFO - [2025-07-20 16:37:00 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:37:00,888 - __main__ - INFO - [2025-07-20 16:37:00 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:37:00,888 - sglang - INFO - [2025-07-20 16:37:00 TP0] Init torch distributed begin.
- 2025-07-20 16:37:00,889 - __main__ - INFO - [2025-07-20 16:37:00 TP0] Init torch distributed begin.
- 2025-07-20 16:37:01,778 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 16:37:02,827 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 16:37:03,889 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 16:37:04,956 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 16:37:06,023 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 16:37:06,516 - sglang - INFO - [2025-07-20 16:37:06 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:37:06,516 - __main__ - INFO - [2025-07-20 16:37:06 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:37:07,100 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 16:37:07,234 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:37:07,234 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:37:08,177 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 16:37:09,244 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 16:37:10,280 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 16:37:11,344 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 16:37:12,413 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 16:37:13,481 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 16:37:14,537 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 16:37:15,605 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 16:37:16,672 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 16:37:17,740 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 16:37:18,804 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 16:37:19,865 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 16:37:20,077 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.84s/it]
- 2025-07-20 16:37:20,077 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.84s/it]
- 2025-07-20 16:37:20,941 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 16:37:22,013 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 16:37:23,081 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 16:37:24,148 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 16:37:25,221 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 16:37:26,290 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 16:37:27,371 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 16:37:28,424 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 16:37:29,485 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 16:37:30,553 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 16:37:31,625 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 16:37:32,693 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 16:37:33,113 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:25<00:25, 12.96s/it]
- 2025-07-20 16:37:33,113 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:25<00:25, 12.96s/it]
- 2025-07-20 16:37:33,770 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 16:37:34,838 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 16:37:35,891 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 16:37:36,953 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 16:37:38,021 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 16:37:39,088 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-20 16:37:40,148 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-20 16:37:41,216 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-20 16:37:42,284 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-20 16:37:43,352 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-20 16:37:44,420 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-20 16:37:45,489 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-20 16:37:46,557 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-20 16:37:46,698 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.24s/it]
- 2025-07-20 16:37:46,698 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.24s/it]
- 2025-07-20 16:37:47,635 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-20 16:37:48,701 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-20 16:37:49,769 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-20 16:37:50,837 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-20 16:37:51,177 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.78s/it]
- 2025-07-20 16:37:51,177 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.78s/it]
- 2025-07-20 16:37:51,178 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.99s/it]
- 2025-07-20 16:37:51,178 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.99s/it]
- 2025-07-20 16:37:51,178 - sglang - INFO -
- 2025-07-20 16:37:51,178 - __main__ - INFO -
- 2025-07-20 16:37:51,259 - sglang - INFO - [2025-07-20 16:37:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:37:51,259 - __main__ - INFO - [2025-07-20 16:37:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:37:51,273 - sglang - INFO - [2025-07-20 16:37:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:37:51,273 - __main__ - INFO - [2025-07-20 16:37:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:37:51,273 - sglang - INFO - [2025-07-20 16:37:51 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:37:51,273 - __main__ - INFO - [2025-07-20 16:37:51 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:37:51,499 - sglang - INFO - [2025-07-20 16:37:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:37:51,499 - __main__ - INFO - [2025-07-20 16:37:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:37:51,913 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-20 16:37:52,969 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-20 16:37:54,044 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.47s/it]
50%|█████ | 2/4 [00:01<00:01, 1.23it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.66it/s]
100%|██████████| 4/4 [00:02<00:00, 1.98it/s]
100%|██████████| 4/4 [00:02<00:00, 1.58it/s]
- 2025-07-20 16:37:54,044 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.47s/it]
50%|█████ | 2/4 [00:01<00:01, 1.23it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.66it/s]
100%|██████████| 4/4 [00:02<00:00, 1.98it/s]
100%|██████████| 4/4 [00:02<00:00, 1.58it/s]
- 2025-07-20 16:37:54,044 - sglang - INFO - [2025-07-20 16:37:54 TP0] Capture cuda graph end. Time elapsed: 2.54 s
- 2025-07-20 16:37:54,044 - __main__ - INFO - [2025-07-20 16:37:54 TP0] Capture cuda graph end. Time elapsed: 2.54 s
- 2025-07-20 16:37:54,045 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-07-20 16:37:54,823 - sglang - INFO - [2025-07-20 16:37:54 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:37:54,823 - __main__ - INFO - [2025-07-20 16:37:54 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:37:55,133 - __main__ - INFO - sglang server is ready.
- 2025-07-20 16:37:55,134 - __main__ - INFO - Queue remaining: 30
- 2025-07-20 16:37:55,134 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:37:55,134 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 16:37:55,134 - __main__ - INFO - Worker 0 processing work item 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
- 2025-07-20 16:37:55,134 - __main__ - INFO - Created all tasks for 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
- 2025-07-20 16:37:55,137 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
- 2025-07-20 16:37:55,933 - sglang - INFO - [2025-07-20 16:37:55 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:37:55,933 - __main__ - INFO - [2025-07-20 16:37:55 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:37:55,933 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 16:37:57,761 - sglang - INFO - [2025-07-20 16:37:57] The server is fired up and ready to roll!
- 2025-07-20 16:37:57,761 - __main__ - INFO - [2025-07-20 16:37:57] The server is fired up and ready to roll!
- 2025-07-20 16:38:03,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
- 2025-07-20 16:38:03,368 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
- 2025-07-20 16:38:03,377 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
- 2025-07-20 16:38:03,420 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
- 2025-07-20 16:38:03,471 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
- 2025-07-20 16:38:03,506 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
- 2025-07-20 16:38:03,549 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
- 2025-07-20 16:38:05,135 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 16:38:05,137 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:38:05,137 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 7
- 2025-07-20 16:38:15,137 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 16:38:15,144 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:38:15,144 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 7
- 2025-07-20 16:54:15,726 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 16:54:15,727 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
- 2025-07-20 16:54:15,727 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
- 2025-07-20 16:54:15,727 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
- 2025-07-20 16:54:15,728 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
- 2025-07-20 16:54:15,728 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
- 2025-07-20 16:54:15,728 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
- 2025-07-20 16:54:15,729 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
- 2025-07-20 16:54:15,729 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
- 2025-07-20 16:54:15,730 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
- 2025-07-20 16:54:15,731 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
- 2025-07-20 16:54:15,731 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
- 2025-07-20 16:54:15,731 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
- 2025-07-20 16:54:15,732 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
- 2025-07-20 16:54:15,732 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
- 2025-07-20 16:54:15,732 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
- 2025-07-20 16:54:15,733 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
- 2025-07-20 16:54:15,733 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
- 2025-07-20 16:54:15,733 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
- 2025-07-20 16:54:15,734 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
- 2025-07-20 16:54:15,734 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
- 2025-07-20 16:54:15,734 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
- 2025-07-20 16:54:15,735 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
- 2025-07-20 16:54:15,735 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
- 2025-07-20 16:54:15,735 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
- 2025-07-20 16:54:15,736 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
- 2025-07-20 16:54:15,736 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
- 2025-07-20 16:54:15,737 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
- 2025-07-20 16:54:15,737 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
- 2025-07-20 16:54:15,737 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
- 2025-07-20 16:54:15,738 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
- 2025-07-20 16:54:15,738 - __main__ - INFO - Found 30 total pdf paths to add
- 2025-07-20 16:54:15,824 - __main__ - INFO - Calculated items_per_group: 6 based on average pages per PDF: 7.60
- 2025-07-20 16:54:16,011 - __main__ - INFO - Starting pipeline with PID 623290
- 2025-07-20 16:54:16,011 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 16:54:16,255 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 16:54:17,316 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 16:54:18,370 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 16:54:19,432 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 16:54:20,498 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 16:54:21,562 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 16:54:22,650 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 16:54:23,691 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 16:54:24,420 - sglang - INFO - [2025-07-20 16:54:24] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=105231769, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:54:24,420 - __main__ - INFO - [2025-07-20 16:54:24] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=105231769, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 16:54:24,768 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 16:54:25,447 - sglang - INFO - [2025-07-20 16:54:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:54:25,447 - __main__ - INFO - [2025-07-20 16:54:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 16:54:25,842 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 16:54:26,898 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 16:54:27,944 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 16:54:29,010 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 16:54:30,085 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 16:54:31,166 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 16:54:31,851 - sglang - INFO - [2025-07-20 16:54:31 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:54:31,851 - __main__ - INFO - [2025-07-20 16:54:31 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 16:54:31,853 - sglang - INFO - [2025-07-20 16:54:31 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:54:31,853 - __main__ - INFO - [2025-07-20 16:54:31 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 16:54:31,853 - sglang - INFO - [2025-07-20 16:54:31 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:54:31,853 - __main__ - INFO - [2025-07-20 16:54:31 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 16:54:31,853 - sglang - INFO - [2025-07-20 16:54:31 TP0] Init torch distributed begin.
- 2025-07-20 16:54:31,853 - __main__ - INFO - [2025-07-20 16:54:31 TP0] Init torch distributed begin.
- 2025-07-20 16:54:32,212 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 16:54:33,258 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 16:54:34,292 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 16:54:35,331 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 16:54:36,388 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 16:54:37,433 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 16:54:37,450 - sglang - INFO - [2025-07-20 16:54:37 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:54:37,450 - __main__ - INFO - [2025-07-20 16:54:37 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 16:54:38,145 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:54:38,145 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 16:54:38,488 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 16:54:39,553 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 16:54:40,600 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 16:54:41,642 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 16:54:42,701 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 16:54:43,795 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 16:54:44,873 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 16:54:45,917 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 16:54:46,975 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 16:54:48,022 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 16:54:49,063 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 16:54:50,121 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 16:54:51,187 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 16:54:51,489 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:40, 13.34s/it]
- 2025-07-20 16:54:51,489 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:40, 13.34s/it]
- 2025-07-20 16:54:52,236 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 16:54:53,274 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 16:54:54,326 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 16:54:55,374 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 16:54:56,416 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 16:54:57,468 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 16:54:58,533 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 16:54:59,607 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 16:55:00,675 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 16:55:01,742 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 16:55:02,805 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 16:55:03,872 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 16:55:04,483 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.14s/it]
- 2025-07-20 16:55:04,483 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.14s/it]
- 2025-07-20 16:55:04,952 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 16:55:06,023 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 16:55:07,095 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 16:55:08,161 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 16:55:09,237 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 16:55:10,312 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-20 16:55:11,379 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-20 16:55:12,435 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-20 16:55:13,501 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-20 16:55:14,567 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-20 16:55:15,633 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-20 16:55:16,700 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-20 16:55:17,389 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.03s/it]
- 2025-07-20 16:55:17,390 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.03s/it]
- 2025-07-20 16:55:17,780 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-20 16:55:18,853 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-20 16:55:19,924 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-20 16:55:20,992 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-20 16:55:21,963 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.69s/it]
- 2025-07-20 16:55:21,963 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.69s/it]
- 2025-07-20 16:55:21,963 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
- 2025-07-20 16:55:21,963 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
- 2025-07-20 16:55:21,963 - sglang - INFO -
- 2025-07-20 16:55:21,963 - __main__ - INFO -
- 2025-07-20 16:55:22,070 - sglang - INFO - [2025-07-20 16:55:22 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:55:22,070 - __main__ - INFO - [2025-07-20 16:55:22 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 16:55:22,070 - sglang - INFO - [2025-07-20 16:55:22 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:55:22,070 - __main__ - INFO - [2025-07-20 16:55:22 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 16:55:22,070 - sglang - INFO - [2025-07-20 16:55:22 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:55:22,070 - __main__ - INFO - [2025-07-20 16:55:22 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 16:55:22,071 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-20 16:55:22,295 - sglang - INFO - [2025-07-20 16:55:22 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:55:22,295 - __main__ - INFO - [2025-07-20 16:55:22 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 16:55:23,152 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-20 16:55:24,233 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-20 16:55:24,722 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.53s/it]
50%|█████ | 2/4 [00:01<00:01, 1.23it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.73it/s]
100%|██████████| 4/4 [00:02<00:00, 2.16it/s]
100%|██████████| 4/4 [00:02<00:00, 1.65it/s]
- 2025-07-20 16:55:24,723 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.53s/it]
50%|█████ | 2/4 [00:01<00:01, 1.23it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.73it/s]
100%|██████████| 4/4 [00:02<00:00, 2.16it/s]
100%|██████████| 4/4 [00:02<00:00, 1.65it/s]
- 2025-07-20 16:55:24,723 - sglang - INFO - [2025-07-20 16:55:24 TP0] Capture cuda graph end. Time elapsed: 2.43 s
- 2025-07-20 16:55:24,723 - __main__ - INFO - [2025-07-20 16:55:24 TP0] Capture cuda graph end. Time elapsed: 2.43 s
- 2025-07-20 16:55:25,313 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
- 2025-07-20 16:55:25,475 - sglang - INFO - [2025-07-20 16:55:25 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:55:25,476 - __main__ - INFO - [2025-07-20 16:55:25 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 16:55:26,406 - __main__ - INFO - sglang server is ready.
- 2025-07-20 16:55:26,406 - __main__ - INFO - Queue remaining: 5
- 2025-07-20 16:55:26,406 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:55:26,407 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 16:55:26,407 - __main__ - INFO - Worker 0 processing work item 550d87ac0b148afaac52196ab4f139412015292f
- 2025-07-20 16:55:26,407 - __main__ - INFO - Created all tasks for 550d87ac0b148afaac52196ab4f139412015292f
- 2025-07-20 16:55:26,415 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
- 2025-07-20 16:55:26,418 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
- 2025-07-20 16:55:26,422 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
- 2025-07-20 16:55:26,469 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
- 2025-07-20 16:55:26,474 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
- 2025-07-20 16:55:26,478 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
- 2025-07-20 16:55:26,643 - sglang - INFO - [2025-07-20 16:55:26 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:55:26,643 - __main__ - INFO - [2025-07-20 16:55:26 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:55:26,643 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 16:55:30,036 - sglang - INFO - [2025-07-20 16:55:30] The server is fired up and ready to roll!
- 2025-07-20 16:55:30,036 - __main__ - INFO - [2025-07-20 16:55:30] The server is fired up and ready to roll!
- 2025-07-20 16:55:36,408 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 16:55:36,408 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:55:36,408 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 64
- 2025-07-20 16:55:38,901 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
- 2025-07-20 16:55:38,933 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
- 2025-07-20 16:55:38,965 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
- 2025-07-20 16:55:39,006 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
- 2025-07-20 16:55:39,040 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
- 2025-07-20 16:55:39,076 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
- 2025-07-20 16:55:39,088 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
- 2025-07-20 16:55:39,093 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
- 2025-07-20 16:55:39,095 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
- 2025-07-20 16:55:39,119 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
- 2025-07-20 16:55:39,140 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
- 2025-07-20 16:55:39,145 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
- 2025-07-20 16:55:39,180 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
- 2025-07-20 16:55:39,183 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
- 2025-07-20 16:55:39,239 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
- 2025-07-20 16:55:39,242 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
- 2025-07-20 16:55:39,279 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
- 2025-07-20 16:55:39,288 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
- 2025-07-20 16:55:39,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
- 2025-07-20 16:55:39,352 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
- 2025-07-20 16:55:39,382 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
- 2025-07-20 16:55:39,437 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
- 2025-07-20 16:55:39,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
- 2025-07-20 16:55:39,469 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
- 2025-07-20 16:55:39,475 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
- 2025-07-20 16:55:39,535 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
- 2025-07-20 16:55:39,535 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
- 2025-07-20 16:55:39,561 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
- 2025-07-20 16:55:39,563 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
- 2025-07-20 16:55:39,637 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
- 2025-07-20 16:55:39,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 16:55:39,647 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
- 2025-07-20 16:55:39,651 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
- 2025-07-20 16:55:39,655 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
- 2025-07-20 16:55:39,663 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
- 2025-07-20 16:55:39,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
- 2025-07-20 16:55:39,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
- 2025-07-20 16:55:39,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
- 2025-07-20 16:55:39,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
- 2025-07-20 16:55:39,750 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
- 2025-07-20 16:55:39,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
- 2025-07-20 16:55:39,836 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
- 2025-07-20 16:55:39,845 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
- 2025-07-20 16:55:39,848 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
- 2025-07-20 16:55:39,849 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
- 2025-07-20 16:55:39,850 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
- 2025-07-20 16:55:39,856 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
- 2025-07-20 16:55:39,934 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
- 2025-07-20 16:55:39,935 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
- 2025-07-20 16:55:39,938 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
- 2025-07-20 16:55:39,940 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
- 2025-07-20 16:55:39,947 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
- 2025-07-20 16:55:39,949 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
- 2025-07-20 16:55:40,033 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
- 2025-07-20 16:55:40,048 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
- 2025-07-20 16:55:40,050 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
- 2025-07-20 16:55:40,055 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
- 2025-07-20 16:55:40,059 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
- 2025-07-20 16:55:40,141 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
- 2025-07-20 16:55:40,241 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
- 2025-07-20 16:55:40,242 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
- 2025-07-20 16:55:40,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
- 2025-07-20 16:55:40,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
- 2025-07-20 16:55:40,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
- 2025-07-20 16:55:46,433 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 16:55:46,544 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:55:46,544 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 64
- 2025-07-20 16:55:56,545 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 16:55:56,550 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:55:56,552 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 64
- 2025-07-20 16:56:00,641 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 16:56:03,561 - sglang - INFO - [2025-07-20 16:56:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2606, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 16:56:03,561 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 16:56:06,553 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 16:56:06,615 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 16:56:06,615 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 64
- 2025-07-20 17:03:31,482 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 17:03:31,483 - __main__ - INFO - Loading file at olmocr_workspace/job_1753002204/input.pdf as PDF document
- 2025-07-20 17:03:31,483 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 17:03:31,488 - __main__ - INFO - Calculated items_per_group: 3 based on average pages per PDF: 14.00
- 2025-07-20 17:03:31,716 - __main__ - INFO - Starting pipeline with PID 626896
- 2025-07-20 17:03:31,717 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 17:03:31,822 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 17:03:32,865 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 17:03:33,924 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 17:03:34,989 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 17:03:36,055 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 17:03:37,123 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 17:03:38,194 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 17:03:39,253 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 17:03:39,365 - sglang - INFO - [2025-07-20 17:03:39] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=378607899, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 17:03:39,365 - __main__ - INFO - [2025-07-20 17:03:39] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=378607899, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 17:03:40,332 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 17:03:40,392 - sglang - INFO - [2025-07-20 17:03:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 17:03:40,392 - __main__ - INFO - [2025-07-20 17:03:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 17:03:41,411 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 17:03:42,482 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 17:03:43,556 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 17:03:44,619 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 17:03:45,689 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 17:03:46,665 - sglang - INFO - [2025-07-20 17:03:46 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 17:03:46,665 - __main__ - INFO - [2025-07-20 17:03:46 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 17:03:46,667 - sglang - INFO - [2025-07-20 17:03:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 17:03:46,668 - __main__ - INFO - [2025-07-20 17:03:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 17:03:46,668 - sglang - INFO - [2025-07-20 17:03:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 17:03:46,668 - __main__ - INFO - [2025-07-20 17:03:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 17:03:46,668 - sglang - INFO - [2025-07-20 17:03:46 TP0] Init torch distributed begin.
- 2025-07-20 17:03:46,668 - __main__ - INFO - [2025-07-20 17:03:46 TP0] Init torch distributed begin.
- 2025-07-20 17:03:46,767 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 17:03:47,838 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 17:03:48,901 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 17:03:49,955 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 17:03:51,022 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 17:03:52,087 - sglang - INFO - [2025-07-20 17:03:52 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 17:03:52,087 - __main__ - INFO - [2025-07-20 17:03:52 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 17:03:52,088 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 17:03:52,787 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 17:03:52,787 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 17:03:53,167 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 17:03:54,238 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 17:03:55,308 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 17:03:56,379 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 17:03:57,450 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 17:03:58,520 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 17:03:59,590 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 17:04:00,660 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 17:04:01,730 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 17:04:02,796 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 17:04:03,863 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 17:04:04,930 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 17:04:05,832 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.04s/it]
- 2025-07-20 17:04:05,832 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.04s/it]
- 2025-07-20 17:04:05,996 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 17:04:07,061 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 17:04:08,126 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 17:04:09,188 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 17:04:10,254 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 17:04:11,322 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 17:04:12,387 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 17:04:13,455 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 17:04:14,527 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 17:04:15,594 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 17:04:16,660 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 17:04:17,726 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 17:04:18,793 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 17:04:19,022 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.13s/it]
- 2025-07-20 17:04:19,022 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.13s/it]
- 2025-07-20 17:04:19,872 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 17:04:20,938 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 17:04:22,001 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 17:04:23,053 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 17:04:24,123 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 17:04:25,188 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
- 2025-07-20 17:04:26,254 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
- 2025-07-20 17:04:27,319 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
- 2025-07-20 17:04:28,390 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
- 2025-07-20 17:04:29,460 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
- 2025-07-20 17:04:30,531 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
- 2025-07-20 17:04:31,602 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
- 2025-07-20 17:04:32,016 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.07s/it]
- 2025-07-20 17:04:32,017 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.07s/it]
- 2025-07-20 17:04:32,672 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
- 2025-07-20 17:04:33,740 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
- 2025-07-20 17:04:34,808 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
- 2025-07-20 17:04:35,875 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
- 2025-07-20 17:04:36,607 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.72s/it]
- 2025-07-20 17:04:36,607 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.72s/it]
- 2025-07-20 17:04:36,607 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.96s/it]
- 2025-07-20 17:04:36,607 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.96s/it]
- 2025-07-20 17:04:36,607 - sglang - INFO -
- 2025-07-20 17:04:36,608 - __main__ - INFO -
- 2025-07-20 17:04:36,695 - sglang - INFO - [2025-07-20 17:04:36 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 17:04:36,695 - __main__ - INFO - [2025-07-20 17:04:36 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 17:04:36,708 - sglang - INFO - [2025-07-20 17:04:36 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 17:04:36,708 - __main__ - INFO - [2025-07-20 17:04:36 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 17:04:36,709 - sglang - INFO - [2025-07-20 17:04:36 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 17:04:36,709 - __main__ - INFO - [2025-07-20 17:04:36 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 17:04:36,953 - sglang - INFO - [2025-07-20 17:04:36 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 17:04:36,953 - __main__ - INFO - [2025-07-20 17:04:36 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 17:04:36,955 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
- 2025-07-20 17:04:38,030 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
- 2025-07-20 17:04:39,108 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
- 2025-07-20 17:04:39,303 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.46s/it]
50%|█████ | 2/4 [00:01<00:01, 1.28it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.78it/s]
100%|██████████| 4/4 [00:02<00:00, 2.17it/s]
100%|██████████| 4/4 [00:02<00:00, 1.69it/s]
- 2025-07-20 17:04:39,303 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:04, 1.46s/it]
50%|█████ | 2/4 [00:01<00:01, 1.28it/s]
75%|███████▌ | 3/4 [00:02<00:00, 1.78it/s]
100%|██████████| 4/4 [00:02<00:00, 2.17it/s]
100%|██████████| 4/4 [00:02<00:00, 1.69it/s]
- 2025-07-20 17:04:39,303 - sglang - INFO - [2025-07-20 17:04:39 TP0] Capture cuda graph end. Time elapsed: 2.38 s
- 2025-07-20 17:04:39,303 - __main__ - INFO - [2025-07-20 17:04:39 TP0] Capture cuda graph end. Time elapsed: 2.38 s
- 2025-07-20 17:04:40,096 - sglang - INFO - [2025-07-20 17:04:40 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 17:04:40,096 - __main__ - INFO - [2025-07-20 17:04:40 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 17:04:40,187 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
- 2025-07-20 17:04:41,255 - sglang - INFO - [2025-07-20 17:04:41 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:04:41,255 - __main__ - INFO - [2025-07-20 17:04:41 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:04:41,255 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:04:41,327 - __main__ - INFO - sglang server is ready.
- 2025-07-20 17:04:41,328 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 17:04:41,328 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:04:41,328 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 17:04:41,329 - __main__ - INFO - Worker 0 processing work item b3e78c0b6a8de664e1cb6a52a3489482f2f557b8
- 2025-07-20 17:04:41,329 - __main__ - INFO - Created all tasks for b3e78c0b6a8de664e1cb6a52a3489482f2f557b8
- 2025-07-20 17:04:41,335 - __main__ - INFO - Got 14 pages to do for olmocr_workspace/job_1753002204/input.pdf in worker 0
- 2025-07-20 17:04:42,536 - sglang - INFO - [2025-07-20 17:04:42] The server is fired up and ready to roll!
- 2025-07-20 17:04:42,537 - __main__ - INFO - [2025-07-20 17:04:42] The server is fired up and ready to roll!
- 2025-07-20 17:04:48,506 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-1
- 2025-07-20 17:04:48,532 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-3
- 2025-07-20 17:04:48,545 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-2
- 2025-07-20 17:04:48,574 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-4
- 2025-07-20 17:04:48,587 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-5
- 2025-07-20 17:04:48,616 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-8
- 2025-07-20 17:04:48,628 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-7
- 2025-07-20 17:04:48,667 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-9
- 2025-07-20 17:04:48,701 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-14
- 2025-07-20 17:04:48,719 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-6
- 2025-07-20 17:04:48,724 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-13
- 2025-07-20 17:04:48,726 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-12
- 2025-07-20 17:04:48,727 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-11
- 2025-07-20 17:04:48,766 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-10
- 2025-07-20 17:04:51,330 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:04:51,331 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:04:51,331 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 14
- 2025-07-20 17:05:01,333 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:05:01,339 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:05:01,339 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 14
- 2025-07-20 17:05:09,504 - sglang - INFO - [2025-07-20 17:05:09 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:05:09,504 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:05:10,800 - sglang - INFO - [2025-07-20 17:05:10 TP0] Prefill batch. #new-seq: 6, #new-token: 13951, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 7
- 2025-07-20 17:05:10,800 - __main__ - INFO - sglang running req: 1 queue req: 7
- 2025-07-20 17:05:11,340 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:05:11,341 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:05:11,341 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 14
- 2025-07-20 17:05:15,773 - sglang - INFO - [2025-07-20 17:05:15 TP0] Decode batch. #running-req: 7, #token: 16003, token usage: 0.42, gen throughput (token/s): 6.67, #queue-req: 7
- 2025-07-20 17:05:15,773 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:05:16,649 - sglang - INFO - [2025-07-20 17:05:16 TP0] Decode batch. #running-req: 7, #token: 16283, token usage: 0.43, gen throughput (token/s): 319.87, #queue-req: 7
- 2025-07-20 17:05:16,649 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:05:17,526 - sglang - INFO - [2025-07-20 17:05:17 TP0] Decode batch. #running-req: 7, #token: 16563, token usage: 0.44, gen throughput (token/s): 319.22, #queue-req: 7
- 2025-07-20 17:05:17,526 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:05:18,250 - sglang - INFO - [2025-07-20 17:05:18 TP0] Prefill batch. #new-seq: 2, #new-token: 5220, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.40, #running-req: 6, #queue-req: 5
- 2025-07-20 17:05:18,250 - __main__ - INFO - sglang running req: 6 queue req: 5
- 2025-07-20 17:05:19,869 - sglang - INFO - [2025-07-20 17:05:19 TP0] Decode batch. #running-req: 8, #token: 20559, token usage: 0.54, gen throughput (token/s): 122.04, #queue-req: 5
- 2025-07-20 17:05:19,870 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:05:20,753 - sglang - INFO - [2025-07-20 17:05:20 TP0] Decode batch. #running-req: 8, #token: 20879, token usage: 0.55, gen throughput (token/s): 362.21, #queue-req: 5
- 2025-07-20 17:05:20,753 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:05:21,343 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:05:21,344 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 12.42 12.42
- sglang_output_tokens 1.33 1.33
- 2025-07-20 17:05:21,344 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 14
- 2025-07-20 17:05:21,638 - sglang - INFO - [2025-07-20 17:05:21 TP0] Decode batch. #running-req: 8, #token: 21199, token usage: 0.56, gen throughput (token/s): 361.55, #queue-req: 5
- 2025-07-20 17:05:21,638 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:05:22,530 - sglang - INFO - [2025-07-20 17:05:22 TP0] Decode batch. #running-req: 8, #token: 21519, token usage: 0.57, gen throughput (token/s): 358.67, #queue-req: 5
- 2025-07-20 17:05:22,530 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:05:23,418 - sglang - INFO - [2025-07-20 17:05:23 TP0] Decode batch. #running-req: 8, #token: 21839, token usage: 0.57, gen throughput (token/s): 360.39, #queue-req: 5
- 2025-07-20 17:05:23,418 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:05:24,306 - sglang - INFO - [2025-07-20 17:05:24 TP0] Decode batch. #running-req: 8, #token: 22159, token usage: 0.58, gen throughput (token/s): 360.31, #queue-req: 5
- 2025-07-20 17:05:24,306 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:05:25,195 - sglang - INFO - [2025-07-20 17:05:25 TP0] Decode batch. #running-req: 8, #token: 22479, token usage: 0.59, gen throughput (token/s): 359.83, #queue-req: 5
- 2025-07-20 17:05:25,196 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:05:26,089 - sglang - INFO - [2025-07-20 17:05:26 TP0] Decode batch. #running-req: 8, #token: 22799, token usage: 0.60, gen throughput (token/s): 358.14, #queue-req: 5
- 2025-07-20 17:05:26,089 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:05:26,982 - sglang - INFO - [2025-07-20 17:05:26 TP0] Decode batch. #running-req: 8, #token: 23119, token usage: 0.61, gen throughput (token/s): 358.06, #queue-req: 5
- 2025-07-20 17:05:26,983 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:05:27,407 - sglang - INFO - [2025-07-20 17:05:27 TP0] Prefill batch. #new-seq: 2, #new-token: 4561, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.55, #running-req: 7, #queue-req: 3
- 2025-07-20 17:05:27,407 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:05:28,829 - sglang - INFO - [2025-07-20 17:05:28 TP0] Prefill batch. #new-seq: 2, #new-token: 4178, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.60, #running-req: 8, #queue-req: 1
- 2025-07-20 17:05:28,829 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 17:05:30,568 - sglang - INFO - [2025-07-20 17:05:30 TP0] Decode batch. #running-req: 10, #token: 27341, token usage: 0.72, gen throughput (token/s): 99.85, #queue-req: 1
- 2025-07-20 17:05:30,568 - __main__ - INFO - sglang running req: 10 queue req: 1
- 2025-07-20 17:05:31,345 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:05:31,345 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 43.84 43.84
- sglang_output_tokens 9.44 9.44
- 2025-07-20 17:05:31,345 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 3 | 14
- 2025-07-20 17:05:31,527 - sglang - INFO - [2025-07-20 17:05:31 TP0] Decode batch. #running-req: 10, #token: 27741, token usage: 0.73, gen throughput (token/s): 417.09, #queue-req: 1
- 2025-07-20 17:05:31,527 - __main__ - INFO - sglang running req: 10 queue req: 1
- 2025-07-20 17:05:32,487 - sglang - INFO - [2025-07-20 17:05:32 TP0] Decode batch. #running-req: 10, #token: 28141, token usage: 0.74, gen throughput (token/s): 416.94, #queue-req: 1
- 2025-07-20 17:05:32,487 - __main__ - INFO - sglang running req: 10 queue req: 1
- 2025-07-20 17:05:33,449 - sglang - INFO - [2025-07-20 17:05:33 TP0] Decode batch. #running-req: 10, #token: 28541, token usage: 0.75, gen throughput (token/s): 415.43, #queue-req: 1
- 2025-07-20 17:05:33,450 - __main__ - INFO - sglang running req: 10 queue req: 1
- 2025-07-20 17:05:34,125 - sglang - INFO - [2025-07-20 17:05:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 0
- 2025-07-20 17:05:34,125 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:05:34,181 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 17:05:35,148 - sglang - INFO - [2025-07-20 17:05:35 TP0] Decode batch. #running-req: 10, #token: 28504, token usage: 0.75, gen throughput (token/s): 234.85, #queue-req: 0
- 2025-07-20 17:05:35,149 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:05:36,111 - sglang - INFO - [2025-07-20 17:05:36 TP0] Decode batch. #running-req: 10, #token: 28904, token usage: 0.76, gen throughput (token/s): 415.65, #queue-req: 0
- 2025-07-20 17:05:36,111 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:05:37,078 - sglang - INFO - [2025-07-20 17:05:37 TP0] Decode batch. #running-req: 10, #token: 29304, token usage: 0.77, gen throughput (token/s): 413.52, #queue-req: 0
- 2025-07-20 17:05:37,078 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:05:38,042 - sglang - INFO - [2025-07-20 17:05:38 TP0] Decode batch. #running-req: 10, #token: 29704, token usage: 0.78, gen throughput (token/s): 414.79, #queue-req: 0
- 2025-07-20 17:05:38,042 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:05:39,003 - sglang - INFO - [2025-07-20 17:05:39 TP0] Decode batch. #running-req: 9, #token: 27890, token usage: 0.73, gen throughput (token/s): 403.74, #queue-req: 0
- 2025-07-20 17:05:39,003 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:05:39,917 - sglang - INFO - [2025-07-20 17:05:39 TP0] Decode batch. #running-req: 7, #token: 20877, token usage: 0.55, gen throughput (token/s): 342.44, #queue-req: 0
- 2025-07-20 17:05:39,918 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:05:40,809 - sglang - INFO - [2025-07-20 17:05:40 TP0] Decode batch. #running-req: 6, #token: 17525, token usage: 0.46, gen throughput (token/s): 306.18, #queue-req: 0
- 2025-07-20 17:05:40,809 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:05:41,346 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:05:41,347 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 154.67 154.67
- sglang_output_tokens 42.48 42.48
- 2025-07-20 17:05:41,347 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 9 | 14
- 2025-07-20 17:05:41,691 - sglang - INFO - [2025-07-20 17:05:41 TP0] Decode batch. #running-req: 5, #token: 14516, token usage: 0.38, gen throughput (token/s): 250.44, #queue-req: 0
- 2025-07-20 17:05:41,692 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:05:42,567 - sglang - INFO - [2025-07-20 17:05:42 TP0] Decode batch. #running-req: 5, #token: 14716, token usage: 0.39, gen throughput (token/s): 228.46, #queue-req: 0
- 2025-07-20 17:05:42,567 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:05:43,441 - sglang - INFO - [2025-07-20 17:05:43 TP0] Decode batch. #running-req: 5, #token: 14916, token usage: 0.39, gen throughput (token/s): 228.75, #queue-req: 0
- 2025-07-20 17:05:43,441 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:05:44,294 - sglang - INFO - [2025-07-20 17:05:44 TP0] Decode batch. #running-req: 1, #token: 2806, token usage: 0.07, gen throughput (token/s): 144.16, #queue-req: 0
- 2025-07-20 17:05:44,295 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:45,123 - sglang - INFO - [2025-07-20 17:05:45 TP0] Decode batch. #running-req: 1, #token: 2846, token usage: 0.07, gen throughput (token/s): 48.30, #queue-req: 0
- 2025-07-20 17:05:45,123 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:45,950 - sglang - INFO - [2025-07-20 17:05:45 TP0] Decode batch. #running-req: 1, #token: 2886, token usage: 0.08, gen throughput (token/s): 48.34, #queue-req: 0
- 2025-07-20 17:05:45,950 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:46,776 - sglang - INFO - [2025-07-20 17:05:46 TP0] Decode batch. #running-req: 1, #token: 2926, token usage: 0.08, gen throughput (token/s): 48.45, #queue-req: 0
- 2025-07-20 17:05:46,776 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:47,603 - sglang - INFO - [2025-07-20 17:05:47 TP0] Decode batch. #running-req: 1, #token: 2966, token usage: 0.08, gen throughput (token/s): 48.37, #queue-req: 0
- 2025-07-20 17:05:47,603 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:48,432 - sglang - INFO - [2025-07-20 17:05:48 TP0] Decode batch. #running-req: 1, #token: 3006, token usage: 0.08, gen throughput (token/s): 48.25, #queue-req: 0
- 2025-07-20 17:05:48,432 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:49,267 - sglang - INFO - [2025-07-20 17:05:49 TP0] Decode batch. #running-req: 1, #token: 3046, token usage: 0.08, gen throughput (token/s): 47.86, #queue-req: 0
- 2025-07-20 17:05:49,268 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:50,100 - sglang - INFO - [2025-07-20 17:05:50 TP0] Decode batch. #running-req: 1, #token: 3086, token usage: 0.08, gen throughput (token/s): 48.01, #queue-req: 0
- 2025-07-20 17:05:50,101 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:50,935 - sglang - INFO - [2025-07-20 17:05:50 TP0] Decode batch. #running-req: 1, #token: 3126, token usage: 0.08, gen throughput (token/s): 47.96, #queue-req: 0
- 2025-07-20 17:05:50,935 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:51,348 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:05:51,348 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 212.53 212.53
- sglang_output_tokens 58.00 58.00
- 2025-07-20 17:05:51,348 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:05:51,770 - sglang - INFO - [2025-07-20 17:05:51 TP0] Decode batch. #running-req: 1, #token: 3166, token usage: 0.08, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:05:51,770 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:52,601 - sglang - INFO - [2025-07-20 17:05:52 TP0] Decode batch. #running-req: 1, #token: 3206, token usage: 0.08, gen throughput (token/s): 48.09, #queue-req: 0
- 2025-07-20 17:05:52,601 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:53,431 - sglang - INFO - [2025-07-20 17:05:53 TP0] Decode batch. #running-req: 1, #token: 3246, token usage: 0.09, gen throughput (token/s): 48.23, #queue-req: 0
- 2025-07-20 17:05:53,431 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:05:53,706 - __main__ - INFO - Finished TaskGroup for worker on b3e78c0b6a8de664e1cb6a52a3489482f2f557b8
- 2025-07-20 17:05:53,707 - __main__ - INFO - Got 1 docs for b3e78c0b6a8de664e1cb6a52a3489482f2f557b8
- 2025-07-20 17:05:53,709 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-07-20 17:05:53,710 - __main__ - INFO - Work done
- 2025-07-20 17:05:53,710 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 17:08:01,104 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 17:08:01,104 - __main__ - INFO - Loading file at olmocr_workspace/job_1753002474/input.pdf as PDF document
- 2025-07-20 17:08:01,105 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 17:08:01,109 - __main__ - INFO - Calculated items_per_group: 35 based on average pages per PDF: 14.00
- 2025-07-20 17:08:01,338 - __main__ - INFO - Starting pipeline with PID 628948
- 2025-07-20 17:08:01,339 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 17:08:01,425 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 17:08:02,457 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 17:08:03,505 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 17:08:04,570 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 17:08:05,639 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 17:08:06,710 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 17:08:07,779 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 17:08:08,853 - sglang - INFO - [2025-07-20 17:08:08] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=436041262, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 17:08:08,854 - __main__ - INFO - [2025-07-20 17:08:08] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=436041262, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 17:08:08,934 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 17:08:09,809 - sglang - INFO - [2025-07-20 17:08:09] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 17:08:09,809 - __main__ - INFO - [2025-07-20 17:08:09] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 17:08:09,995 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 17:08:11,065 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 17:08:12,133 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 17:08:13,208 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 17:08:14,345 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 17:08:15,409 - sglang - INFO - [2025-07-20 17:08:15 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 17:08:15,409 - __main__ - INFO - [2025-07-20 17:08:15 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 17:08:15,410 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 17:08:15,411 - sglang - INFO - [2025-07-20 17:08:15 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 17:08:15,411 - __main__ - INFO - [2025-07-20 17:08:15 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 17:08:15,411 - sglang - INFO - [2025-07-20 17:08:15 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 17:08:15,411 - __main__ - INFO - [2025-07-20 17:08:15 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 17:08:15,411 - sglang - INFO - [2025-07-20 17:08:15 TP0] Init torch distributed begin.
- 2025-07-20 17:08:15,411 - __main__ - INFO - [2025-07-20 17:08:15 TP0] Init torch distributed begin.
- 2025-07-20 17:08:16,473 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 17:08:17,528 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 17:08:18,594 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 17:08:19,659 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 17:08:20,732 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 17:08:20,765 - sglang - INFO - [2025-07-20 17:08:20 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 17:08:20,765 - __main__ - INFO - [2025-07-20 17:08:20 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 17:08:21,425 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 17:08:21,425 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 17:08:21,792 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 17:08:22,859 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 17:08:23,926 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 17:08:24,992 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 17:08:26,049 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 17:08:27,119 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 17:08:28,189 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 17:08:29,252 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 17:08:30,318 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 17:08:31,386 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 17:08:32,452 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 17:08:33,520 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 17:08:34,574 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 17:08:35,096 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:41, 13.67s/it]
- 2025-07-20 17:08:35,097 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:41, 13.67s/it]
- 2025-07-20 17:08:35,652 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 17:08:36,722 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 17:08:37,782 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 17:08:38,836 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 17:08:39,673 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:18<00:16, 8.32s/it]
- 2025-07-20 17:08:39,673 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:18<00:16, 8.32s/it]
- 2025-07-20 17:08:39,882 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 17:08:40,934 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 17:08:42,000 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 17:08:43,066 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 17:08:44,138 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 17:08:45,208 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 17:08:46,278 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 17:08:47,348 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 17:08:48,415 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 17:08:49,482 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 17:08:49,575 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:28<00:09, 9.04s/it]
- 2025-07-20 17:08:49,576 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:28<00:09, 9.04s/it]
- 2025-07-20 17:08:50,001 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 5.64s/it]
- 2025-07-20 17:08:50,001 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 5.64s/it]
- 2025-07-20 17:08:50,001 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 7.14s/it]
- 2025-07-20 17:08:50,001 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 7.14s/it]
- 2025-07-20 17:08:50,001 - sglang - INFO -
- 2025-07-20 17:08:50,001 - __main__ - INFO -
- 2025-07-20 17:08:50,058 - sglang - INFO - [2025-07-20 17:08:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 17:08:50,058 - __main__ - INFO - [2025-07-20 17:08:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 17:08:50,065 - sglang - INFO - [2025-07-20 17:08:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 17:08:50,065 - __main__ - INFO - [2025-07-20 17:08:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 17:08:50,066 - sglang - INFO - [2025-07-20 17:08:50 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 17:08:50,066 - __main__ - INFO - [2025-07-20 17:08:50 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 17:08:50,240 - sglang - INFO - [2025-07-20 17:08:50 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 17:08:50,240 - __main__ - INFO - [2025-07-20 17:08:50 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 17:08:50,562 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 17:08:51,621 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 17:08:52,344 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.20s/it]
50%|█████ | 2/4 [00:01<00:01, 1.49it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.00it/s]
100%|██████████| 4/4 [00:02<00:00, 2.37it/s]
100%|██████████| 4/4 [00:02<00:00, 1.90it/s]
- 2025-07-20 17:08:52,345 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.20s/it]
50%|█████ | 2/4 [00:01<00:01, 1.49it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.00it/s]
100%|██████████| 4/4 [00:02<00:00, 2.37it/s]
100%|██████████| 4/4 [00:02<00:00, 1.90it/s]
- 2025-07-20 17:08:52,345 - sglang - INFO - [2025-07-20 17:08:52 TP0] Capture cuda graph end. Time elapsed: 2.10 s
- 2025-07-20 17:08:52,345 - __main__ - INFO - [2025-07-20 17:08:52 TP0] Capture cuda graph end. Time elapsed: 2.10 s
- 2025-07-20 17:08:52,700 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 17:08:53,118 - sglang - INFO - [2025-07-20 17:08:53 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 17:08:53,118 - __main__ - INFO - [2025-07-20 17:08:53 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 17:08:53,800 - __main__ - INFO - sglang server is ready.
- 2025-07-20 17:08:53,800 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 17:08:53,800 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:08:53,800 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 17:08:53,801 - __main__ - INFO - Worker 0 processing work item dbf48ba2e0ba653560d78d753cde2080c6a38613
- 2025-07-20 17:08:53,801 - __main__ - INFO - Created all tasks for dbf48ba2e0ba653560d78d753cde2080c6a38613
- 2025-07-20 17:08:53,805 - __main__ - INFO - Got 14 pages to do for olmocr_workspace/job_1753002474/input.pdf in worker 0
- 2025-07-20 17:08:54,242 - sglang - INFO - [2025-07-20 17:08:54 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:08:54,242 - __main__ - INFO - [2025-07-20 17:08:54 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:08:54,242 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:08:55,157 - sglang - INFO - [2025-07-20 17:08:55] The server is fired up and ready to roll!
- 2025-07-20 17:08:55,157 - __main__ - INFO - [2025-07-20 17:08:55] The server is fired up and ready to roll!
- 2025-07-20 17:09:00,720 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-2
- 2025-07-20 17:09:00,726 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-1
- 2025-07-20 17:09:00,745 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-3
- 2025-07-20 17:09:00,778 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-4
- 2025-07-20 17:09:00,785 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-5
- 2025-07-20 17:09:00,812 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-6
- 2025-07-20 17:09:00,826 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-8
- 2025-07-20 17:09:00,908 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-10
- 2025-07-20 17:09:00,940 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-7
- 2025-07-20 17:09:00,957 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-11
- 2025-07-20 17:09:00,963 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-14
- 2025-07-20 17:09:00,977 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-12
- 2025-07-20 17:09:00,994 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-13
- 2025-07-20 17:09:01,003 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-9
- 2025-07-20 17:09:03,833 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:09:03,833 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:09:03,833 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 14
- 2025-07-20 17:09:13,835 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:09:13,835 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:09:13,836 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 14
- 2025-07-20 17:09:21,600 - sglang - INFO - [2025-07-20 17:09:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2170, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:09:21,600 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:09:23,294 - sglang - INFO - [2025-07-20 17:09:23 TP0] Prefill batch. #new-seq: 6, #new-token: 13163, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.06, #running-req: 1, #queue-req: 7
- 2025-07-20 17:09:23,294 - __main__ - INFO - sglang running req: 1 queue req: 7
- 2025-07-20 17:09:23,837 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:09:23,837 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:09:23,838 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 14
- 2025-07-20 17:09:28,111 - sglang - INFO - [2025-07-20 17:09:28 TP0] Decode batch. #running-req: 7, #token: 15564, token usage: 0.41, gen throughput (token/s): 6.80, #queue-req: 7
- 2025-07-20 17:09:28,111 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:09:28,986 - sglang - INFO - [2025-07-20 17:09:28 TP0] Decode batch. #running-req: 7, #token: 15844, token usage: 0.42, gen throughput (token/s): 319.66, #queue-req: 7
- 2025-07-20 17:09:28,987 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:09:29,863 - sglang - INFO - [2025-07-20 17:09:29 TP0] Decode batch. #running-req: 7, #token: 16124, token usage: 0.42, gen throughput (token/s): 319.40, #queue-req: 7
- 2025-07-20 17:09:29,863 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:09:30,586 - sglang - INFO - [2025-07-20 17:09:30 TP0] Prefill batch. #new-seq: 2, #new-token: 5633, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 5
- 2025-07-20 17:09:30,586 - __main__ - INFO - sglang running req: 6 queue req: 5
- 2025-07-20 17:09:32,328 - sglang - INFO - [2025-07-20 17:09:32 TP0] Decode batch. #running-req: 8, #token: 20533, token usage: 0.54, gen throughput (token/s): 116.05, #queue-req: 5
- 2025-07-20 17:09:32,328 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:09:33,216 - sglang - INFO - [2025-07-20 17:09:33 TP0] Decode batch. #running-req: 8, #token: 20853, token usage: 0.55, gen throughput (token/s): 360.13, #queue-req: 5
- 2025-07-20 17:09:33,216 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:09:33,839 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:09:33,839 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 14.72 14.72
- sglang_output_tokens 1.57 1.57
- 2025-07-20 17:09:33,839 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 14
- 2025-07-20 17:09:34,107 - sglang - INFO - [2025-07-20 17:09:34 TP0] Decode batch. #running-req: 8, #token: 21173, token usage: 0.56, gen throughput (token/s): 359.20, #queue-req: 5
- 2025-07-20 17:09:34,107 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:09:34,997 - sglang - INFO - [2025-07-20 17:09:34 TP0] Decode batch. #running-req: 8, #token: 21493, token usage: 0.57, gen throughput (token/s): 359.48, #queue-req: 5
- 2025-07-20 17:09:34,997 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:09:35,889 - sglang - INFO - [2025-07-20 17:09:35 TP0] Decode batch. #running-req: 8, #token: 21813, token usage: 0.57, gen throughput (token/s): 358.90, #queue-req: 5
- 2025-07-20 17:09:35,889 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:09:36,784 - sglang - INFO - [2025-07-20 17:09:36 TP0] Decode batch. #running-req: 8, #token: 22133, token usage: 0.58, gen throughput (token/s): 357.66, #queue-req: 5
- 2025-07-20 17:09:36,784 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:09:37,674 - sglang - INFO - [2025-07-20 17:09:37 TP0] Decode batch. #running-req: 8, #token: 22453, token usage: 0.59, gen throughput (token/s): 359.28, #queue-req: 5
- 2025-07-20 17:09:37,674 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:09:38,564 - sglang - INFO - [2025-07-20 17:09:38 TP0] Decode batch. #running-req: 8, #token: 22773, token usage: 0.60, gen throughput (token/s): 359.64, #queue-req: 5
- 2025-07-20 17:09:38,564 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:09:39,454 - sglang - INFO - [2025-07-20 17:09:39 TP0] Decode batch. #running-req: 8, #token: 23093, token usage: 0.61, gen throughput (token/s): 359.65, #queue-req: 5
- 2025-07-20 17:09:39,454 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:09:39,835 - sglang - INFO - [2025-07-20 17:09:39 TP0] Prefill batch. #new-seq: 2, #new-token: 4020, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.55, #running-req: 7, #queue-req: 3
- 2025-07-20 17:09:39,835 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:09:41,635 - sglang - INFO - [2025-07-20 17:09:41 TP0] Decode batch. #running-req: 9, #token: 25145, token usage: 0.66, gen throughput (token/s): 156.81, #queue-req: 3
- 2025-07-20 17:09:41,635 - __main__ - INFO - sglang running req: 9 queue req: 3
- 2025-07-20 17:09:41,752 - sglang - INFO - [2025-07-20 17:09:41 TP0] Prefill batch. #new-seq: 2, #new-token: 4745, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.59, #running-req: 8, #queue-req: 1
- 2025-07-20 17:09:41,752 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 17:09:43,603 - sglang - INFO - [2025-07-20 17:09:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 0
- 2025-07-20 17:09:43,603 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:09:43,680 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 1 exiting due to empty queue
- 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 2 exiting due to empty queue
- 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 3 exiting due to empty queue
- 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 4 exiting due to empty queue
- 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 5 exiting due to empty queue
- 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 6 exiting due to empty queue
- 2025-07-20 17:09:43,682 - __main__ - INFO - Worker 7 exiting due to empty queue
- 2025-07-20 17:09:43,840 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:09:43,841 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 74.67 74.67
- sglang_output_tokens 16.46 16.46
- 2025-07-20 17:09:43,841 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 14
- 2025-07-20 17:09:44,715 - sglang - INFO - [2025-07-20 17:09:44 TP0] Decode batch. #running-req: 10, #token: 27138, token usage: 0.71, gen throughput (token/s): 127.57, #queue-req: 0
- 2025-07-20 17:09:44,716 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:09:45,670 - sglang - INFO - [2025-07-20 17:09:45 TP0] Decode batch. #running-req: 10, #token: 27538, token usage: 0.72, gen throughput (token/s): 419.16, #queue-req: 0
- 2025-07-20 17:09:45,670 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:09:46,624 - sglang - INFO - [2025-07-20 17:09:46 TP0] Decode batch. #running-req: 10, #token: 27938, token usage: 0.74, gen throughput (token/s): 419.29, #queue-req: 0
- 2025-07-20 17:09:46,624 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:09:47,578 - sglang - INFO - [2025-07-20 17:09:47 TP0] Decode batch. #running-req: 9, #token: 25495, token usage: 0.67, gen throughput (token/s): 405.55, #queue-req: 0
- 2025-07-20 17:09:47,578 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:09:48,532 - sglang - INFO - [2025-07-20 17:09:48 TP0] Decode batch. #running-req: 9, #token: 25855, token usage: 0.68, gen throughput (token/s): 377.30, #queue-req: 0
- 2025-07-20 17:09:48,532 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:09:49,488 - sglang - INFO - [2025-07-20 17:09:49 TP0] Decode batch. #running-req: 9, #token: 26215, token usage: 0.69, gen throughput (token/s): 376.66, #queue-req: 0
- 2025-07-20 17:09:49,488 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:09:50,444 - sglang - INFO - [2025-07-20 17:09:50 TP0] Decode batch. #running-req: 9, #token: 26575, token usage: 0.70, gen throughput (token/s): 376.72, #queue-req: 0
- 2025-07-20 17:09:50,444 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:09:51,383 - sglang - INFO - [2025-07-20 17:09:51 TP0] Decode batch. #running-req: 8, #token: 24717, token usage: 0.65, gen throughput (token/s): 366.30, #queue-req: 0
- 2025-07-20 17:09:51,383 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:09:52,278 - sglang - INFO - [2025-07-20 17:09:52 TP0] Decode batch. #running-req: 7, #token: 21392, token usage: 0.56, gen throughput (token/s): 328.44, #queue-req: 0
- 2025-07-20 17:09:52,278 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:09:53,165 - sglang - INFO - [2025-07-20 17:09:53 TP0] Decode batch. #running-req: 6, #token: 18040, token usage: 0.47, gen throughput (token/s): 303.26, #queue-req: 0
- 2025-07-20 17:09:53,165 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:09:53,841 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:09:53,842 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 152.62 152.62
- sglang_output_tokens 39.30 39.30
- 2025-07-20 17:09:53,842 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 14
- 2025-07-20 17:09:54,043 - sglang - INFO - [2025-07-20 17:09:54 TP0] Decode batch. #running-req: 6, #token: 18280, token usage: 0.48, gen throughput (token/s): 273.18, #queue-req: 0
- 2025-07-20 17:09:54,044 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:09:54,927 - sglang - INFO - [2025-07-20 17:09:54 TP0] Decode batch. #running-req: 6, #token: 18520, token usage: 0.49, gen throughput (token/s): 271.55, #queue-req: 0
- 2025-07-20 17:09:54,928 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:09:55,805 - sglang - INFO - [2025-07-20 17:09:55 TP0] Decode batch. #running-req: 5, #token: 15018, token usage: 0.40, gen throughput (token/s): 233.55, #queue-req: 0
- 2025-07-20 17:09:55,805 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:09:56,671 - sglang - INFO - [2025-07-20 17:09:56 TP0] Decode batch. #running-req: 3, #token: 8785, token usage: 0.23, gen throughput (token/s): 190.57, #queue-req: 0
- 2025-07-20 17:09:56,671 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:09:57,510 - sglang - INFO - [2025-07-20 17:09:57 TP0] Decode batch. #running-req: 2, #token: 6007, token usage: 0.16, gen throughput (token/s): 109.58, #queue-req: 0
- 2025-07-20 17:09:57,511 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:09:58,346 - sglang - INFO - [2025-07-20 17:09:58 TP0] Decode batch. #running-req: 2, #token: 6087, token usage: 0.16, gen throughput (token/s): 95.80, #queue-req: 0
- 2025-07-20 17:09:58,346 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:09:59,183 - sglang - INFO - [2025-07-20 17:09:59 TP0] Decode batch. #running-req: 2, #token: 6167, token usage: 0.16, gen throughput (token/s): 95.57, #queue-req: 0
- 2025-07-20 17:09:59,183 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:10:00,018 - sglang - INFO - [2025-07-20 17:10:00 TP0] Decode batch. #running-req: 2, #token: 6247, token usage: 0.16, gen throughput (token/s): 95.80, #queue-req: 0
- 2025-07-20 17:10:00,018 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:10:00,853 - sglang - INFO - [2025-07-20 17:10:00 TP0] Decode batch. #running-req: 2, #token: 6327, token usage: 0.17, gen throughput (token/s): 95.81, #queue-req: 0
- 2025-07-20 17:10:00,853 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:10:01,686 - sglang - INFO - [2025-07-20 17:10:01 TP0] Decode batch. #running-req: 1, #token: 3170, token usage: 0.08, gen throughput (token/s): 87.57, #queue-req: 0
- 2025-07-20 17:10:01,686 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:10:02,514 - sglang - INFO - [2025-07-20 17:10:02 TP0] Decode batch. #running-req: 1, #token: 3210, token usage: 0.08, gen throughput (token/s): 48.32, #queue-req: 0
- 2025-07-20 17:10:02,514 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:10:03,348 - sglang - INFO - [2025-07-20 17:10:03 TP0] Decode batch. #running-req: 1, #token: 3250, token usage: 0.09, gen throughput (token/s): 47.96, #queue-req: 0
- 2025-07-20 17:10:03,348 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:10:03,543 - __main__ - INFO - Finished TaskGroup for worker on dbf48ba2e0ba653560d78d753cde2080c6a38613
- 2025-07-20 17:10:03,543 - __main__ - INFO - Got 1 docs for dbf48ba2e0ba653560d78d753cde2080c6a38613
- 2025-07-20 17:10:03,545 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-07-20 17:10:03,545 - __main__ - INFO - Work done
- 2025-07-20 17:10:03,546 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 17:17:48,853 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 17:17:48,853 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
- 2025-07-20 17:17:48,853 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-07-20 17:17:48,858 - __main__ - INFO - Calculated items_per_group: 1 based on average pages per PDF: 11.00
- 2025-07-20 17:17:49,044 - __main__ - INFO - Starting pipeline with PID 631182
- 2025-07-20 17:17:49,045 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 17:17:49,128 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 17:17:50,158 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 17:17:51,204 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 17:17:52,268 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 17:17:53,336 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 17:17:54,390 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 17:17:55,474 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 17:17:56,517 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 17:17:56,945 - sglang - INFO - [2025-07-20 17:17:56] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=531941470, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 17:17:56,946 - __main__ - INFO - [2025-07-20 17:17:56] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=531941470, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 17:17:57,567 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 17:17:57,998 - sglang - INFO - [2025-07-20 17:17:57] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 17:17:57,998 - __main__ - INFO - [2025-07-20 17:17:57] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 17:17:58,642 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 17:17:59,710 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 17:18:00,777 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 17:18:01,845 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 17:18:02,913 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 17:18:03,703 - sglang - INFO - [2025-07-20 17:18:03 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 17:18:03,703 - __main__ - INFO - [2025-07-20 17:18:03 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 17:18:03,705 - sglang - INFO - [2025-07-20 17:18:03 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 17:18:03,706 - __main__ - INFO - [2025-07-20 17:18:03 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 17:18:03,706 - sglang - INFO - [2025-07-20 17:18:03 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 17:18:03,706 - __main__ - INFO - [2025-07-20 17:18:03 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 17:18:03,706 - sglang - INFO - [2025-07-20 17:18:03 TP0] Init torch distributed begin.
- 2025-07-20 17:18:03,706 - __main__ - INFO - [2025-07-20 17:18:03 TP0] Init torch distributed begin.
- 2025-07-20 17:18:03,990 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 17:18:05,057 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 17:18:06,130 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 17:18:07,198 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 17:18:08,262 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 17:18:09,051 - sglang - INFO - [2025-07-20 17:18:09 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 17:18:09,052 - __main__ - INFO - [2025-07-20 17:18:09 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 17:18:09,329 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 17:18:09,727 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 17:18:09,727 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 17:18:10,385 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 17:18:11,452 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 17:18:12,507 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 17:18:13,574 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 17:18:14,642 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 17:18:15,710 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 17:18:16,777 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 17:18:17,845 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 17:18:18,919 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 17:18:19,634 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:29, 9.91s/it]
- 2025-07-20 17:18:19,635 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:29, 9.91s/it]
- 2025-07-20 17:18:19,997 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 17:18:21,070 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 17:18:22,138 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 17:18:23,206 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 17:18:24,269 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 17:18:25,333 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 17:18:26,389 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 17:18:27,456 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 17:18:28,524 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 17:18:29,591 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 17:18:30,660 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 17:18:30,854 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:21<00:21, 10.68s/it]
- 2025-07-20 17:18:30,854 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:21<00:21, 10.68s/it]
- 2025-07-20 17:18:31,736 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 17:18:32,804 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 17:18:33,873 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 17:18:34,128 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:24<00:07, 7.30s/it]
- 2025-07-20 17:18:34,129 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:24<00:07, 7.30s/it]
- 2025-07-20 17:18:34,949 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 17:18:36,017 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 17:18:37,090 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 17:18:38,158 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 17:18:38,648 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 6.20s/it]
- 2025-07-20 17:18:38,648 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 6.20s/it]
- 2025-07-20 17:18:38,648 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 7.23s/it]
- 2025-07-20 17:18:38,648 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 7.23s/it]
- 2025-07-20 17:18:38,648 - sglang - INFO -
- 2025-07-20 17:18:38,648 - __main__ - INFO -
- 2025-07-20 17:18:38,734 - sglang - INFO - [2025-07-20 17:18:38 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 17:18:38,734 - __main__ - INFO - [2025-07-20 17:18:38 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 17:18:38,741 - sglang - INFO - [2025-07-20 17:18:38 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 17:18:38,741 - __main__ - INFO - [2025-07-20 17:18:38 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 17:18:38,742 - sglang - INFO - [2025-07-20 17:18:38 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 17:18:38,742 - __main__ - INFO - [2025-07-20 17:18:38 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 17:18:38,923 - sglang - INFO - [2025-07-20 17:18:38 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 17:18:38,923 - __main__ - INFO - [2025-07-20 17:18:38 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 17:18:39,215 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
- 2025-07-20 17:18:40,272 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
- 2025-07-20 17:18:41,062 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.23s/it]
50%|█████ | 2/4 [00:01<00:01, 1.46it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.97it/s]
100%|██████████| 4/4 [00:02<00:00, 2.34it/s]
100%|██████████| 4/4 [00:02<00:00, 1.87it/s]
- 2025-07-20 17:18:41,062 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.23s/it]
50%|█████ | 2/4 [00:01<00:01, 1.46it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.97it/s]
100%|██████████| 4/4 [00:02<00:00, 2.34it/s]
100%|██████████| 4/4 [00:02<00:00, 1.87it/s]
- 2025-07-20 17:18:41,062 - sglang - INFO - [2025-07-20 17:18:41 TP0] Capture cuda graph end. Time elapsed: 2.14 s
- 2025-07-20 17:18:41,062 - __main__ - INFO - [2025-07-20 17:18:41 TP0] Capture cuda graph end. Time elapsed: 2.14 s
- 2025-07-20 17:18:41,327 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
- 2025-07-20 17:18:41,963 - sglang - INFO - [2025-07-20 17:18:41 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 17:18:41,963 - __main__ - INFO - [2025-07-20 17:18:41 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 17:18:42,396 - __main__ - INFO - sglang server is ready.
- 2025-07-20 17:18:42,397 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 17:18:42,397 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:18:42,397 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 17:18:42,397 - __main__ - INFO - Worker 0 processing work item 9face5eb793573e747789b627bf1cc4b334b5b93
- 2025-07-20 17:18:42,397 - __main__ - INFO - Created all tasks for 9face5eb793573e747789b627bf1cc4b334b5b93
- 2025-07-20 17:18:42,401 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
- 2025-07-20 17:18:43,071 - sglang - INFO - [2025-07-20 17:18:43 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:18:43,071 - __main__ - INFO - [2025-07-20 17:18:43 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:18:43,072 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:18:44,432 - sglang - INFO - [2025-07-20 17:18:44] The server is fired up and ready to roll!
- 2025-07-20 17:18:44,432 - __main__ - INFO - [2025-07-20 17:18:44] The server is fired up and ready to roll!
- 2025-07-20 17:18:48,931 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
- 2025-07-20 17:18:48,965 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
- 2025-07-20 17:18:48,989 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
- 2025-07-20 17:18:48,994 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
- 2025-07-20 17:18:49,017 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
- 2025-07-20 17:18:49,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
- 2025-07-20 17:18:49,062 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
- 2025-07-20 17:18:49,083 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
- 2025-07-20 17:18:49,121 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
- 2025-07-20 17:18:49,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
- 2025-07-20 17:18:49,233 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
- 2025-07-20 17:18:52,399 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:18:52,400 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:18:52,400 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-07-20 17:19:02,402 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:19:02,402 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:19:02,402 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-07-20 17:19:10,063 - sglang - INFO - [2025-07-20 17:19:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2223, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:19:10,063 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:19:11,350 - sglang - INFO - [2025-07-20 17:19:11 TP0] Prefill batch. #new-seq: 6, #new-token: 12840, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.06, #running-req: 1, #queue-req: 4
- 2025-07-20 17:19:11,350 - __main__ - INFO - sglang running req: 1 queue req: 4
- 2025-07-20 17:19:12,404 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:19:12,404 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:19:12,404 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-07-20 17:19:16,136 - sglang - INFO - [2025-07-20 17:19:16 TP0] Decode batch. #running-req: 7, #token: 15294, token usage: 0.40, gen throughput (token/s): 6.96, #queue-req: 4
- 2025-07-20 17:19:16,137 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:19:17,018 - sglang - INFO - [2025-07-20 17:19:17 TP0] Decode batch. #running-req: 7, #token: 15574, token usage: 0.41, gen throughput (token/s): 317.62, #queue-req: 4
- 2025-07-20 17:19:17,018 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:19:17,900 - sglang - INFO - [2025-07-20 17:19:17 TP0] Decode batch. #running-req: 7, #token: 15854, token usage: 0.42, gen throughput (token/s): 317.28, #queue-req: 4
- 2025-07-20 17:19:17,901 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:19:18,786 - sglang - INFO - [2025-07-20 17:19:18 TP0] Decode batch. #running-req: 7, #token: 16134, token usage: 0.42, gen throughput (token/s): 316.18, #queue-req: 4
- 2025-07-20 17:19:18,786 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:19:19,674 - sglang - INFO - [2025-07-20 17:19:19 TP0] Decode batch. #running-req: 7, #token: 16414, token usage: 0.43, gen throughput (token/s): 315.41, #queue-req: 4
- 2025-07-20 17:19:19,674 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:19:20,561 - sglang - INFO - [2025-07-20 17:19:20 TP0] Decode batch. #running-req: 7, #token: 16694, token usage: 0.44, gen throughput (token/s): 315.71, #queue-req: 4
- 2025-07-20 17:19:20,561 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:19:21,450 - sglang - INFO - [2025-07-20 17:19:21 TP0] Decode batch. #running-req: 7, #token: 16974, token usage: 0.45, gen throughput (token/s): 314.80, #queue-req: 4
- 2025-07-20 17:19:21,450 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:19:22,007 - sglang - INFO - [2025-07-20 17:19:22 TP0] Prefill batch. #new-seq: 3, #new-token: 6254, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 1
- 2025-07-20 17:19:22,007 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-07-20 17:19:22,406 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:19:22,406 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 21.16 21.16
- sglang_output_tokens 3.18 3.18
- 2025-07-20 17:19:22,406 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 11
- 2025-07-20 17:19:24,268 - sglang - INFO - [2025-07-20 17:19:24 TP0] Decode batch. #running-req: 9, #token: 21260, token usage: 0.56, gen throughput (token/s): 109.65, #queue-req: 1
- 2025-07-20 17:19:24,268 - __main__ - INFO - sglang running req: 9 queue req: 1
- 2025-07-20 17:19:25,068 - sglang - INFO - [2025-07-20 17:19:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2051, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.50, #running-req: 8, #queue-req: 0
- 2025-07-20 17:19:25,068 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:19:25,566 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 17:19:25,873 - sglang - INFO - [2025-07-20 17:19:25 TP0] Decode batch. #running-req: 9, #token: 21065, token usage: 0.55, gen throughput (token/s): 223.64, #queue-req: 0
- 2025-07-20 17:19:25,873 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:19:26,803 - sglang - INFO - [2025-07-20 17:19:26 TP0] Decode batch. #running-req: 8, #token: 18964, token usage: 0.50, gen throughput (token/s): 374.26, #queue-req: 0
- 2025-07-20 17:19:26,803 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:19:27,694 - sglang - INFO - [2025-07-20 17:19:27 TP0] Decode batch. #running-req: 8, #token: 19284, token usage: 0.51, gen throughput (token/s): 359.18, #queue-req: 0
- 2025-07-20 17:19:27,694 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:19:28,588 - sglang - INFO - [2025-07-20 17:19:28 TP0] Decode batch. #running-req: 8, #token: 19604, token usage: 0.52, gen throughput (token/s): 358.10, #queue-req: 0
- 2025-07-20 17:19:28,588 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:19:29,477 - sglang - INFO - [2025-07-20 17:19:29 TP0] Decode batch. #running-req: 7, #token: 17500, token usage: 0.46, gen throughput (token/s): 315.90, #queue-req: 0
- 2025-07-20 17:19:29,477 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:19:30,366 - sglang - INFO - [2025-07-20 17:19:30 TP0] Decode batch. #running-req: 7, #token: 17780, token usage: 0.47, gen throughput (token/s): 315.02, #queue-req: 0
- 2025-07-20 17:19:30,366 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:19:31,252 - sglang - INFO - [2025-07-20 17:19:31 TP0] Decode batch. #running-req: 7, #token: 18060, token usage: 0.48, gen throughput (token/s): 316.13, #queue-req: 0
- 2025-07-20 17:19:31,252 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:19:32,136 - sglang - INFO - [2025-07-20 17:19:32 TP0] Decode batch. #running-req: 7, #token: 18340, token usage: 0.48, gen throughput (token/s): 316.81, #queue-req: 0
- 2025-07-20 17:19:32,136 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:19:32,407 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:19:32,408 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 79.35 79.35
- sglang_output_tokens 14.50 14.50
- 2025-07-20 17:19:32,408 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 11
- 2025-07-20 17:19:33,015 - sglang - INFO - [2025-07-20 17:19:33 TP0] Decode batch. #running-req: 5, #token: 13130, token usage: 0.35, gen throughput (token/s): 261.41, #queue-req: 0
- 2025-07-20 17:19:33,016 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:19:33,890 - sglang - INFO - [2025-07-20 17:19:33 TP0] Decode batch. #running-req: 4, #token: 10913, token usage: 0.29, gen throughput (token/s): 219.53, #queue-req: 0
- 2025-07-20 17:19:33,890 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:19:34,753 - sglang - INFO - [2025-07-20 17:19:34 TP0] Decode batch. #running-req: 4, #token: 11073, token usage: 0.29, gen throughput (token/s): 185.29, #queue-req: 0
- 2025-07-20 17:19:34,753 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:19:35,611 - sglang - INFO - [2025-07-20 17:19:35 TP0] Decode batch. #running-req: 3, #token: 8965, token usage: 0.24, gen throughput (token/s): 145.72, #queue-req: 0
- 2025-07-20 17:19:35,611 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:19:36,468 - sglang - INFO - [2025-07-20 17:19:36 TP0] Decode batch. #running-req: 3, #token: 9085, token usage: 0.24, gen throughput (token/s): 140.04, #queue-req: 0
- 2025-07-20 17:19:36,468 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:19:37,318 - sglang - INFO - [2025-07-20 17:19:37 TP0] Decode batch. #running-req: 2, #token: 6109, token usage: 0.16, gen throughput (token/s): 114.14, #queue-req: 0
- 2025-07-20 17:19:37,318 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:38,161 - sglang - INFO - [2025-07-20 17:19:38 TP0] Decode batch. #running-req: 2, #token: 6189, token usage: 0.16, gen throughput (token/s): 94.91, #queue-req: 0
- 2025-07-20 17:19:38,161 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:39,002 - sglang - INFO - [2025-07-20 17:19:39 TP0] Decode batch. #running-req: 2, #token: 6269, token usage: 0.17, gen throughput (token/s): 95.10, #queue-req: 0
- 2025-07-20 17:19:39,003 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:39,843 - sglang - INFO - [2025-07-20 17:19:39 TP0] Decode batch. #running-req: 2, #token: 6349, token usage: 0.17, gen throughput (token/s): 95.09, #queue-req: 0
- 2025-07-20 17:19:39,844 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:40,687 - sglang - INFO - [2025-07-20 17:19:40 TP0] Decode batch. #running-req: 2, #token: 6429, token usage: 0.17, gen throughput (token/s): 94.83, #queue-req: 0
- 2025-07-20 17:19:40,687 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:41,533 - sglang - INFO - [2025-07-20 17:19:41 TP0] Decode batch. #running-req: 2, #token: 6509, token usage: 0.17, gen throughput (token/s): 94.62, #queue-req: 0
- 2025-07-20 17:19:41,533 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:42,377 - sglang - INFO - [2025-07-20 17:19:42 TP0] Decode batch. #running-req: 2, #token: 6589, token usage: 0.17, gen throughput (token/s): 94.71, #queue-req: 0
- 2025-07-20 17:19:42,378 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:42,409 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:19:42,409 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 164.72 164.72
- sglang_output_tokens 36.74 36.74
- 2025-07-20 17:19:42,410 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 9 | 11
- 2025-07-20 17:19:43,221 - sglang - INFO - [2025-07-20 17:19:43 TP0] Decode batch. #running-req: 2, #token: 6669, token usage: 0.18, gen throughput (token/s): 94.78, #queue-req: 0
- 2025-07-20 17:19:43,221 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:44,067 - sglang - INFO - [2025-07-20 17:19:44 TP0] Decode batch. #running-req: 2, #token: 6749, token usage: 0.18, gen throughput (token/s): 94.54, #queue-req: 0
- 2025-07-20 17:19:44,068 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:44,915 - sglang - INFO - [2025-07-20 17:19:44 TP0] Decode batch. #running-req: 2, #token: 6829, token usage: 0.18, gen throughput (token/s): 94.43, #queue-req: 0
- 2025-07-20 17:19:44,915 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:19:45,761 - sglang - INFO - [2025-07-20 17:19:45 TP0] Decode batch. #running-req: 1, #token: 3355, token usage: 0.09, gen throughput (token/s): 85.05, #queue-req: 0
- 2025-07-20 17:19:45,762 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:46,591 - sglang - INFO - [2025-07-20 17:19:46 TP0] Decode batch. #running-req: 1, #token: 3395, token usage: 0.09, gen throughput (token/s): 48.19, #queue-req: 0
- 2025-07-20 17:19:46,592 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:47,421 - sglang - INFO - [2025-07-20 17:19:47 TP0] Decode batch. #running-req: 1, #token: 3435, token usage: 0.09, gen throughput (token/s): 48.20, #queue-req: 0
- 2025-07-20 17:19:47,421 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:48,258 - sglang - INFO - [2025-07-20 17:19:48 TP0] Decode batch. #running-req: 1, #token: 3475, token usage: 0.09, gen throughput (token/s): 47.79, #queue-req: 0
- 2025-07-20 17:19:48,258 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:49,097 - sglang - INFO - [2025-07-20 17:19:49 TP0] Decode batch. #running-req: 1, #token: 3515, token usage: 0.09, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:19:49,098 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:49,935 - sglang - INFO - [2025-07-20 17:19:49 TP0] Decode batch. #running-req: 1, #token: 3555, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:19:49,936 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:50,774 - sglang - INFO - [2025-07-20 17:19:50 TP0] Decode batch. #running-req: 1, #token: 3595, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:19:50,774 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:51,613 - sglang - INFO - [2025-07-20 17:19:51 TP0] Decode batch. #running-req: 1, #token: 3635, token usage: 0.10, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:19:51,614 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:52,412 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:19:52,412 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 172.26 172.26
- sglang_output_tokens 41.60 41.60
- 2025-07-20 17:19:52,412 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:19:52,453 - sglang - INFO - [2025-07-20 17:19:52 TP0] Decode batch. #running-req: 1, #token: 3675, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:19:52,454 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:53,292 - sglang - INFO - [2025-07-20 17:19:53 TP0] Decode batch. #running-req: 1, #token: 3715, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-07-20 17:19:53,292 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:54,126 - sglang - INFO - [2025-07-20 17:19:54 TP0] Decode batch. #running-req: 1, #token: 3755, token usage: 0.10, gen throughput (token/s): 47.96, #queue-req: 0
- 2025-07-20 17:19:54,126 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:54,962 - sglang - INFO - [2025-07-20 17:19:54 TP0] Decode batch. #running-req: 1, #token: 3795, token usage: 0.10, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:19:54,962 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:55,802 - sglang - INFO - [2025-07-20 17:19:55 TP0] Decode batch. #running-req: 1, #token: 3835, token usage: 0.10, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:19:55,803 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:56,644 - sglang - INFO - [2025-07-20 17:19:56 TP0] Decode batch. #running-req: 1, #token: 3875, token usage: 0.10, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:19:56,644 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:57,484 - sglang - INFO - [2025-07-20 17:19:57 TP0] Decode batch. #running-req: 1, #token: 3915, token usage: 0.10, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:19:57,484 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:58,324 - sglang - INFO - [2025-07-20 17:19:58 TP0] Decode batch. #running-req: 1, #token: 3955, token usage: 0.10, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:19:58,324 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:19:59,165 - sglang - INFO - [2025-07-20 17:19:59 TP0] Decode batch. #running-req: 1, #token: 3995, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:19:59,165 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:00,006 - sglang - INFO - [2025-07-20 17:20:00 TP0] Decode batch. #running-req: 1, #token: 4035, token usage: 0.11, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:20:00,007 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:00,844 - sglang - INFO - [2025-07-20 17:20:00 TP0] Decode batch. #running-req: 1, #token: 4075, token usage: 0.11, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:20:00,844 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:01,680 - sglang - INFO - [2025-07-20 17:20:01 TP0] Decode batch. #running-req: 1, #token: 4115, token usage: 0.11, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:20:01,680 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:02,414 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:20:02,414 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 159.36 159.36
- sglang_output_tokens 38.49 38.49
- 2025-07-20 17:20:02,414 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:20:02,520 - sglang - INFO - [2025-07-20 17:20:02 TP0] Decode batch. #running-req: 1, #token: 4155, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:20:02,520 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:03,362 - sglang - INFO - [2025-07-20 17:20:03 TP0] Decode batch. #running-req: 1, #token: 4195, token usage: 0.11, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:20:03,362 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:04,203 - sglang - INFO - [2025-07-20 17:20:04 TP0] Decode batch. #running-req: 1, #token: 4235, token usage: 0.11, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:20:04,204 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:05,045 - sglang - INFO - [2025-07-20 17:20:05 TP0] Decode batch. #running-req: 1, #token: 4275, token usage: 0.11, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:20:05,045 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:05,887 - sglang - INFO - [2025-07-20 17:20:05 TP0] Decode batch. #running-req: 1, #token: 4315, token usage: 0.11, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:20:05,887 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:06,730 - sglang - INFO - [2025-07-20 17:20:06 TP0] Decode batch. #running-req: 1, #token: 4355, token usage: 0.11, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-07-20 17:20:06,730 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:07,573 - sglang - INFO - [2025-07-20 17:20:07 TP0] Decode batch. #running-req: 1, #token: 4395, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-07-20 17:20:07,573 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:08,413 - sglang - INFO - [2025-07-20 17:20:08 TP0] Decode batch. #running-req: 1, #token: 4435, token usage: 0.12, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:20:08,414 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:09,255 - sglang - INFO - [2025-07-20 17:20:09 TP0] Decode batch. #running-req: 1, #token: 4475, token usage: 0.12, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:20:09,255 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:10,097 - sglang - INFO - [2025-07-20 17:20:10 TP0] Decode batch. #running-req: 1, #token: 4515, token usage: 0.12, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:20:10,097 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:10,940 - sglang - INFO - [2025-07-20 17:20:10 TP0] Decode batch. #running-req: 1, #token: 4555, token usage: 0.12, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:20:10,940 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:11,783 - sglang - INFO - [2025-07-20 17:20:11 TP0] Decode batch. #running-req: 1, #token: 4595, token usage: 0.12, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-07-20 17:20:11,783 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:12,415 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:20:12,416 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 148.26 148.26
- sglang_output_tokens 35.81 35.81
- 2025-07-20 17:20:12,416 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:20:12,625 - sglang - INFO - [2025-07-20 17:20:12 TP0] Decode batch. #running-req: 1, #token: 4635, token usage: 0.12, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:20:12,626 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:13,470 - sglang - INFO - [2025-07-20 17:20:13 TP0] Decode batch. #running-req: 1, #token: 4675, token usage: 0.12, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:20:13,470 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:14,314 - sglang - INFO - [2025-07-20 17:20:14 TP0] Decode batch. #running-req: 1, #token: 4715, token usage: 0.12, gen throughput (token/s): 47.39, #queue-req: 0
- 2025-07-20 17:20:14,314 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:15,156 - sglang - INFO - [2025-07-20 17:20:15 TP0] Decode batch. #running-req: 1, #token: 4755, token usage: 0.13, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:20:15,156 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:15,998 - sglang - INFO - [2025-07-20 17:20:15 TP0] Decode batch. #running-req: 1, #token: 4795, token usage: 0.13, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:20:15,999 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:16,839 - sglang - INFO - [2025-07-20 17:20:16 TP0] Decode batch. #running-req: 1, #token: 4835, token usage: 0.13, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:20:16,839 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:17,680 - sglang - INFO - [2025-07-20 17:20:17 TP0] Decode batch. #running-req: 1, #token: 4875, token usage: 0.13, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:20:17,680 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:18,524 - sglang - INFO - [2025-07-20 17:20:18 TP0] Decode batch. #running-req: 1, #token: 4915, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:20:18,524 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:19,367 - sglang - INFO - [2025-07-20 17:20:19 TP0] Decode batch. #running-req: 1, #token: 4955, token usage: 0.13, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:20:19,367 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:20,211 - sglang - INFO - [2025-07-20 17:20:20 TP0] Decode batch. #running-req: 1, #token: 4995, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:20:20,211 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:21,057 - sglang - INFO - [2025-07-20 17:20:21 TP0] Decode batch. #running-req: 1, #token: 5035, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:20:21,057 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:21,903 - sglang - INFO - [2025-07-20 17:20:21 TP0] Decode batch. #running-req: 1, #token: 5075, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
- 2025-07-20 17:20:21,903 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:22,036 - __main__ - WARNING - JSON decode error on attempt 0 for test_pdf/1144520000702630XG3440106001004.pdf-8: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:20:22,177 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
- 2025-07-20 17:20:22,326 - sglang - INFO - [2025-07-20 17:20:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2082, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:20:22,326 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:20:22,417 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:20:22,417 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 152.16 152.16
- sglang_output_tokens 53.01 53.01
- 2025-07-20 17:20:22,417 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:20:23,706 - sglang - INFO - [2025-07-20 17:20:23 TP0] Decode batch. #running-req: 1, #token: 2116, token usage: 0.06, gen throughput (token/s): 22.18, #queue-req: 0
- 2025-07-20 17:20:23,706 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:24,540 - sglang - INFO - [2025-07-20 17:20:24 TP0] Decode batch. #running-req: 1, #token: 2156, token usage: 0.06, gen throughput (token/s): 47.95, #queue-req: 0
- 2025-07-20 17:20:24,540 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:25,376 - sglang - INFO - [2025-07-20 17:20:25 TP0] Decode batch. #running-req: 1, #token: 2196, token usage: 0.06, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:20:25,376 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:26,212 - sglang - INFO - [2025-07-20 17:20:26 TP0] Decode batch. #running-req: 1, #token: 2236, token usage: 0.06, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:20:26,213 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:27,048 - sglang - INFO - [2025-07-20 17:20:27 TP0] Decode batch. #running-req: 1, #token: 2276, token usage: 0.06, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:20:27,049 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:27,886 - sglang - INFO - [2025-07-20 17:20:27 TP0] Decode batch. #running-req: 1, #token: 2316, token usage: 0.06, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:20:27,886 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:28,724 - sglang - INFO - [2025-07-20 17:20:28 TP0] Decode batch. #running-req: 1, #token: 2356, token usage: 0.06, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:20:28,724 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:29,560 - sglang - INFO - [2025-07-20 17:20:29 TP0] Decode batch. #running-req: 1, #token: 2396, token usage: 0.06, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:20:29,560 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:30,396 - sglang - INFO - [2025-07-20 17:20:30 TP0] Decode batch. #running-req: 1, #token: 2436, token usage: 0.06, gen throughput (token/s): 47.86, #queue-req: 0
- 2025-07-20 17:20:30,396 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:31,226 - sglang - INFO - [2025-07-20 17:20:31 TP0] Decode batch. #running-req: 1, #token: 2476, token usage: 0.07, gen throughput (token/s): 48.19, #queue-req: 0
- 2025-07-20 17:20:31,226 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:32,060 - sglang - INFO - [2025-07-20 17:20:32 TP0] Decode batch. #running-req: 1, #token: 2516, token usage: 0.07, gen throughput (token/s): 47.96, #queue-req: 0
- 2025-07-20 17:20:32,060 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:32,418 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:20:32,418 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 142.86 142.86
- sglang_output_tokens 49.77 49.77
- 2025-07-20 17:20:32,418 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:20:32,896 - sglang - INFO - [2025-07-20 17:20:32 TP0] Decode batch. #running-req: 1, #token: 2556, token usage: 0.07, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:20:32,896 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:33,735 - sglang - INFO - [2025-07-20 17:20:33 TP0] Decode batch. #running-req: 1, #token: 2596, token usage: 0.07, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:20:33,735 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:34,573 - sglang - INFO - [2025-07-20 17:20:34 TP0] Decode batch. #running-req: 1, #token: 2636, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:20:34,573 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:35,411 - sglang - INFO - [2025-07-20 17:20:35 TP0] Decode batch. #running-req: 1, #token: 2676, token usage: 0.07, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:20:35,411 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:36,249 - sglang - INFO - [2025-07-20 17:20:36 TP0] Decode batch. #running-req: 1, #token: 2716, token usage: 0.07, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:20:36,249 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:37,089 - sglang - INFO - [2025-07-20 17:20:37 TP0] Decode batch. #running-req: 1, #token: 2756, token usage: 0.07, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-07-20 17:20:37,089 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:37,926 - sglang - INFO - [2025-07-20 17:20:37 TP0] Decode batch. #running-req: 1, #token: 2796, token usage: 0.07, gen throughput (token/s): 47.78, #queue-req: 0
- 2025-07-20 17:20:37,926 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:38,762 - sglang - INFO - [2025-07-20 17:20:38 TP0] Decode batch. #running-req: 1, #token: 2836, token usage: 0.07, gen throughput (token/s): 47.81, #queue-req: 0
- 2025-07-20 17:20:38,763 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:39,600 - sglang - INFO - [2025-07-20 17:20:39 TP0] Decode batch. #running-req: 1, #token: 2876, token usage: 0.08, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:20:39,600 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:40,439 - sglang - INFO - [2025-07-20 17:20:40 TP0] Decode batch. #running-req: 1, #token: 2916, token usage: 0.08, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:20:40,439 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:41,279 - sglang - INFO - [2025-07-20 17:20:41 TP0] Decode batch. #running-req: 1, #token: 2956, token usage: 0.08, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:20:41,279 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:42,117 - sglang - INFO - [2025-07-20 17:20:42 TP0] Decode batch. #running-req: 1, #token: 2996, token usage: 0.08, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:20:42,117 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:42,419 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:20:42,419 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 134.63 134.63
- sglang_output_tokens 46.90 46.90
- 2025-07-20 17:20:42,420 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:20:42,956 - sglang - INFO - [2025-07-20 17:20:42 TP0] Decode batch. #running-req: 1, #token: 3036, token usage: 0.08, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:20:42,956 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:43,796 - sglang - INFO - [2025-07-20 17:20:43 TP0] Decode batch. #running-req: 1, #token: 3076, token usage: 0.08, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-07-20 17:20:43,796 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:44,636 - sglang - INFO - [2025-07-20 17:20:44 TP0] Decode batch. #running-req: 1, #token: 3116, token usage: 0.08, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:20:44,636 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:45,475 - sglang - INFO - [2025-07-20 17:20:45 TP0] Decode batch. #running-req: 1, #token: 3156, token usage: 0.08, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:20:45,475 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:46,306 - sglang - INFO - [2025-07-20 17:20:46 TP0] Decode batch. #running-req: 1, #token: 3196, token usage: 0.08, gen throughput (token/s): 48.12, #queue-req: 0
- 2025-07-20 17:20:46,307 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:47,138 - sglang - INFO - [2025-07-20 17:20:47 TP0] Decode batch. #running-req: 1, #token: 3236, token usage: 0.09, gen throughput (token/s): 48.10, #queue-req: 0
- 2025-07-20 17:20:47,138 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:47,977 - sglang - INFO - [2025-07-20 17:20:47 TP0] Decode batch. #running-req: 1, #token: 3276, token usage: 0.09, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:20:47,977 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:48,816 - sglang - INFO - [2025-07-20 17:20:48 TP0] Decode batch. #running-req: 1, #token: 3316, token usage: 0.09, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-07-20 17:20:48,817 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:49,655 - sglang - INFO - [2025-07-20 17:20:49 TP0] Decode batch. #running-req: 1, #token: 3356, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:20:49,655 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:50,495 - sglang - INFO - [2025-07-20 17:20:50 TP0] Decode batch. #running-req: 1, #token: 3396, token usage: 0.09, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:20:50,495 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:51,336 - sglang - INFO - [2025-07-20 17:20:51 TP0] Decode batch. #running-req: 1, #token: 3436, token usage: 0.09, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:20:51,337 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:52,177 - sglang - INFO - [2025-07-20 17:20:52 TP0] Decode batch. #running-req: 1, #token: 3476, token usage: 0.09, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:20:52,177 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:52,420 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:20:52,420 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 127.29 127.29
- sglang_output_tokens 44.35 44.35
- 2025-07-20 17:20:52,420 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:20:53,014 - sglang - INFO - [2025-07-20 17:20:53 TP0] Decode batch. #running-req: 1, #token: 3516, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:20:53,014 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:53,850 - sglang - INFO - [2025-07-20 17:20:53 TP0] Decode batch. #running-req: 1, #token: 3556, token usage: 0.09, gen throughput (token/s): 47.85, #queue-req: 0
- 2025-07-20 17:20:53,850 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:54,688 - sglang - INFO - [2025-07-20 17:20:54 TP0] Decode batch. #running-req: 1, #token: 3596, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:20:54,689 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:55,530 - sglang - INFO - [2025-07-20 17:20:55 TP0] Decode batch. #running-req: 1, #token: 3636, token usage: 0.10, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:20:55,530 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:56,371 - sglang - INFO - [2025-07-20 17:20:56 TP0] Decode batch. #running-req: 1, #token: 3676, token usage: 0.10, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:20:56,371 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:57,212 - sglang - INFO - [2025-07-20 17:20:57 TP0] Decode batch. #running-req: 1, #token: 3716, token usage: 0.10, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:20:57,212 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:58,055 - sglang - INFO - [2025-07-20 17:20:58 TP0] Decode batch. #running-req: 1, #token: 3756, token usage: 0.10, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:20:58,055 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:58,897 - sglang - INFO - [2025-07-20 17:20:58 TP0] Decode batch. #running-req: 1, #token: 3796, token usage: 0.10, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:20:58,897 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:20:59,737 - sglang - INFO - [2025-07-20 17:20:59 TP0] Decode batch. #running-req: 1, #token: 3836, token usage: 0.10, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:20:59,738 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:00,576 - sglang - INFO - [2025-07-20 17:21:00 TP0] Decode batch. #running-req: 1, #token: 3876, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-07-20 17:21:00,576 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:01,417 - sglang - INFO - [2025-07-20 17:21:01 TP0] Decode batch. #running-req: 1, #token: 3916, token usage: 0.10, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:21:01,417 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:02,258 - sglang - INFO - [2025-07-20 17:21:02 TP0] Decode batch. #running-req: 1, #token: 3956, token usage: 0.10, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:21:02,258 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:02,422 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:21:02,422 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 120.72 120.72
- sglang_output_tokens 42.06 42.06
- 2025-07-20 17:21:02,422 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:21:03,101 - sglang - INFO - [2025-07-20 17:21:03 TP0] Decode batch. #running-req: 1, #token: 3996, token usage: 0.11, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:21:03,102 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:03,942 - sglang - INFO - [2025-07-20 17:21:03 TP0] Decode batch. #running-req: 1, #token: 4036, token usage: 0.11, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:21:03,942 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:04,784 - sglang - INFO - [2025-07-20 17:21:04 TP0] Decode batch. #running-req: 1, #token: 4076, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:21:04,784 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:05,628 - sglang - INFO - [2025-07-20 17:21:05 TP0] Decode batch. #running-req: 1, #token: 4116, token usage: 0.11, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:21:05,628 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:06,472 - sglang - INFO - [2025-07-20 17:21:06 TP0] Decode batch. #running-req: 1, #token: 4156, token usage: 0.11, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:21:06,472 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:07,313 - sglang - INFO - [2025-07-20 17:21:07 TP0] Decode batch. #running-req: 1, #token: 4196, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:21:07,313 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:08,151 - sglang - INFO - [2025-07-20 17:21:08 TP0] Decode batch. #running-req: 1, #token: 4236, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:21:08,152 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:08,994 - sglang - INFO - [2025-07-20 17:21:08 TP0] Decode batch. #running-req: 1, #token: 4276, token usage: 0.11, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-07-20 17:21:08,994 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:09,837 - sglang - INFO - [2025-07-20 17:21:09 TP0] Decode batch. #running-req: 1, #token: 4316, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:21:09,837 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:10,680 - sglang - INFO - [2025-07-20 17:21:10 TP0] Decode batch. #running-req: 1, #token: 4356, token usage: 0.11, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:21:10,681 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:11,523 - sglang - INFO - [2025-07-20 17:21:11 TP0] Decode batch. #running-req: 1, #token: 4396, token usage: 0.12, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:21:11,523 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:12,367 - sglang - INFO - [2025-07-20 17:21:12 TP0] Decode batch. #running-req: 1, #token: 4436, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:21:12,367 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:12,423 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:21:12,423 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 114.78 114.78
- sglang_output_tokens 39.99 39.99
- 2025-07-20 17:21:12,423 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:21:13,213 - sglang - INFO - [2025-07-20 17:21:13 TP0] Decode batch. #running-req: 1, #token: 4476, token usage: 0.12, gen throughput (token/s): 47.29, #queue-req: 0
- 2025-07-20 17:21:13,213 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:14,058 - sglang - INFO - [2025-07-20 17:21:14 TP0] Decode batch. #running-req: 1, #token: 4516, token usage: 0.12, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:21:14,058 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:14,901 - sglang - INFO - [2025-07-20 17:21:14 TP0] Decode batch. #running-req: 1, #token: 4556, token usage: 0.12, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-07-20 17:21:14,901 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:15,746 - sglang - INFO - [2025-07-20 17:21:15 TP0] Decode batch. #running-req: 1, #token: 4596, token usage: 0.12, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:21:15,746 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:16,590 - sglang - INFO - [2025-07-20 17:21:16 TP0] Decode batch. #running-req: 1, #token: 4636, token usage: 0.12, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:21:16,591 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:17,435 - sglang - INFO - [2025-07-20 17:21:17 TP0] Decode batch. #running-req: 1, #token: 4676, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:21:17,435 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:18,282 - sglang - INFO - [2025-07-20 17:21:18 TP0] Decode batch. #running-req: 1, #token: 4716, token usage: 0.12, gen throughput (token/s): 47.22, #queue-req: 0
- 2025-07-20 17:21:18,282 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:19,127 - sglang - INFO - [2025-07-20 17:21:19 TP0] Decode batch. #running-req: 1, #token: 4756, token usage: 0.13, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:21:19,127 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:19,972 - sglang - INFO - [2025-07-20 17:21:19 TP0] Decode batch. #running-req: 1, #token: 4796, token usage: 0.13, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:21:19,972 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:20,818 - sglang - INFO - [2025-07-20 17:21:20 TP0] Decode batch. #running-req: 1, #token: 4836, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:21:20,818 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:21,663 - sglang - INFO - [2025-07-20 17:21:21 TP0] Decode batch. #running-req: 1, #token: 4876, token usage: 0.13, gen throughput (token/s): 47.35, #queue-req: 0
- 2025-07-20 17:21:21,663 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:22,425 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:21:22,426 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 109.41 109.41
- sglang_output_tokens 38.12 38.12
- 2025-07-20 17:21:22,426 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:21:22,506 - sglang - INFO - [2025-07-20 17:21:22 TP0] Decode batch. #running-req: 1, #token: 4916, token usage: 0.13, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:21:22,507 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:23,348 - sglang - INFO - [2025-07-20 17:21:23 TP0] Decode batch. #running-req: 1, #token: 4956, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:21:23,348 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:24,189 - sglang - INFO - [2025-07-20 17:21:24 TP0] Decode batch. #running-req: 1, #token: 4996, token usage: 0.13, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:21:24,189 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:25,035 - sglang - INFO - [2025-07-20 17:21:25 TP0] Decode batch. #running-req: 1, #token: 5036, token usage: 0.13, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:21:25,035 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:25,881 - sglang - INFO - [2025-07-20 17:21:25 TP0] Decode batch. #running-req: 1, #token: 5076, token usage: 0.13, gen throughput (token/s): 47.24, #queue-req: 0
- 2025-07-20 17:21:25,882 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:25,995 - __main__ - WARNING - JSON decode error on attempt 1 for test_pdf/1144520000702630XG3440106001004.pdf-8: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:21:26,127 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
- 2025-07-20 17:21:26,370 - sglang - INFO - [2025-07-20 17:21:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2082, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:21:26,370 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:21:27,778 - sglang - INFO - [2025-07-20 17:21:27 TP0] Decode batch. #running-req: 1, #token: 2117, token usage: 0.06, gen throughput (token/s): 21.09, #queue-req: 0
- 2025-07-20 17:21:27,778 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:28,617 - sglang - INFO - [2025-07-20 17:21:28 TP0] Decode batch. #running-req: 1, #token: 2157, token usage: 0.06, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:21:28,617 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:29,455 - sglang - INFO - [2025-07-20 17:21:29 TP0] Decode batch. #running-req: 1, #token: 2197, token usage: 0.06, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:21:29,455 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:30,288 - sglang - INFO - [2025-07-20 17:21:30 TP0] Decode batch. #running-req: 1, #token: 2237, token usage: 0.06, gen throughput (token/s): 47.98, #queue-req: 0
- 2025-07-20 17:21:30,289 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:31,123 - sglang - INFO - [2025-07-20 17:21:31 TP0] Decode batch. #running-req: 1, #token: 2277, token usage: 0.06, gen throughput (token/s): 47.94, #queue-req: 0
- 2025-07-20 17:21:31,123 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:31,958 - sglang - INFO - [2025-07-20 17:21:31 TP0] Decode batch. #running-req: 1, #token: 2317, token usage: 0.06, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-07-20 17:21:31,958 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:32,427 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:21:32,427 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 113.83 113.83
- sglang_output_tokens 49.83 49.83
- 2025-07-20 17:21:32,428 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:21:32,796 - sglang - INFO - [2025-07-20 17:21:32 TP0] Decode batch. #running-req: 1, #token: 2357, token usage: 0.06, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:21:32,797 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:33,634 - sglang - INFO - [2025-07-20 17:21:33 TP0] Decode batch. #running-req: 1, #token: 2397, token usage: 0.06, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:21:33,635 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:34,471 - sglang - INFO - [2025-07-20 17:21:34 TP0] Decode batch. #running-req: 1, #token: 2437, token usage: 0.06, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:21:34,471 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:35,310 - sglang - INFO - [2025-07-20 17:21:35 TP0] Decode batch. #running-req: 1, #token: 2477, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:21:35,310 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:36,148 - sglang - INFO - [2025-07-20 17:21:36 TP0] Decode batch. #running-req: 1, #token: 2517, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:21:36,148 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:36,984 - sglang - INFO - [2025-07-20 17:21:36 TP0] Decode batch. #running-req: 1, #token: 2557, token usage: 0.07, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:21:36,985 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:37,822 - sglang - INFO - [2025-07-20 17:21:37 TP0] Decode batch. #running-req: 1, #token: 2597, token usage: 0.07, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:21:37,822 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:38,660 - sglang - INFO - [2025-07-20 17:21:38 TP0] Decode batch. #running-req: 1, #token: 2637, token usage: 0.07, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:21:38,660 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:39,498 - sglang - INFO - [2025-07-20 17:21:39 TP0] Decode batch. #running-req: 1, #token: 2677, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:21:39,498 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:40,337 - sglang - INFO - [2025-07-20 17:21:40 TP0] Decode batch. #running-req: 1, #token: 2717, token usage: 0.07, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:21:40,338 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:41,178 - sglang - INFO - [2025-07-20 17:21:41 TP0] Decode batch. #running-req: 1, #token: 2757, token usage: 0.07, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:21:41,178 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:42,017 - sglang - INFO - [2025-07-20 17:21:42 TP0] Decode batch. #running-req: 1, #token: 2797, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:21:42,017 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:42,430 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:21:42,430 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 108.95 108.95
- sglang_output_tokens 47.70 47.70
- 2025-07-20 17:21:42,430 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:21:42,857 - sglang - INFO - [2025-07-20 17:21:42 TP0] Decode batch. #running-req: 1, #token: 2837, token usage: 0.07, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:21:42,857 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:43,695 - sglang - INFO - [2025-07-20 17:21:43 TP0] Decode batch. #running-req: 1, #token: 2877, token usage: 0.08, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:21:43,695 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:44,534 - sglang - INFO - [2025-07-20 17:21:44 TP0] Decode batch. #running-req: 1, #token: 2917, token usage: 0.08, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:21:44,534 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:45,371 - sglang - INFO - [2025-07-20 17:21:45 TP0] Decode batch. #running-req: 1, #token: 2957, token usage: 0.08, gen throughput (token/s): 47.80, #queue-req: 0
- 2025-07-20 17:21:45,371 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:46,203 - sglang - INFO - [2025-07-20 17:21:46 TP0] Decode batch. #running-req: 1, #token: 2997, token usage: 0.08, gen throughput (token/s): 48.08, #queue-req: 0
- 2025-07-20 17:21:46,203 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:47,035 - sglang - INFO - [2025-07-20 17:21:47 TP0] Decode batch. #running-req: 1, #token: 3037, token usage: 0.08, gen throughput (token/s): 48.10, #queue-req: 0
- 2025-07-20 17:21:47,035 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:47,873 - sglang - INFO - [2025-07-20 17:21:47 TP0] Decode batch. #running-req: 1, #token: 3077, token usage: 0.08, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:21:47,873 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:48,712 - sglang - INFO - [2025-07-20 17:21:48 TP0] Decode batch. #running-req: 1, #token: 3117, token usage: 0.08, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:21:48,713 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:49,551 - sglang - INFO - [2025-07-20 17:21:49 TP0] Decode batch. #running-req: 1, #token: 3157, token usage: 0.08, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:21:49,551 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:50,391 - sglang - INFO - [2025-07-20 17:21:50 TP0] Decode batch. #running-req: 1, #token: 3197, token usage: 0.08, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:21:50,391 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:51,232 - sglang - INFO - [2025-07-20 17:21:51 TP0] Decode batch. #running-req: 1, #token: 3237, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:21:51,232 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:52,072 - sglang - INFO - [2025-07-20 17:21:52 TP0] Decode batch. #running-req: 1, #token: 3277, token usage: 0.09, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-07-20 17:21:52,072 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:52,431 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:21:52,432 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 104.48 104.48
- sglang_output_tokens 45.74 45.74
- 2025-07-20 17:21:52,432 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:21:52,908 - sglang - INFO - [2025-07-20 17:21:52 TP0] Decode batch. #running-req: 1, #token: 3317, token usage: 0.09, gen throughput (token/s): 47.85, #queue-req: 0
- 2025-07-20 17:21:52,908 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:53,743 - sglang - INFO - [2025-07-20 17:21:53 TP0] Decode batch. #running-req: 1, #token: 3357, token usage: 0.09, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-07-20 17:21:53,743 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:54,583 - sglang - INFO - [2025-07-20 17:21:54 TP0] Decode batch. #running-req: 1, #token: 3397, token usage: 0.09, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:21:54,583 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:55,426 - sglang - INFO - [2025-07-20 17:21:55 TP0] Decode batch. #running-req: 1, #token: 3437, token usage: 0.09, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:21:55,426 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:56,268 - sglang - INFO - [2025-07-20 17:21:56 TP0] Decode batch. #running-req: 1, #token: 3477, token usage: 0.09, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:21:56,268 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:57,109 - sglang - INFO - [2025-07-20 17:21:57 TP0] Decode batch. #running-req: 1, #token: 3517, token usage: 0.09, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:21:57,109 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:57,952 - sglang - INFO - [2025-07-20 17:21:57 TP0] Decode batch. #running-req: 1, #token: 3557, token usage: 0.09, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:21:57,952 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:58,796 - sglang - INFO - [2025-07-20 17:21:58 TP0] Decode batch. #running-req: 1, #token: 3597, token usage: 0.09, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:21:58,796 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:21:59,637 - sglang - INFO - [2025-07-20 17:21:59 TP0] Decode batch. #running-req: 1, #token: 3637, token usage: 0.10, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:21:59,638 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:00,474 - sglang - INFO - [2025-07-20 17:22:00 TP0] Decode batch. #running-req: 1, #token: 3677, token usage: 0.10, gen throughput (token/s): 47.79, #queue-req: 0
- 2025-07-20 17:22:00,475 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:01,315 - sglang - INFO - [2025-07-20 17:22:01 TP0] Decode batch. #running-req: 1, #token: 3717, token usage: 0.10, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:22:01,315 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:02,158 - sglang - INFO - [2025-07-20 17:22:02 TP0] Decode batch. #running-req: 1, #token: 3757, token usage: 0.10, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:22:02,158 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:02,433 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:22:02,433 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 100.36 100.36
- sglang_output_tokens 43.93 43.93
- 2025-07-20 17:22:02,433 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:22:03,002 - sglang - INFO - [2025-07-20 17:22:03 TP0] Decode batch. #running-req: 1, #token: 3797, token usage: 0.10, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:22:03,003 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:03,844 - sglang - INFO - [2025-07-20 17:22:03 TP0] Decode batch. #running-req: 1, #token: 3837, token usage: 0.10, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:22:03,844 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:04,686 - sglang - INFO - [2025-07-20 17:22:04 TP0] Decode batch. #running-req: 1, #token: 3877, token usage: 0.10, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:22:04,686 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:05,530 - sglang - INFO - [2025-07-20 17:22:05 TP0] Decode batch. #running-req: 1, #token: 3917, token usage: 0.10, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:22:05,530 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:06,372 - sglang - INFO - [2025-07-20 17:22:06 TP0] Decode batch. #running-req: 1, #token: 3957, token usage: 0.10, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:22:06,372 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:07,211 - sglang - INFO - [2025-07-20 17:22:07 TP0] Decode batch. #running-req: 1, #token: 3997, token usage: 0.11, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:22:07,212 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:08,048 - sglang - INFO - [2025-07-20 17:22:08 TP0] Decode batch. #running-req: 1, #token: 4037, token usage: 0.11, gen throughput (token/s): 47.78, #queue-req: 0
- 2025-07-20 17:22:08,049 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:08,888 - sglang - INFO - [2025-07-20 17:22:08 TP0] Decode batch. #running-req: 1, #token: 4077, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:22:08,888 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:09,731 - sglang - INFO - [2025-07-20 17:22:09 TP0] Decode batch. #running-req: 1, #token: 4117, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:22:09,731 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:10,575 - sglang - INFO - [2025-07-20 17:22:10 TP0] Decode batch. #running-req: 1, #token: 4157, token usage: 0.11, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:22:10,575 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:11,416 - sglang - INFO - [2025-07-20 17:22:11 TP0] Decode batch. #running-req: 1, #token: 4197, token usage: 0.11, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:22:11,417 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:12,261 - sglang - INFO - [2025-07-20 17:22:12 TP0] Decode batch. #running-req: 1, #token: 4237, token usage: 0.11, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:22:12,261 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:12,434 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:22:12,434 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 96.55 96.55
- sglang_output_tokens 42.27 42.27
- 2025-07-20 17:22:12,434 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:22:13,106 - sglang - INFO - [2025-07-20 17:22:13 TP0] Decode batch. #running-req: 1, #token: 4277, token usage: 0.11, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:22:13,106 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:13,949 - sglang - INFO - [2025-07-20 17:22:13 TP0] Decode batch. #running-req: 1, #token: 4317, token usage: 0.11, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:22:13,949 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:14,789 - sglang - INFO - [2025-07-20 17:22:14 TP0] Decode batch. #running-req: 1, #token: 4357, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:22:14,789 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:15,631 - sglang - INFO - [2025-07-20 17:22:15 TP0] Decode batch. #running-req: 1, #token: 4397, token usage: 0.12, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:22:15,631 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:16,473 - sglang - INFO - [2025-07-20 17:22:16 TP0] Decode batch. #running-req: 1, #token: 4437, token usage: 0.12, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:22:16,473 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:17,318 - sglang - INFO - [2025-07-20 17:22:17 TP0] Decode batch. #running-req: 1, #token: 4477, token usage: 0.12, gen throughput (token/s): 47.35, #queue-req: 0
- 2025-07-20 17:22:17,318 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:18,163 - sglang - INFO - [2025-07-20 17:22:18 TP0] Decode batch. #running-req: 1, #token: 4517, token usage: 0.12, gen throughput (token/s): 47.33, #queue-req: 0
- 2025-07-20 17:22:18,163 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:19,005 - sglang - INFO - [2025-07-20 17:22:19 TP0] Decode batch. #running-req: 1, #token: 4557, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:22:19,005 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:19,851 - sglang - INFO - [2025-07-20 17:22:19 TP0] Decode batch. #running-req: 1, #token: 4597, token usage: 0.12, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:22:19,851 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:20,697 - sglang - INFO - [2025-07-20 17:22:20 TP0] Decode batch. #running-req: 1, #token: 4637, token usage: 0.12, gen throughput (token/s): 47.28, #queue-req: 0
- 2025-07-20 17:22:20,697 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:21,542 - sglang - INFO - [2025-07-20 17:22:21 TP0] Decode batch. #running-req: 1, #token: 4677, token usage: 0.12, gen throughput (token/s): 47.33, #queue-req: 0
- 2025-07-20 17:22:21,542 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:22,386 - sglang - INFO - [2025-07-20 17:22:22 TP0] Decode batch. #running-req: 1, #token: 4717, token usage: 0.12, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:22:22,386 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:22,435 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:22:22,436 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 93.02 93.02
- sglang_output_tokens 40.72 40.72
- 2025-07-20 17:22:22,436 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:22:23,227 - sglang - INFO - [2025-07-20 17:22:23 TP0] Decode batch. #running-req: 1, #token: 4757, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:22:23,228 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:24,070 - sglang - INFO - [2025-07-20 17:22:24 TP0] Decode batch. #running-req: 1, #token: 4797, token usage: 0.13, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-07-20 17:22:24,070 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:24,916 - sglang - INFO - [2025-07-20 17:22:24 TP0] Decode batch. #running-req: 1, #token: 4837, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
- 2025-07-20 17:22:24,916 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:25,762 - sglang - INFO - [2025-07-20 17:22:25 TP0] Decode batch. #running-req: 1, #token: 4877, token usage: 0.13, gen throughput (token/s): 47.28, #queue-req: 0
- 2025-07-20 17:22:25,762 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:26,607 - sglang - INFO - [2025-07-20 17:22:26 TP0] Decode batch. #running-req: 1, #token: 4917, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
- 2025-07-20 17:22:26,608 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:27,454 - sglang - INFO - [2025-07-20 17:22:27 TP0] Decode batch. #running-req: 1, #token: 4957, token usage: 0.13, gen throughput (token/s): 47.23, #queue-req: 0
- 2025-07-20 17:22:27,455 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:28,300 - sglang - INFO - [2025-07-20 17:22:28 TP0] Decode batch. #running-req: 1, #token: 4997, token usage: 0.13, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:22:28,300 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:29,143 - sglang - INFO - [2025-07-20 17:22:29 TP0] Decode batch. #running-req: 1, #token: 5037, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:22:29,143 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:29,988 - sglang - INFO - [2025-07-20 17:22:29 TP0] Decode batch. #running-req: 1, #token: 5077, token usage: 0.13, gen throughput (token/s): 47.36, #queue-req: 0
- 2025-07-20 17:22:29,988 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:30,078 - __main__ - WARNING - JSON decode error on attempt 2 for test_pdf/1144520000702630XG3440106001004.pdf-8: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:22:30,207 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
- 2025-07-20 17:22:30,437 - sglang - INFO - [2025-07-20 17:22:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2082, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:22:30,437 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:22:31,867 - sglang - INFO - [2025-07-20 17:22:31 TP0] Decode batch. #running-req: 1, #token: 2118, token usage: 0.06, gen throughput (token/s): 21.29, #queue-req: 0
- 2025-07-20 17:22:31,867 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:32,438 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:22:32,438 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 97.08 97.08
- sglang_output_tokens 49.86 49.86
- 2025-07-20 17:22:32,438 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:22:32,709 - sglang - INFO - [2025-07-20 17:22:32 TP0] Decode batch. #running-req: 1, #token: 2158, token usage: 0.06, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:22:32,709 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:33,548 - sglang - INFO - [2025-07-20 17:22:33 TP0] Decode batch. #running-req: 1, #token: 2198, token usage: 0.06, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:22:33,548 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:34,386 - sglang - INFO - [2025-07-20 17:22:34 TP0] Decode batch. #running-req: 1, #token: 2238, token usage: 0.06, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:22:34,387 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:35,228 - sglang - INFO - [2025-07-20 17:22:35 TP0] Decode batch. #running-req: 1, #token: 2278, token usage: 0.06, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:22:35,228 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:36,070 - sglang - INFO - [2025-07-20 17:22:36 TP0] Decode batch. #running-req: 1, #token: 2318, token usage: 0.06, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:22:36,070 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:36,908 - sglang - INFO - [2025-07-20 17:22:36 TP0] Decode batch. #running-req: 1, #token: 2358, token usage: 0.06, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:22:36,908 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:37,747 - sglang - INFO - [2025-07-20 17:22:37 TP0] Decode batch. #running-req: 1, #token: 2398, token usage: 0.06, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-07-20 17:22:37,747 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:38,587 - sglang - INFO - [2025-07-20 17:22:38 TP0] Decode batch. #running-req: 1, #token: 2438, token usage: 0.06, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:22:38,587 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:39,429 - sglang - INFO - [2025-07-20 17:22:39 TP0] Decode batch. #running-req: 1, #token: 2478, token usage: 0.07, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:22:39,429 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:40,271 - sglang - INFO - [2025-07-20 17:22:40 TP0] Decode batch. #running-req: 1, #token: 0, token usage: 0.00, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:22:40,271 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:22:40,277 - __main__ - INFO - Finished TaskGroup for worker on 9face5eb793573e747789b627bf1cc4b334b5b93
- 2025-07-20 17:22:40,277 - __main__ - INFO - Got 1 docs for 9face5eb793573e747789b627bf1cc4b334b5b93
- 2025-07-20 17:22:40,279 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-07-20 17:22:40,279 - __main__ - INFO - Work done
- 2025-07-20 17:22:40,280 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-07-20 17:24:46,300 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-07-20 17:24:46,301 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
- 2025-07-20 17:24:46,301 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
- 2025-07-20 17:24:46,302 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
- 2025-07-20 17:24:46,302 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
- 2025-07-20 17:24:46,302 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
- 2025-07-20 17:24:46,303 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
- 2025-07-20 17:24:46,303 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
- 2025-07-20 17:24:46,303 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
- 2025-07-20 17:24:46,304 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
- 2025-07-20 17:24:46,304 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
- 2025-07-20 17:24:46,304 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
- 2025-07-20 17:24:46,305 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
- 2025-07-20 17:24:46,305 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
- 2025-07-20 17:24:46,306 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
- 2025-07-20 17:24:46,306 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
- 2025-07-20 17:24:46,307 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
- 2025-07-20 17:24:46,307 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
- 2025-07-20 17:24:46,307 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
- 2025-07-20 17:24:46,308 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
- 2025-07-20 17:24:46,308 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
- 2025-07-20 17:24:46,309 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
- 2025-07-20 17:24:46,309 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
- 2025-07-20 17:24:46,310 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
- 2025-07-20 17:24:46,310 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
- 2025-07-20 17:24:46,310 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
- 2025-07-20 17:24:46,311 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
- 2025-07-20 17:24:46,311 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
- 2025-07-20 17:24:46,311 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
- 2025-07-20 17:24:46,312 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
- 2025-07-20 17:24:46,312 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
- 2025-07-20 17:24:46,312 - __main__ - INFO - Found 30 total pdf paths to add
- 2025-07-20 17:24:46,403 - __main__ - INFO - Calculated items_per_group: 1 based on average pages per PDF: 7.60
- 2025-07-20 17:24:46,634 - __main__ - INFO - Starting pipeline with PID 632984
- 2025-07-20 17:24:46,634 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-07-20 17:24:46,719 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-07-20 17:24:47,751 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-07-20 17:24:48,799 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-07-20 17:24:49,865 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-07-20 17:24:50,936 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-07-20 17:24:52,007 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-07-20 17:24:53,056 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-07-20 17:24:53,210 - sglang - INFO - [2025-07-20 17:24:53] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=192306107, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 17:24:53,210 - __main__ - INFO - [2025-07-20 17:24:53] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=192306107, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-07-20 17:24:54,131 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-07-20 17:24:54,281 - sglang - INFO - [2025-07-20 17:24:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 17:24:54,281 - __main__ - INFO - [2025-07-20 17:24:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-07-20 17:24:55,207 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-07-20 17:24:56,275 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-07-20 17:24:57,343 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-07-20 17:24:58,411 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-07-20 17:24:59,479 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-07-20 17:25:00,545 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-07-20 17:25:00,944 - sglang - INFO - [2025-07-20 17:25:00 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 17:25:00,944 - __main__ - INFO - [2025-07-20 17:25:00 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-07-20 17:25:00,947 - sglang - INFO - [2025-07-20 17:25:00 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 17:25:00,948 - __main__ - INFO - [2025-07-20 17:25:00 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-07-20 17:25:00,948 - sglang - INFO - [2025-07-20 17:25:00 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 17:25:00,948 - __main__ - INFO - [2025-07-20 17:25:00 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-07-20 17:25:00,948 - sglang - INFO - [2025-07-20 17:25:00 TP0] Init torch distributed begin.
- 2025-07-20 17:25:00,948 - __main__ - INFO - [2025-07-20 17:25:00 TP0] Init torch distributed begin.
- 2025-07-20 17:25:01,603 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-07-20 17:25:02,648 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-07-20 17:25:03,690 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-07-20 17:25:04,733 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-07-20 17:25:05,766 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-07-20 17:25:06,360 - sglang - INFO - [2025-07-20 17:25:06 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 17:25:06,360 - __main__ - INFO - [2025-07-20 17:25:06 TP0] Load weight begin. avail mem=23.33 GB
- 2025-07-20 17:25:06,809 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-07-20 17:25:07,069 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 17:25:07,070 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-07-20 17:25:07,851 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-07-20 17:25:08,892 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-07-20 17:25:09,941 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-07-20 17:25:11,003 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-07-20 17:25:12,074 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-07-20 17:25:13,147 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-07-20 17:25:14,214 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-07-20 17:25:14,762 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:07<00:23, 7.69s/it]
- 2025-07-20 17:25:14,762 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:07<00:23, 7.69s/it]
- 2025-07-20 17:25:15,290 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
- 2025-07-20 17:25:16,358 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
- 2025-07-20 17:25:17,425 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
- 2025-07-20 17:25:18,497 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
- 2025-07-20 17:25:19,569 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
- 2025-07-20 17:25:20,637 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
- 2025-07-20 17:25:21,709 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
- 2025-07-20 17:25:21,717 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:14<00:14, 7.26s/it]
- 2025-07-20 17:25:21,717 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:14<00:14, 7.26s/it]
- 2025-07-20 17:25:22,781 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
- 2025-07-20 17:25:23,841 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
- 2025-07-20 17:25:24,896 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
- 2025-07-20 17:25:25,963 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
- 2025-07-20 17:25:27,030 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
- 2025-07-20 17:25:28,096 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
- 2025-07-20 17:25:29,164 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
- 2025-07-20 17:25:29,488 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:22<00:07, 7.49s/it]
- 2025-07-20 17:25:29,488 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:22<00:07, 7.49s/it]
- 2025-07-20 17:25:30,240 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
- 2025-07-20 17:25:31,308 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
- 2025-07-20 17:25:32,376 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
- 2025-07-20 17:25:32,412 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:25<00:00, 5.69s/it]
- 2025-07-20 17:25:32,412 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:25<00:00, 5.69s/it]
- 2025-07-20 17:25:32,412 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:25<00:00, 6.34s/it]
- 2025-07-20 17:25:32,412 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:25<00:00, 6.34s/it]
- 2025-07-20 17:25:32,412 - sglang - INFO -
- 2025-07-20 17:25:32,412 - __main__ - INFO -
- 2025-07-20 17:25:32,472 - sglang - INFO - [2025-07-20 17:25:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 17:25:32,472 - __main__ - INFO - [2025-07-20 17:25:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-07-20 17:25:32,480 - sglang - INFO - [2025-07-20 17:25:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 17:25:32,480 - __main__ - INFO - [2025-07-20 17:25:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-07-20 17:25:32,480 - sglang - INFO - [2025-07-20 17:25:32 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 17:25:32,480 - __main__ - INFO - [2025-07-20 17:25:32 TP0] Memory pool end. avail mem=5.30 GB
- 2025-07-20 17:25:32,659 - sglang - INFO - [2025-07-20 17:25:32 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 17:25:32,659 - __main__ - INFO - [2025-07-20 17:25:32 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-07-20 17:25:33,473 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
- 2025-07-20 17:25:34,549 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
- 2025-07-20 17:25:34,883 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.33s/it]
50%|█████ | 2/4 [00:01<00:01, 1.39it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.90it/s]
100%|██████████| 4/4 [00:02<00:00, 2.28it/s]
100%|██████████| 4/4 [00:02<00:00, 1.80it/s]
- 2025-07-20 17:25:34,883 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.33s/it]
50%|█████ | 2/4 [00:01<00:01, 1.39it/s]
75%|███████▌ | 3/4 [00:01<00:00, 1.90it/s]
100%|██████████| 4/4 [00:02<00:00, 2.28it/s]
100%|██████████| 4/4 [00:02<00:00, 1.80it/s]
- 2025-07-20 17:25:34,883 - sglang - INFO - [2025-07-20 17:25:34 TP0] Capture cuda graph end. Time elapsed: 2.22 s
- 2025-07-20 17:25:34,883 - __main__ - INFO - [2025-07-20 17:25:34 TP0] Capture cuda graph end. Time elapsed: 2.22 s
- 2025-07-20 17:25:35,635 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
- 2025-07-20 17:25:35,756 - sglang - INFO - [2025-07-20 17:25:35 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 17:25:35,757 - __main__ - INFO - [2025-07-20 17:25:35 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-07-20 17:25:36,722 - __main__ - INFO - sglang server is ready.
- 2025-07-20 17:25:36,723 - __main__ - INFO - Queue remaining: 30
- 2025-07-20 17:25:36,723 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:25:36,723 - __main__ - INFO -
- Worker ID
- ---------
- 2025-07-20 17:25:36,723 - __main__ - INFO - Worker 0 processing work item b3152b4cd8ddb87e2ad8e5fbf7906815031ce44f
- 2025-07-20 17:25:36,723 - __main__ - INFO - Created all tasks for b3152b4cd8ddb87e2ad8e5fbf7906815031ce44f
- 2025-07-20 17:25:36,727 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
- 2025-07-20 17:25:36,861 - sglang - INFO - [2025-07-20 17:25:36 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:25:36,861 - __main__ - INFO - [2025-07-20 17:25:36 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:25:36,862 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:25:37,636 - sglang - INFO - [2025-07-20 17:25:37] The server is fired up and ready to roll!
- 2025-07-20 17:25:37,636 - __main__ - INFO - [2025-07-20 17:25:37] The server is fired up and ready to roll!
- 2025-07-20 17:25:43,354 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
- 2025-07-20 17:25:43,380 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
- 2025-07-20 17:25:43,397 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
- 2025-07-20 17:25:43,407 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
- 2025-07-20 17:25:43,429 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
- 2025-07-20 17:25:43,431 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
- 2025-07-20 17:25:43,456 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
- 2025-07-20 17:25:43,505 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
- 2025-07-20 17:25:43,509 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
- 2025-07-20 17:25:43,515 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
- 2025-07-20 17:25:43,562 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
- 2025-07-20 17:25:46,733 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:25:46,733 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:25:46,734 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-07-20 17:25:56,736 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:25:56,737 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:25:56,737 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-07-20 17:26:06,334 - sglang - INFO - [2025-07-20 17:26:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2517, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:26:06,337 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:26:06,739 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:26:06,739 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:26:06,739 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-07-20 17:26:08,146 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-07-20 17:26:11,260 - sglang - INFO - [2025-07-20 17:26:11 TP0] Prefill batch. #new-seq: 6, #new-token: 12249, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.07, #running-req: 1, #queue-req: 4
- 2025-07-20 17:26:11,260 - __main__ - INFO - sglang running req: 1 queue req: 4
- 2025-07-20 17:26:15,912 - sglang - INFO - [2025-07-20 17:26:15 TP0] Decode batch. #running-req: 7, #token: 14997, token usage: 0.39, gen throughput (token/s): 5.93, #queue-req: 4
- 2025-07-20 17:26:15,912 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:26:16,741 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:26:16,741 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-07-20 17:26:16,741 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-07-20 17:26:16,793 - sglang - INFO - [2025-07-20 17:26:16 TP0] Decode batch. #running-req: 7, #token: 15277, token usage: 0.40, gen throughput (token/s): 317.79, #queue-req: 4
- 2025-07-20 17:26:16,794 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:26:17,676 - sglang - INFO - [2025-07-20 17:26:17 TP0] Decode batch. #running-req: 7, #token: 15557, token usage: 0.41, gen throughput (token/s): 317.40, #queue-req: 4
- 2025-07-20 17:26:17,676 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:26:18,561 - sglang - INFO - [2025-07-20 17:26:18 TP0] Decode batch. #running-req: 7, #token: 15837, token usage: 0.42, gen throughput (token/s): 316.32, #queue-req: 4
- 2025-07-20 17:26:18,561 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:26:19,447 - sglang - INFO - [2025-07-20 17:26:19 TP0] Decode batch. #running-req: 7, #token: 16117, token usage: 0.42, gen throughput (token/s): 315.82, #queue-req: 4
- 2025-07-20 17:26:19,448 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:26:20,334 - sglang - INFO - [2025-07-20 17:26:20 TP0] Decode batch. #running-req: 7, #token: 16397, token usage: 0.43, gen throughput (token/s): 315.80, #queue-req: 4
- 2025-07-20 17:26:20,334 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:26:21,222 - sglang - INFO - [2025-07-20 17:26:21 TP0] Decode batch. #running-req: 7, #token: 16677, token usage: 0.44, gen throughput (token/s): 315.33, #queue-req: 4
- 2025-07-20 17:26:21,222 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:26:22,110 - sglang - INFO - [2025-07-20 17:26:22 TP0] Decode batch. #running-req: 7, #token: 16957, token usage: 0.45, gen throughput (token/s): 315.36, #queue-req: 4
- 2025-07-20 17:26:22,110 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:26:22,288 - sglang - INFO - [2025-07-20 17:26:22 TP0] Prefill batch. #new-seq: 3, #new-token: 6997, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 1
- 2025-07-20 17:26:22,288 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-07-20 17:26:25,101 - sglang - INFO - [2025-07-20 17:26:25 TP0] Decode batch. #running-req: 9, #token: 21921, token usage: 0.58, gen throughput (token/s): 114.68, #queue-req: 1
- 2025-07-20 17:26:25,101 - __main__ - INFO - sglang running req: 9 queue req: 1
- 2025-07-20 17:26:26,050 - sglang - INFO - [2025-07-20 17:26:26 TP0] Decode batch. #running-req: 9, #token: 22281, token usage: 0.59, gen throughput (token/s): 379.10, #queue-req: 1
- 2025-07-20 17:26:26,051 - __main__ - INFO - sglang running req: 9 queue req: 1
- 2025-07-20 17:26:26,144 - sglang - INFO - [2025-07-20 17:26:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2128, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.53, #running-req: 8, #queue-req: 0
- 2025-07-20 17:26:26,144 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:26:26,743 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:26:26,744 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 38.44 38.44
- sglang_output_tokens 7.15 7.15
- 2025-07-20 17:26:26,744 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 11
- 2025-07-20 17:26:27,652 - sglang - INFO - [2025-07-20 17:26:27 TP0] Decode batch. #running-req: 9, #token: 22567, token usage: 0.59, gen throughput (token/s): 224.15, #queue-req: 0
- 2025-07-20 17:26:27,652 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:26:28,558 - sglang - INFO - [2025-07-20 17:26:28 TP0] Decode batch. #running-req: 7, #token: 17913, token usage: 0.47, gen throughput (token/s): 348.65, #queue-req: 0
- 2025-07-20 17:26:28,559 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:26:29,448 - sglang - INFO - [2025-07-20 17:26:29 TP0] Decode batch. #running-req: 6, #token: 15533, token usage: 0.41, gen throughput (token/s): 312.49, #queue-req: 0
- 2025-07-20 17:26:29,448 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:26:30,330 - sglang - INFO - [2025-07-20 17:26:30 TP0] Decode batch. #running-req: 6, #token: 15773, token usage: 0.42, gen throughput (token/s): 272.00, #queue-req: 0
- 2025-07-20 17:26:30,330 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:26:31,208 - sglang - INFO - [2025-07-20 17:26:31 TP0] Decode batch. #running-req: 5, #token: 12903, token usage: 0.34, gen throughput (token/s): 259.85, #queue-req: 0
- 2025-07-20 17:26:31,208 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:26:32,075 - sglang - INFO - [2025-07-20 17:26:32 TP0] Decode batch. #running-req: 5, #token: 13103, token usage: 0.34, gen throughput (token/s): 230.51, #queue-req: 0
- 2025-07-20 17:26:32,076 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:26:32,948 - sglang - INFO - [2025-07-20 17:26:32 TP0] Decode batch. #running-req: 5, #token: 13303, token usage: 0.35, gen throughput (token/s): 229.17, #queue-req: 0
- 2025-07-20 17:26:32,948 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:26:33,814 - sglang - INFO - [2025-07-20 17:26:33 TP0] Decode batch. #running-req: 4, #token: 11136, token usage: 0.29, gen throughput (token/s): 187.00, #queue-req: 0
- 2025-07-20 17:26:33,815 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:26:34,672 - sglang - INFO - [2025-07-20 17:26:34 TP0] Decode batch. #running-req: 3, #token: 8812, token usage: 0.23, gen throughput (token/s): 141.10, #queue-req: 0
- 2025-07-20 17:26:34,672 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:35,527 - sglang - INFO - [2025-07-20 17:26:35 TP0] Decode batch. #running-req: 3, #token: 8932, token usage: 0.24, gen throughput (token/s): 140.37, #queue-req: 0
- 2025-07-20 17:26:35,527 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:36,384 - sglang - INFO - [2025-07-20 17:26:36 TP0] Decode batch. #running-req: 3, #token: 9052, token usage: 0.24, gen throughput (token/s): 140.08, #queue-req: 0
- 2025-07-20 17:26:36,384 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:36,746 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:26:36,746 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 151.15 151.15
- sglang_output_tokens 30.68 30.68
- 2025-07-20 17:26:36,746 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 11
- 2025-07-20 17:26:37,242 - sglang - INFO - [2025-07-20 17:26:37 TP0] Decode batch. #running-req: 3, #token: 9172, token usage: 0.24, gen throughput (token/s): 139.74, #queue-req: 0
- 2025-07-20 17:26:37,243 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:38,100 - sglang - INFO - [2025-07-20 17:26:38 TP0] Decode batch. #running-req: 3, #token: 9292, token usage: 0.24, gen throughput (token/s): 139.89, #queue-req: 0
- 2025-07-20 17:26:38,100 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:38,959 - sglang - INFO - [2025-07-20 17:26:38 TP0] Decode batch. #running-req: 3, #token: 9412, token usage: 0.25, gen throughput (token/s): 139.79, #queue-req: 0
- 2025-07-20 17:26:38,959 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:39,817 - sglang - INFO - [2025-07-20 17:26:39 TP0] Decode batch. #running-req: 3, #token: 9532, token usage: 0.25, gen throughput (token/s): 139.84, #queue-req: 0
- 2025-07-20 17:26:39,817 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:40,675 - sglang - INFO - [2025-07-20 17:26:40 TP0] Decode batch. #running-req: 3, #token: 9652, token usage: 0.25, gen throughput (token/s): 139.81, #queue-req: 0
- 2025-07-20 17:26:40,675 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:41,536 - sglang - INFO - [2025-07-20 17:26:41 TP0] Decode batch. #running-req: 3, #token: 9772, token usage: 0.26, gen throughput (token/s): 139.36, #queue-req: 0
- 2025-07-20 17:26:41,536 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:42,398 - sglang - INFO - [2025-07-20 17:26:42 TP0] Decode batch. #running-req: 3, #token: 9892, token usage: 0.26, gen throughput (token/s): 139.20, #queue-req: 0
- 2025-07-20 17:26:42,398 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:43,258 - sglang - INFO - [2025-07-20 17:26:43 TP0] Decode batch. #running-req: 3, #token: 10012, token usage: 0.26, gen throughput (token/s): 139.56, #queue-req: 0
- 2025-07-20 17:26:43,258 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:44,121 - sglang - INFO - [2025-07-20 17:26:44 TP0] Decode batch. #running-req: 3, #token: 10132, token usage: 0.27, gen throughput (token/s): 138.98, #queue-req: 0
- 2025-07-20 17:26:44,122 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:26:44,979 - sglang - INFO - [2025-07-20 17:26:44 TP0] Decode batch. #running-req: 1, #token: 3406, token usage: 0.09, gen throughput (token/s): 104.98, #queue-req: 0
- 2025-07-20 17:26:44,979 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:45,816 - sglang - INFO - [2025-07-20 17:26:45 TP0] Decode batch. #running-req: 1, #token: 3446, token usage: 0.09, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:26:45,817 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:46,647 - sglang - INFO - [2025-07-20 17:26:46 TP0] Decode batch. #running-req: 1, #token: 3486, token usage: 0.09, gen throughput (token/s): 48.13, #queue-req: 0
- 2025-07-20 17:26:46,648 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:46,747 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:26:46,747 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 180.29 180.29
- sglang_output_tokens 43.04 43.04
- 2025-07-20 17:26:46,748 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:26:47,475 - sglang - INFO - [2025-07-20 17:26:47 TP0] Decode batch. #running-req: 1, #token: 3526, token usage: 0.09, gen throughput (token/s): 48.33, #queue-req: 0
- 2025-07-20 17:26:47,475 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:48,302 - sglang - INFO - [2025-07-20 17:26:48 TP0] Decode batch. #running-req: 1, #token: 3566, token usage: 0.09, gen throughput (token/s): 48.35, #queue-req: 0
- 2025-07-20 17:26:48,303 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:49,139 - sglang - INFO - [2025-07-20 17:26:49 TP0] Decode batch. #running-req: 1, #token: 3606, token usage: 0.09, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:26:49,139 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:49,976 - sglang - INFO - [2025-07-20 17:26:49 TP0] Decode batch. #running-req: 1, #token: 3646, token usage: 0.10, gen throughput (token/s): 47.77, #queue-req: 0
- 2025-07-20 17:26:49,976 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:50,813 - sglang - INFO - [2025-07-20 17:26:50 TP0] Decode batch. #running-req: 1, #token: 3686, token usage: 0.10, gen throughput (token/s): 47.77, #queue-req: 0
- 2025-07-20 17:26:50,814 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:51,652 - sglang - INFO - [2025-07-20 17:26:51 TP0] Decode batch. #running-req: 1, #token: 3726, token usage: 0.10, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:26:51,653 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:52,490 - sglang - INFO - [2025-07-20 17:26:52 TP0] Decode batch. #running-req: 1, #token: 3766, token usage: 0.10, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:26:52,490 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:53,327 - sglang - INFO - [2025-07-20 17:26:53 TP0] Decode batch. #running-req: 1, #token: 3806, token usage: 0.10, gen throughput (token/s): 47.79, #queue-req: 0
- 2025-07-20 17:26:53,327 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:54,163 - sglang - INFO - [2025-07-20 17:26:54 TP0] Decode batch. #running-req: 1, #token: 3846, token usage: 0.10, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:26:54,164 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:54,997 - sglang - INFO - [2025-07-20 17:26:54 TP0] Decode batch. #running-req: 1, #token: 3886, token usage: 0.10, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:26:54,997 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:55,830 - sglang - INFO - [2025-07-20 17:26:55 TP0] Decode batch. #running-req: 1, #token: 3926, token usage: 0.10, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:26:55,830 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:56,670 - sglang - INFO - [2025-07-20 17:26:56 TP0] Decode batch. #running-req: 1, #token: 3966, token usage: 0.10, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:26:56,671 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:56,749 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:26:56,749 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 166.47 166.47
- sglang_output_tokens 39.74 39.74
- 2025-07-20 17:26:56,750 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:26:57,508 - sglang - INFO - [2025-07-20 17:26:57 TP0] Decode batch. #running-req: 1, #token: 4006, token usage: 0.11, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:26:57,509 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:58,347 - sglang - INFO - [2025-07-20 17:26:58 TP0] Decode batch. #running-req: 1, #token: 4046, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:26:58,347 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:26:59,187 - sglang - INFO - [2025-07-20 17:26:59 TP0] Decode batch. #running-req: 1, #token: 4086, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:26:59,187 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:00,027 - sglang - INFO - [2025-07-20 17:27:00 TP0] Decode batch. #running-req: 1, #token: 4126, token usage: 0.11, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:27:00,027 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:00,866 - sglang - INFO - [2025-07-20 17:27:00 TP0] Decode batch. #running-req: 1, #token: 4166, token usage: 0.11, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:27:00,866 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:01,704 - sglang - INFO - [2025-07-20 17:27:01 TP0] Decode batch. #running-req: 1, #token: 4206, token usage: 0.11, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:27:01,704 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:02,539 - sglang - INFO - [2025-07-20 17:27:02 TP0] Decode batch. #running-req: 1, #token: 4246, token usage: 0.11, gen throughput (token/s): 47.86, #queue-req: 0
- 2025-07-20 17:27:02,540 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:03,375 - sglang - INFO - [2025-07-20 17:27:03 TP0] Decode batch. #running-req: 1, #token: 4286, token usage: 0.11, gen throughput (token/s): 47.86, #queue-req: 0
- 2025-07-20 17:27:03,376 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:04,218 - sglang - INFO - [2025-07-20 17:27:04 TP0] Decode batch. #running-req: 1, #token: 4326, token usage: 0.11, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:27:04,218 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:05,059 - sglang - INFO - [2025-07-20 17:27:05 TP0] Decode batch. #running-req: 1, #token: 4366, token usage: 0.11, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:27:05,059 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:05,900 - sglang - INFO - [2025-07-20 17:27:05 TP0] Decode batch. #running-req: 1, #token: 4406, token usage: 0.12, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:27:05,900 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:06,742 - sglang - INFO - [2025-07-20 17:27:06 TP0] Decode batch. #running-req: 1, #token: 4446, token usage: 0.12, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:27:06,742 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:06,751 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:27:06,751 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 154.62 154.62
- sglang_output_tokens 36.91 36.91
- 2025-07-20 17:27:06,751 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:27:07,584 - sglang - INFO - [2025-07-20 17:27:07 TP0] Decode batch. #running-req: 1, #token: 4486, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:27:07,584 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:08,425 - sglang - INFO - [2025-07-20 17:27:08 TP0] Decode batch. #running-req: 1, #token: 4526, token usage: 0.12, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:27:08,425 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:09,263 - sglang - INFO - [2025-07-20 17:27:09 TP0] Decode batch. #running-req: 1, #token: 4566, token usage: 0.12, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-07-20 17:27:09,264 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:10,101 - sglang - INFO - [2025-07-20 17:27:10 TP0] Decode batch. #running-req: 1, #token: 4606, token usage: 0.12, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:27:10,101 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:10,940 - sglang - INFO - [2025-07-20 17:27:10 TP0] Decode batch. #running-req: 1, #token: 4646, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:27:10,940 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:11,784 - sglang - INFO - [2025-07-20 17:27:11 TP0] Decode batch. #running-req: 1, #token: 4686, token usage: 0.12, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:27:11,784 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:12,629 - sglang - INFO - [2025-07-20 17:27:12 TP0] Decode batch. #running-req: 1, #token: 4726, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:27:12,629 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:13,472 - sglang - INFO - [2025-07-20 17:27:13 TP0] Decode batch. #running-req: 1, #token: 4766, token usage: 0.13, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:27:13,472 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:14,316 - sglang - INFO - [2025-07-20 17:27:14 TP0] Decode batch. #running-req: 1, #token: 4806, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:27:14,316 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:15,159 - sglang - INFO - [2025-07-20 17:27:15 TP0] Decode batch. #running-req: 1, #token: 4846, token usage: 0.13, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-07-20 17:27:15,159 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:16,002 - sglang - INFO - [2025-07-20 17:27:16 TP0] Decode batch. #running-req: 1, #token: 4886, token usage: 0.13, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:27:16,002 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:16,752 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:27:16,753 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 144.34 144.34
- sglang_output_tokens 34.46 34.46
- 2025-07-20 17:27:16,753 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:27:16,843 - sglang - INFO - [2025-07-20 17:27:16 TP0] Decode batch. #running-req: 1, #token: 4926, token usage: 0.13, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:27:16,844 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:17,685 - sglang - INFO - [2025-07-20 17:27:17 TP0] Decode batch. #running-req: 1, #token: 4966, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:27:17,685 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:18,529 - sglang - INFO - [2025-07-20 17:27:18 TP0] Decode batch. #running-req: 1, #token: 5006, token usage: 0.13, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:27:18,529 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:19,375 - sglang - INFO - [2025-07-20 17:27:19 TP0] Decode batch. #running-req: 1, #token: 5046, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:27:19,375 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:20,220 - sglang - INFO - [2025-07-20 17:27:20 TP0] Decode batch. #running-req: 1, #token: 5086, token usage: 0.13, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:27:20,220 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:21,066 - sglang - INFO - [2025-07-20 17:27:21 TP0] Decode batch. #running-req: 1, #token: 5126, token usage: 0.13, gen throughput (token/s): 47.28, #queue-req: 0
- 2025-07-20 17:27:21,066 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:21,912 - sglang - INFO - [2025-07-20 17:27:21 TP0] Decode batch. #running-req: 1, #token: 5166, token usage: 0.14, gen throughput (token/s): 47.26, #queue-req: 0
- 2025-07-20 17:27:21,912 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:22,045 - __main__ - WARNING - JSON decode error on attempt 0 for test_pdf/1144520000702630XG344010604302101.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:27:22,242 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
- 2025-07-20 17:27:22,476 - sglang - INFO - [2025-07-20 17:27:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2173, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:27:22,477 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:27:23,851 - sglang - INFO - [2025-07-20 17:27:23 TP0] Decode batch. #running-req: 1, #token: 2207, token usage: 0.06, gen throughput (token/s): 20.63, #queue-req: 0
- 2025-07-20 17:27:23,851 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:24,686 - sglang - INFO - [2025-07-20 17:27:24 TP0] Decode batch. #running-req: 1, #token: 2247, token usage: 0.06, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-07-20 17:27:24,686 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:25,517 - sglang - INFO - [2025-07-20 17:27:25 TP0] Decode batch. #running-req: 1, #token: 2287, token usage: 0.06, gen throughput (token/s): 48.14, #queue-req: 0
- 2025-07-20 17:27:25,517 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:26,354 - sglang - INFO - [2025-07-20 17:27:26 TP0] Decode batch. #running-req: 1, #token: 2327, token usage: 0.06, gen throughput (token/s): 47.79, #queue-req: 0
- 2025-07-20 17:27:26,355 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:26,754 - __main__ - INFO - Queue remaining: 29
- 2025-07-20 17:27:26,755 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 148.89 148.89
- sglang_output_tokens 51.01 51.01
- 2025-07-20 17:27:26,755 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:27:27,192 - sglang - INFO - [2025-07-20 17:27:27 TP0] Decode batch. #running-req: 1, #token: 2367, token usage: 0.06, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:27:27,192 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:28,030 - sglang - INFO - [2025-07-20 17:27:28 TP0] Decode batch. #running-req: 1, #token: 2407, token usage: 0.06, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:27:28,030 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:28,867 - sglang - INFO - [2025-07-20 17:27:28 TP0] Decode batch. #running-req: 1, #token: 2447, token usage: 0.06, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:27:28,867 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:29,705 - sglang - INFO - [2025-07-20 17:27:29 TP0] Decode batch. #running-req: 1, #token: 2487, token usage: 0.07, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:27:29,705 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:30,543 - sglang - INFO - [2025-07-20 17:27:30 TP0] Decode batch. #running-req: 1, #token: 2527, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:27:30,543 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:31,380 - sglang - INFO - [2025-07-20 17:27:31 TP0] Decode batch. #running-req: 1, #token: 2567, token usage: 0.07, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:27:31,380 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:32,216 - sglang - INFO - [2025-07-20 17:27:32 TP0] Decode batch. #running-req: 1, #token: 2607, token usage: 0.07, gen throughput (token/s): 47.85, #queue-req: 0
- 2025-07-20 17:27:32,216 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:33,047 - sglang - INFO - [2025-07-20 17:27:33 TP0] Decode batch. #running-req: 1, #token: 2647, token usage: 0.07, gen throughput (token/s): 48.13, #queue-req: 0
- 2025-07-20 17:27:33,047 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:27:33,872 - __main__ - INFO - Finished TaskGroup for worker on b3152b4cd8ddb87e2ad8e5fbf7906815031ce44f
- 2025-07-20 17:27:33,872 - __main__ - INFO - Got 1 docs for b3152b4cd8ddb87e2ad8e5fbf7906815031ce44f
- 2025-07-20 17:27:33,874 - __main__ - INFO - Worker 0 processing work item 0c3e9a89b35c3045b6a67f7cd5c06009a31d750f
- 2025-07-20 17:27:33,874 - __main__ - INFO - Created all tasks for 0c3e9a89b35c3045b6a67f7cd5c06009a31d750f
- 2025-07-20 17:27:33,882 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
- 2025-07-20 17:27:34,015 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
- 2025-07-20 17:27:34,022 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
- 2025-07-20 17:27:34,025 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
- 2025-07-20 17:27:34,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
- 2025-07-20 17:27:34,073 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
- 2025-07-20 17:27:34,090 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
- 2025-07-20 17:27:34,123 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
- 2025-07-20 17:27:34,128 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
- 2025-07-20 17:27:34,150 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
- 2025-07-20 17:27:34,166 - sglang - INFO - [2025-07-20 17:27:34 TP0] Prefill batch. #new-seq: 1, #new-token: 1713, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:27:34,166 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:27:34,918 - sglang - INFO - [2025-07-20 17:27:34 TP0] Prefill batch. #new-seq: 6, #new-token: 14259, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 2
- 2025-07-20 17:27:34,918 - __main__ - INFO - sglang running req: 1 queue req: 2
- 2025-07-20 17:27:36,756 - __main__ - INFO - Queue remaining: 28
- 2025-07-20 17:27:36,757 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 140.15 140.15
- finished_output_tokens 33.43 33.43
- sglang_input_tokens 152.90 152.90
- sglang_output_tokens 51.03 51.03
- 2025-07-20 17:27:36,757 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 9
- 2025-07-20 17:27:39,155 - sglang - INFO - [2025-07-20 17:27:39 TP0] Decode batch. #running-req: 7, #token: 15979, token usage: 0.42, gen throughput (token/s): 7.53, #queue-req: 2
- 2025-07-20 17:27:39,155 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:27:40,041 - sglang - INFO - [2025-07-20 17:27:40 TP0] Decode batch. #running-req: 7, #token: 16259, token usage: 0.43, gen throughput (token/s): 315.80, #queue-req: 2
- 2025-07-20 17:27:40,041 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:27:40,932 - sglang - INFO - [2025-07-20 17:27:40 TP0] Decode batch. #running-req: 7, #token: 16539, token usage: 0.44, gen throughput (token/s): 314.16, #queue-req: 2
- 2025-07-20 17:27:40,933 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:27:41,829 - sglang - INFO - [2025-07-20 17:27:41 TP0] Decode batch. #running-req: 7, #token: 16819, token usage: 0.44, gen throughput (token/s): 312.18, #queue-req: 2
- 2025-07-20 17:27:41,829 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:27:42,726 - sglang - INFO - [2025-07-20 17:27:42 TP0] Decode batch. #running-req: 7, #token: 17099, token usage: 0.45, gen throughput (token/s): 312.45, #queue-req: 2
- 2025-07-20 17:27:42,726 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:27:43,621 - sglang - INFO - [2025-07-20 17:27:43 TP0] Decode batch. #running-req: 7, #token: 17379, token usage: 0.46, gen throughput (token/s): 312.55, #queue-req: 2
- 2025-07-20 17:27:43,622 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:27:44,517 - sglang - INFO - [2025-07-20 17:27:44 TP0] Decode batch. #running-req: 7, #token: 17659, token usage: 0.46, gen throughput (token/s): 312.52, #queue-req: 2
- 2025-07-20 17:27:44,518 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:27:44,967 - sglang - INFO - [2025-07-20 17:27:44 TP0] Prefill batch. #new-seq: 2, #new-token: 4599, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.41, #running-req: 6, #queue-req: 0
- 2025-07-20 17:27:44,967 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:27:46,758 - __main__ - INFO - Queue remaining: 28
- 2025-07-20 17:27:46,758 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 132.38 132.38
- finished_output_tokens 31.58 31.58
- sglang_input_tokens 164.64 164.64
- sglang_output_tokens 51.12 51.12
- 2025-07-20 17:27:46,758 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 9
- 2025-07-20 17:27:46,813 - sglang - INFO - [2025-07-20 17:27:46 TP0] Decode batch. #running-req: 7, #token: 18368, token usage: 0.48, gen throughput (token/s): 122.87, #queue-req: 0
- 2025-07-20 17:27:46,813 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:27:47,702 - sglang - INFO - [2025-07-20 17:27:47 TP0] Decode batch. #running-req: 7, #token: 18648, token usage: 0.49, gen throughput (token/s): 314.82, #queue-req: 0
- 2025-07-20 17:27:47,702 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:27:48,593 - sglang - INFO - [2025-07-20 17:27:48 TP0] Decode batch. #running-req: 7, #token: 18928, token usage: 0.50, gen throughput (token/s): 314.27, #queue-req: 0
- 2025-07-20 17:27:48,593 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:27:49,487 - sglang - INFO - [2025-07-20 17:27:49 TP0] Decode batch. #running-req: 6, #token: 16554, token usage: 0.44, gen throughput (token/s): 281.73, #queue-req: 0
- 2025-07-20 17:27:49,487 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:27:50,380 - sglang - INFO - [2025-07-20 17:27:50 TP0] Decode batch. #running-req: 6, #token: 16794, token usage: 0.44, gen throughput (token/s): 268.91, #queue-req: 0
- 2025-07-20 17:27:50,380 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:27:51,264 - sglang - INFO - [2025-07-20 17:27:51 TP0] Decode batch. #running-req: 4, #token: 11257, token usage: 0.30, gen throughput (token/s): 213.78, #queue-req: 0
- 2025-07-20 17:27:51,264 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:27:52,134 - sglang - INFO - [2025-07-20 17:27:52 TP0] Decode batch. #running-req: 4, #token: 11417, token usage: 0.30, gen throughput (token/s): 183.81, #queue-req: 0
- 2025-07-20 17:27:52,135 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:27:53,005 - sglang - INFO - [2025-07-20 17:27:53 TP0] Decode batch. #running-req: 4, #token: 11577, token usage: 0.30, gen throughput (token/s): 183.78, #queue-req: 0
- 2025-07-20 17:27:53,005 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:27:53,872 - sglang - INFO - [2025-07-20 17:27:53 TP0] Decode batch. #running-req: 3, #token: 8484, token usage: 0.22, gen throughput (token/s): 171.87, #queue-req: 0
- 2025-07-20 17:27:53,872 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:27:54,729 - sglang - INFO - [2025-07-20 17:27:54 TP0] Decode batch. #running-req: 3, #token: 8604, token usage: 0.23, gen throughput (token/s): 139.94, #queue-req: 0
- 2025-07-20 17:27:54,730 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:27:55,578 - sglang - INFO - [2025-07-20 17:27:55 TP0] Decode batch. #running-req: 2, #token: 5439, token usage: 0.14, gen throughput (token/s): 117.83, #queue-req: 0
- 2025-07-20 17:27:55,578 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:27:56,427 - sglang - INFO - [2025-07-20 17:27:56 TP0] Decode batch. #running-req: 2, #token: 5519, token usage: 0.15, gen throughput (token/s): 94.21, #queue-req: 0
- 2025-07-20 17:27:56,428 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:27:56,759 - __main__ - INFO - Queue remaining: 28
- 2025-07-20 17:27:56,760 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 125.43 125.43
- finished_output_tokens 29.92 29.92
- sglang_input_tokens 220.69 220.69
- sglang_output_tokens 61.77 61.77
- 2025-07-20 17:27:56,760 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 7 | 9
- 2025-07-20 17:27:57,277 - sglang - INFO - [2025-07-20 17:27:57 TP0] Decode batch. #running-req: 2, #token: 5599, token usage: 0.15, gen throughput (token/s): 94.17, #queue-req: 0
- 2025-07-20 17:27:57,277 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:27:58,124 - sglang - INFO - [2025-07-20 17:27:58 TP0] Decode batch. #running-req: 2, #token: 5679, token usage: 0.15, gen throughput (token/s): 94.42, #queue-req: 0
- 2025-07-20 17:27:58,124 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:27:58,973 - sglang - INFO - [2025-07-20 17:27:58 TP0] Decode batch. #running-req: 2, #token: 5759, token usage: 0.15, gen throughput (token/s): 94.26, #queue-req: 0
- 2025-07-20 17:27:58,973 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:27:59,823 - sglang - INFO - [2025-07-20 17:27:59 TP0] Decode batch. #running-req: 2, #token: 5839, token usage: 0.15, gen throughput (token/s): 94.13, #queue-req: 0
- 2025-07-20 17:27:59,823 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:28:00,673 - sglang - INFO - [2025-07-20 17:28:00 TP0] Decode batch. #running-req: 2, #token: 5919, token usage: 0.16, gen throughput (token/s): 94.13, #queue-req: 0
- 2025-07-20 17:28:00,673 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:28:01,519 - sglang - INFO - [2025-07-20 17:28:01 TP0] Decode batch. #running-req: 2, #token: 5999, token usage: 0.16, gen throughput (token/s): 94.49, #queue-req: 0
- 2025-07-20 17:28:01,520 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:28:02,363 - sglang - INFO - [2025-07-20 17:28:02 TP0] Decode batch. #running-req: 2, #token: 6079, token usage: 0.16, gen throughput (token/s): 94.87, #queue-req: 0
- 2025-07-20 17:28:02,363 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:28:03,207 - sglang - INFO - [2025-07-20 17:28:03 TP0] Decode batch. #running-req: 2, #token: 6159, token usage: 0.16, gen throughput (token/s): 94.75, #queue-req: 0
- 2025-07-20 17:28:03,207 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:28:04,058 - sglang - INFO - [2025-07-20 17:28:04 TP0] Decode batch. #running-req: 2, #token: 6239, token usage: 0.16, gen throughput (token/s): 93.96, #queue-req: 0
- 2025-07-20 17:28:04,059 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:28:04,904 - sglang - INFO - [2025-07-20 17:28:04 TP0] Decode batch. #running-req: 1, #token: 3177, token usage: 0.08, gen throughput (token/s): 72.10, #queue-req: 0
- 2025-07-20 17:28:04,905 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:28:05,562 - __main__ - INFO - Finished TaskGroup for worker on 0c3e9a89b35c3045b6a67f7cd5c06009a31d750f
- 2025-07-20 17:28:05,562 - __main__ - INFO - Got 1 docs for 0c3e9a89b35c3045b6a67f7cd5c06009a31d750f
- 2025-07-20 17:28:05,563 - __main__ - INFO - Worker 0 processing work item 10dc5d29c3f17870daf918c9555cd0b939acbffe
- 2025-07-20 17:28:05,563 - __main__ - INFO - Created all tasks for 10dc5d29c3f17870daf918c9555cd0b939acbffe
- 2025-07-20 17:28:05,570 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
- 2025-07-20 17:28:05,672 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
- 2025-07-20 17:28:05,706 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
- 2025-07-20 17:28:05,713 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
- 2025-07-20 17:28:05,718 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
- 2025-07-20 17:28:05,728 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
- 2025-07-20 17:28:05,736 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
- 2025-07-20 17:28:05,747 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
- 2025-07-20 17:28:05,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
- 2025-07-20 17:28:05,758 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
- 2025-07-20 17:28:05,764 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
- 2025-07-20 17:28:05,816 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
- 2025-07-20 17:28:05,835 - sglang - INFO - [2025-07-20 17:28:05 TP0] Prefill batch. #new-seq: 1, #new-token: 1485, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:28:05,835 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:28:05,847 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
- 2025-07-20 17:28:06,761 - __main__ - INFO - Queue remaining: 27
- 2025-07-20 17:28:06,762 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 221.79 221.79
- finished_output_tokens 52.37 52.37
- sglang_input_tokens 232.62 232.62
- sglang_output_tokens 67.34 67.34
- 2025-07-20 17:28:06,762 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 12
- 2025-07-20 17:28:07,199 - sglang - INFO - [2025-07-20 17:28:07 TP0] Prefill batch. #new-seq: 6, #new-token: 12589, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 5
- 2025-07-20 17:28:07,200 - __main__ - INFO - sglang running req: 1 queue req: 5
- 2025-07-20 17:28:11,289 - sglang - INFO - [2025-07-20 17:28:11 TP0] Decode batch. #running-req: 7, #token: 14137, token usage: 0.37, gen throughput (token/s): 14.72, #queue-req: 5
- 2025-07-20 17:28:11,290 - __main__ - INFO - sglang running req: 7 queue req: 5
- 2025-07-20 17:28:12,176 - sglang - INFO - [2025-07-20 17:28:12 TP0] Decode batch. #running-req: 7, #token: 14417, token usage: 0.38, gen throughput (token/s): 315.67, #queue-req: 5
- 2025-07-20 17:28:12,177 - __main__ - INFO - sglang running req: 7 queue req: 5
- 2025-07-20 17:28:13,062 - sglang - INFO - [2025-07-20 17:28:13 TP0] Decode batch. #running-req: 7, #token: 14697, token usage: 0.39, gen throughput (token/s): 316.16, #queue-req: 5
- 2025-07-20 17:28:13,062 - __main__ - INFO - sglang running req: 7 queue req: 5
- 2025-07-20 17:28:13,952 - sglang - INFO - [2025-07-20 17:28:13 TP0] Decode batch. #running-req: 7, #token: 14977, token usage: 0.39, gen throughput (token/s): 314.76, #queue-req: 5
- 2025-07-20 17:28:13,952 - __main__ - INFO - sglang running req: 7 queue req: 5
- 2025-07-20 17:28:14,842 - sglang - INFO - [2025-07-20 17:28:14 TP0] Decode batch. #running-req: 7, #token: 15257, token usage: 0.40, gen throughput (token/s): 314.66, #queue-req: 5
- 2025-07-20 17:28:14,842 - __main__ - INFO - sglang running req: 7 queue req: 5
- 2025-07-20 17:28:15,730 - sglang - INFO - [2025-07-20 17:28:15 TP0] Decode batch. #running-req: 7, #token: 15537, token usage: 0.41, gen throughput (token/s): 315.13, #queue-req: 5
- 2025-07-20 17:28:15,730 - __main__ - INFO - sglang running req: 7 queue req: 5
- 2025-07-20 17:28:16,197 - sglang - INFO - [2025-07-20 17:28:16 TP0] Prefill batch. #new-seq: 2, #new-token: 5286, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.37, #running-req: 6, #queue-req: 3
- 2025-07-20 17:28:16,197 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-07-20 17:28:16,764 - __main__ - INFO - Queue remaining: 27
- 2025-07-20 17:28:16,764 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 211.25 211.25
- finished_output_tokens 49.88 49.88
- sglang_input_tokens 228.62 228.62
- sglang_output_tokens 65.23 65.23
- 2025-07-20 17:28:16,765 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 12
- 2025-07-20 17:28:18,160 - sglang - INFO - [2025-07-20 17:28:18 TP0] Decode batch. #running-req: 8, #token: 19407, token usage: 0.51, gen throughput (token/s): 122.64, #queue-req: 3
- 2025-07-20 17:28:18,160 - __main__ - INFO - sglang running req: 8 queue req: 3
- 2025-07-20 17:28:18,497 - sglang - INFO - [2025-07-20 17:28:18 TP0] Prefill batch. #new-seq: 2, #new-token: 4907, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.46, #running-req: 7, #queue-req: 1
- 2025-07-20 17:28:18,498 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:28:20,558 - sglang - INFO - [2025-07-20 17:28:20 TP0] Decode batch. #running-req: 9, #token: 22604, token usage: 0.60, gen throughput (token/s): 143.43, #queue-req: 1
- 2025-07-20 17:28:20,559 - __main__ - INFO - sglang running req: 9 queue req: 1
- 2025-07-20 17:28:21,518 - sglang - INFO - [2025-07-20 17:28:21 TP0] Decode batch. #running-req: 9, #token: 22964, token usage: 0.60, gen throughput (token/s): 375.31, #queue-req: 1
- 2025-07-20 17:28:21,518 - __main__ - INFO - sglang running req: 9 queue req: 1
- 2025-07-20 17:28:21,685 - sglang - INFO - [2025-07-20 17:28:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2316, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 8, #queue-req: 0
- 2025-07-20 17:28:21,685 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:28:23,224 - sglang - INFO - [2025-07-20 17:28:23 TP0] Decode batch. #running-req: 9, #token: 23144, token usage: 0.61, gen throughput (token/s): 210.34, #queue-req: 0
- 2025-07-20 17:28:23,225 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:28:24,182 - sglang - INFO - [2025-07-20 17:28:24 TP0] Decode batch. #running-req: 9, #token: 23504, token usage: 0.62, gen throughput (token/s): 375.79, #queue-req: 0
- 2025-07-20 17:28:24,183 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:28:25,139 - sglang - INFO - [2025-07-20 17:28:25 TP0] Decode batch. #running-req: 9, #token: 23864, token usage: 0.63, gen throughput (token/s): 376.40, #queue-req: 0
- 2025-07-20 17:28:25,139 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:28:26,051 - sglang - INFO - [2025-07-20 17:28:26 TP0] Decode batch. #running-req: 8, #token: 21936, token usage: 0.58, gen throughput (token/s): 358.60, #queue-req: 0
- 2025-07-20 17:28:26,051 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:28:26,766 - __main__ - INFO - Queue remaining: 27
- 2025-07-20 17:28:26,767 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 201.66 201.66
- finished_output_tokens 47.62 47.62
- sglang_input_tokens 264.74 264.74
- sglang_output_tokens 71.56 71.56
- 2025-07-20 17:28:26,767 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 6 | 12
- 2025-07-20 17:28:26,948 - sglang - INFO - [2025-07-20 17:28:26 TP0] Decode batch. #running-req: 6, #token: 16699, token usage: 0.44, gen throughput (token/s): 280.85, #queue-req: 0
- 2025-07-20 17:28:26,948 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:28:27,842 - sglang - INFO - [2025-07-20 17:28:27 TP0] Decode batch. #running-req: 6, #token: 16939, token usage: 0.45, gen throughput (token/s): 268.53, #queue-req: 0
- 2025-07-20 17:28:27,842 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:28:28,729 - sglang - INFO - [2025-07-20 17:28:28 TP0] Decode batch. #running-req: 5, #token: 14230, token usage: 0.37, gen throughput (token/s): 235.47, #queue-req: 0
- 2025-07-20 17:28:28,730 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:28:29,616 - sglang - INFO - [2025-07-20 17:28:29 TP0] Decode batch. #running-req: 5, #token: 14430, token usage: 0.38, gen throughput (token/s): 225.45, #queue-req: 0
- 2025-07-20 17:28:29,617 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:28:30,501 - sglang - INFO - [2025-07-20 17:28:30 TP0] Decode batch. #running-req: 5, #token: 14630, token usage: 0.39, gen throughput (token/s): 226.05, #queue-req: 0
- 2025-07-20 17:28:30,501 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:28:31,379 - sglang - INFO - [2025-07-20 17:28:31 TP0] Decode batch. #running-req: 4, #token: 12121, token usage: 0.32, gen throughput (token/s): 199.38, #queue-req: 0
- 2025-07-20 17:28:31,379 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:28:32,248 - sglang - INFO - [2025-07-20 17:28:32 TP0] Decode batch. #running-req: 4, #token: 12281, token usage: 0.32, gen throughput (token/s): 184.12, #queue-req: 0
- 2025-07-20 17:28:32,248 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:28:33,115 - sglang - INFO - [2025-07-20 17:28:33 TP0] Decode batch. #running-req: 3, #token: 9307, token usage: 0.25, gen throughput (token/s): 181.02, #queue-req: 0
- 2025-07-20 17:28:33,115 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:28:33,979 - sglang - INFO - [2025-07-20 17:28:33 TP0] Decode batch. #running-req: 3, #token: 9427, token usage: 0.25, gen throughput (token/s): 138.89, #queue-req: 0
- 2025-07-20 17:28:33,980 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:28:34,846 - sglang - INFO - [2025-07-20 17:28:34 TP0] Decode batch. #running-req: 3, #token: 9547, token usage: 0.25, gen throughput (token/s): 138.43, #queue-req: 0
- 2025-07-20 17:28:34,846 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:28:35,697 - sglang - INFO - [2025-07-20 17:28:35 TP0] Decode batch. #running-req: 1, #token: 2983, token usage: 0.08, gen throughput (token/s): 89.34, #queue-req: 0
- 2025-07-20 17:28:35,697 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:28:36,537 - sglang - INFO - [2025-07-20 17:28:36 TP0] Decode batch. #running-req: 1, #token: 3023, token usage: 0.08, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:28:36,537 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:28:36,768 - __main__ - INFO - Queue remaining: 27
- 2025-07-20 17:28:36,768 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 192.91 192.91
- finished_output_tokens 45.55 45.55
- sglang_input_tokens 307.62 307.62
- sglang_output_tokens 80.81 80.81
- 2025-07-20 17:28:36,768 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 11 | 12
- 2025-07-20 17:28:37,376 - sglang - INFO - [2025-07-20 17:28:37 TP0] Decode batch. #running-req: 1, #token: 3063, token usage: 0.08, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:28:37,377 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:28:38,216 - sglang - INFO - [2025-07-20 17:28:38 TP0] Decode batch. #running-req: 1, #token: 3103, token usage: 0.08, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:28:38,217 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:28:39,054 - sglang - INFO - [2025-07-20 17:28:39 TP0] Decode batch. #running-req: 1, #token: 3143, token usage: 0.08, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:28:39,054 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:28:39,888 - sglang - INFO - [2025-07-20 17:28:39 TP0] Decode batch. #running-req: 1, #token: 3183, token usage: 0.08, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:28:39,888 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:28:40,456 - __main__ - INFO - Finished TaskGroup for worker on 10dc5d29c3f17870daf918c9555cd0b939acbffe
- 2025-07-20 17:28:40,456 - __main__ - INFO - Got 1 docs for 10dc5d29c3f17870daf918c9555cd0b939acbffe
- 2025-07-20 17:28:40,458 - __main__ - INFO - Worker 0 processing work item d0cf1cf8644fafcb025a313b4bec083ea97e8c8d
- 2025-07-20 17:28:40,458 - __main__ - INFO - Created all tasks for d0cf1cf8644fafcb025a313b4bec083ea97e8c8d
- 2025-07-20 17:28:40,465 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010604200101.pdf in worker 0
- 2025-07-20 17:28:40,602 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-6
- 2025-07-20 17:28:40,622 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-4
- 2025-07-20 17:28:40,629 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-3
- 2025-07-20 17:28:40,631 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-8
- 2025-07-20 17:28:40,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-1
- 2025-07-20 17:28:40,661 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-7
- 2025-07-20 17:28:40,675 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-2
- 2025-07-20 17:28:40,691 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-5
- 2025-07-20 17:28:40,827 - sglang - INFO - [2025-07-20 17:28:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2107, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:28:40,827 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:28:41,596 - sglang - INFO - [2025-07-20 17:28:41 TP0] Prefill batch. #new-seq: 6, #new-token: 13079, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.06, #running-req: 1, #queue-req: 1
- 2025-07-20 17:28:41,596 - __main__ - INFO - sglang running req: 1 queue req: 1
- 2025-07-20 17:28:45,865 - sglang - INFO - [2025-07-20 17:28:45 TP0] Decode batch. #running-req: 7, #token: 15277, token usage: 0.40, gen throughput (token/s): 19.74, #queue-req: 1
- 2025-07-20 17:28:45,865 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:28:46,753 - sglang - INFO - [2025-07-20 17:28:46 TP0] Decode batch. #running-req: 7, #token: 15557, token usage: 0.41, gen throughput (token/s): 315.37, #queue-req: 1
- 2025-07-20 17:28:46,753 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:28:46,770 - __main__ - INFO - Queue remaining: 26
- 2025-07-20 17:28:46,770 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 295.43 295.43
- finished_output_tokens 68.69 68.69
- sglang_input_tokens 304.46 304.46
- sglang_output_tokens 81.16 81.16
- 2025-07-20 17:28:46,770 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 8
- 2025-07-20 17:28:47,633 - sglang - INFO - [2025-07-20 17:28:47 TP0] Decode batch. #running-req: 7, #token: 15837, token usage: 0.42, gen throughput (token/s): 318.01, #queue-req: 1
- 2025-07-20 17:28:47,634 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:28:48,516 - sglang - INFO - [2025-07-20 17:28:48 TP0] Decode batch. #running-req: 7, #token: 16117, token usage: 0.42, gen throughput (token/s): 317.36, #queue-req: 1
- 2025-07-20 17:28:48,516 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:28:49,407 - sglang - INFO - [2025-07-20 17:28:49 TP0] Decode batch. #running-req: 7, #token: 16397, token usage: 0.43, gen throughput (token/s): 314.14, #queue-req: 1
- 2025-07-20 17:28:49,407 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:28:50,299 - sglang - INFO - [2025-07-20 17:28:50 TP0] Decode batch. #running-req: 7, #token: 16677, token usage: 0.44, gen throughput (token/s): 314.00, #queue-req: 1
- 2025-07-20 17:28:50,299 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:28:50,969 - sglang - INFO - [2025-07-20 17:28:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2527, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 0
- 2025-07-20 17:28:50,970 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:28:51,946 - sglang - INFO - [2025-07-20 17:28:51 TP0] Decode batch. #running-req: 7, #token: 17261, token usage: 0.45, gen throughput (token/s): 169.35, #queue-req: 0
- 2025-07-20 17:28:51,946 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:28:52,842 - sglang - INFO - [2025-07-20 17:28:52 TP0] Decode batch. #running-req: 7, #token: 17541, token usage: 0.46, gen throughput (token/s): 312.64, #queue-req: 0
- 2025-07-20 17:28:52,842 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:28:53,733 - sglang - INFO - [2025-07-20 17:28:53 TP0] Decode batch. #running-req: 6, #token: 15468, token usage: 0.41, gen throughput (token/s): 283.97, #queue-req: 0
- 2025-07-20 17:28:53,733 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:28:54,620 - sglang - INFO - [2025-07-20 17:28:54 TP0] Decode batch. #running-req: 6, #token: 15708, token usage: 0.41, gen throughput (token/s): 270.48, #queue-req: 0
- 2025-07-20 17:28:54,620 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:28:55,498 - sglang - INFO - [2025-07-20 17:28:55 TP0] Decode batch. #running-req: 5, #token: 13251, token usage: 0.35, gen throughput (token/s): 232.29, #queue-req: 0
- 2025-07-20 17:28:55,498 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:28:56,382 - sglang - INFO - [2025-07-20 17:28:56 TP0] Decode batch. #running-req: 5, #token: 13451, token usage: 0.35, gen throughput (token/s): 226.28, #queue-req: 0
- 2025-07-20 17:28:56,382 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:28:56,771 - __main__ - INFO - Queue remaining: 26
- 2025-07-20 17:28:56,771 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 283.63 283.63
- finished_output_tokens 65.94 65.94
- sglang_input_tokens 326.78 326.78
- sglang_output_tokens 82.50 82.50
- 2025-07-20 17:28:56,771 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 8
- 2025-07-20 17:28:57,255 - sglang - INFO - [2025-07-20 17:28:57 TP0] Decode batch. #running-req: 4, #token: 11051, token usage: 0.29, gen throughput (token/s): 191.31, #queue-req: 0
- 2025-07-20 17:28:57,255 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:28:58,120 - sglang - INFO - [2025-07-20 17:28:58 TP0] Decode batch. #running-req: 3, #token: 8688, token usage: 0.23, gen throughput (token/s): 157.27, #queue-req: 0
- 2025-07-20 17:28:58,120 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:28:58,984 - sglang - INFO - [2025-07-20 17:28:58 TP0] Decode batch. #running-req: 3, #token: 8808, token usage: 0.23, gen throughput (token/s): 138.81, #queue-req: 0
- 2025-07-20 17:28:58,985 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:28:59,839 - sglang - INFO - [2025-07-20 17:28:59 TP0] Decode batch. #running-req: 1, #token: 3356, token usage: 0.09, gen throughput (token/s): 100.59, #queue-req: 0
- 2025-07-20 17:28:59,840 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:00,679 - sglang - INFO - [2025-07-20 17:29:00 TP0] Decode batch. #running-req: 1, #token: 3396, token usage: 0.09, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:29:00,680 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:01,519 - sglang - INFO - [2025-07-20 17:29:01 TP0] Decode batch. #running-req: 1, #token: 3436, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:29:01,519 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:02,357 - sglang - INFO - [2025-07-20 17:29:02 TP0] Decode batch. #running-req: 1, #token: 3476, token usage: 0.09, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-07-20 17:29:02,357 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:03,194 - sglang - INFO - [2025-07-20 17:29:03 TP0] Decode batch. #running-req: 1, #token: 3516, token usage: 0.09, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:29:03,194 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:04,036 - sglang - INFO - [2025-07-20 17:29:04 TP0] Decode batch. #running-req: 1, #token: 3556, token usage: 0.09, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:29:04,036 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:04,877 - sglang - INFO - [2025-07-20 17:29:04 TP0] Decode batch. #running-req: 1, #token: 3596, token usage: 0.09, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:29:04,877 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:05,365 - __main__ - INFO - Finished TaskGroup for worker on d0cf1cf8644fafcb025a313b4bec083ea97e8c8d
- 2025-07-20 17:29:05,365 - __main__ - INFO - Got 1 docs for d0cf1cf8644fafcb025a313b4bec083ea97e8c8d
- 2025-07-20 17:29:05,366 - __main__ - INFO - Worker 0 processing work item 2ff00bac5e9500c24956e5386f6e7a49b2b55098
- 2025-07-20 17:29:05,366 - __main__ - INFO - Created all tasks for 2ff00bac5e9500c24956e5386f6e7a49b2b55098
- 2025-07-20 17:29:05,369 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106008000.pdf in worker 0
- 2025-07-20 17:29:05,428 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-5
- 2025-07-20 17:29:05,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-3
- 2025-07-20 17:29:05,544 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-2
- 2025-07-20 17:29:05,548 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-1
- 2025-07-20 17:29:05,566 - sglang - INFO - [2025-07-20 17:29:05 TP0] Prefill batch. #new-seq: 1, #new-token: 1102, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:29:05,566 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:29:05,597 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-4
- 2025-07-20 17:29:06,061 - sglang - INFO - [2025-07-20 17:29:06 TP0] Prefill batch. #new-seq: 4, #new-token: 8959, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.03, #running-req: 1, #queue-req: 0
- 2025-07-20 17:29:06,062 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:06,772 - __main__ - INFO - Queue remaining: 25
- 2025-07-20 17:29:06,772 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 340.74 340.74
- finished_output_tokens 77.72 77.72
- sglang_input_tokens 349.08 349.08
- sglang_output_tokens 89.24 89.24
- 2025-07-20 17:29:06,773 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-07-20 17:29:09,116 - sglang - INFO - [2025-07-20 17:29:09 TP0] Decode batch. #running-req: 5, #token: 10146, token usage: 0.27, gen throughput (token/s): 25.47, #queue-req: 0
- 2025-07-20 17:29:09,117 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:29:09,981 - sglang - INFO - [2025-07-20 17:29:09 TP0] Decode batch. #running-req: 4, #token: 9187, token usage: 0.24, gen throughput (token/s): 203.50, #queue-req: 0
- 2025-07-20 17:29:09,982 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:10,840 - sglang - INFO - [2025-07-20 17:29:10 TP0] Decode batch. #running-req: 4, #token: 9347, token usage: 0.25, gen throughput (token/s): 186.41, #queue-req: 0
- 2025-07-20 17:29:10,840 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:11,707 - sglang - INFO - [2025-07-20 17:29:11 TP0] Decode batch. #running-req: 4, #token: 9507, token usage: 0.25, gen throughput (token/s): 184.57, #queue-req: 0
- 2025-07-20 17:29:11,707 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:12,575 - sglang - INFO - [2025-07-20 17:29:12 TP0] Decode batch. #running-req: 4, #token: 9667, token usage: 0.25, gen throughput (token/s): 184.27, #queue-req: 0
- 2025-07-20 17:29:12,575 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:13,442 - sglang - INFO - [2025-07-20 17:29:13 TP0] Decode batch. #running-req: 4, #token: 9827, token usage: 0.26, gen throughput (token/s): 184.39, #queue-req: 0
- 2025-07-20 17:29:13,443 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:14,310 - sglang - INFO - [2025-07-20 17:29:14 TP0] Decode batch. #running-req: 4, #token: 9987, token usage: 0.26, gen throughput (token/s): 184.45, #queue-req: 0
- 2025-07-20 17:29:14,310 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:15,179 - sglang - INFO - [2025-07-20 17:29:15 TP0] Decode batch. #running-req: 4, #token: 10147, token usage: 0.27, gen throughput (token/s): 184.17, #queue-req: 0
- 2025-07-20 17:29:15,179 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:16,046 - sglang - INFO - [2025-07-20 17:29:16 TP0] Decode batch. #running-req: 4, #token: 10307, token usage: 0.27, gen throughput (token/s): 184.54, #queue-req: 0
- 2025-07-20 17:29:16,046 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:16,775 - __main__ - INFO - Queue remaining: 25
- 2025-07-20 17:29:16,775 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 328.14 328.14
- finished_output_tokens 74.85 74.85
- sglang_input_tokens 340.25 340.25
- sglang_output_tokens 86.06 86.06
- 2025-07-20 17:29:16,775 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 5
- 2025-07-20 17:29:16,915 - sglang - INFO - [2025-07-20 17:29:16 TP0] Decode batch. #running-req: 4, #token: 10467, token usage: 0.28, gen throughput (token/s): 184.01, #queue-req: 0
- 2025-07-20 17:29:16,915 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:17,780 - sglang - INFO - [2025-07-20 17:29:17 TP0] Decode batch. #running-req: 4, #token: 10627, token usage: 0.28, gen throughput (token/s): 184.95, #queue-req: 0
- 2025-07-20 17:29:17,781 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:18,647 - sglang - INFO - [2025-07-20 17:29:18 TP0] Decode batch. #running-req: 4, #token: 10787, token usage: 0.28, gen throughput (token/s): 184.72, #queue-req: 0
- 2025-07-20 17:29:18,647 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:19,507 - sglang - INFO - [2025-07-20 17:29:19 TP0] Decode batch. #running-req: 2, #token: 5519, token usage: 0.15, gen throughput (token/s): 126.68, #queue-req: 0
- 2025-07-20 17:29:19,507 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:29:20,356 - sglang - INFO - [2025-07-20 17:29:20 TP0] Decode batch. #running-req: 2, #token: 5599, token usage: 0.15, gen throughput (token/s): 94.22, #queue-req: 0
- 2025-07-20 17:29:20,356 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:29:21,205 - sglang - INFO - [2025-07-20 17:29:21 TP0] Decode batch. #running-req: 2, #token: 5679, token usage: 0.15, gen throughput (token/s): 94.20, #queue-req: 0
- 2025-07-20 17:29:21,205 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:29:22,049 - sglang - INFO - [2025-07-20 17:29:22 TP0] Decode batch. #running-req: 1, #token: 3150, token usage: 0.08, gen throughput (token/s): 62.77, #queue-req: 0
- 2025-07-20 17:29:22,050 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:22,099 - __main__ - INFO - Finished TaskGroup for worker on 2ff00bac5e9500c24956e5386f6e7a49b2b55098
- 2025-07-20 17:29:22,099 - __main__ - INFO - Got 1 docs for 2ff00bac5e9500c24956e5386f6e7a49b2b55098
- 2025-07-20 17:29:22,100 - __main__ - INFO - Worker 0 processing work item aef98857329873e434b4b835531b5abd2cfca622
- 2025-07-20 17:29:22,101 - __main__ - INFO - Created all tasks for aef98857329873e434b4b835531b5abd2cfca622
- 2025-07-20 17:29:22,108 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900702.pdf in worker 0
- 2025-07-20 17:29:22,218 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-6
- 2025-07-20 17:29:22,255 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-2
- 2025-07-20 17:29:22,298 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-3
- 2025-07-20 17:29:22,300 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-1
- 2025-07-20 17:29:22,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-4
- 2025-07-20 17:29:22,364 - sglang - INFO - [2025-07-20 17:29:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:29:22,364 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:29:22,366 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-5
- 2025-07-20 17:29:22,941 - sglang - INFO - [2025-07-20 17:29:22 TP0] Prefill batch. #new-seq: 5, #new-token: 11856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:29:22,941 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:26,776 - __main__ - INFO - Queue remaining: 24
- 2025-07-20 17:29:26,777 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 352.31 352.31
- finished_output_tokens 79.99 79.99
- sglang_input_tokens 360.06 360.06
- sglang_output_tokens 90.68 90.68
- 2025-07-20 17:29:26,777 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:29:27,285 - sglang - INFO - [2025-07-20 17:29:27 TP0] Decode batch. #running-req: 6, #token: 13580, token usage: 0.36, gen throughput (token/s): 43.93, #queue-req: 0
- 2025-07-20 17:29:27,285 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:29:28,173 - sglang - INFO - [2025-07-20 17:29:28 TP0] Decode batch. #running-req: 6, #token: 13820, token usage: 0.36, gen throughput (token/s): 270.34, #queue-req: 0
- 2025-07-20 17:29:28,173 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:29:29,062 - sglang - INFO - [2025-07-20 17:29:29 TP0] Decode batch. #running-req: 6, #token: 14060, token usage: 0.37, gen throughput (token/s): 269.89, #queue-req: 0
- 2025-07-20 17:29:29,062 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:29:29,951 - sglang - INFO - [2025-07-20 17:29:29 TP0] Decode batch. #running-req: 6, #token: 14300, token usage: 0.38, gen throughput (token/s): 270.15, #queue-req: 0
- 2025-07-20 17:29:29,951 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:29:30,838 - sglang - INFO - [2025-07-20 17:29:30 TP0] Decode batch. #running-req: 6, #token: 14540, token usage: 0.38, gen throughput (token/s): 270.35, #queue-req: 0
- 2025-07-20 17:29:30,838 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:29:31,722 - sglang - INFO - [2025-07-20 17:29:31 TP0] Decode batch. #running-req: 5, #token: 13046, token usage: 0.34, gen throughput (token/s): 256.71, #queue-req: 0
- 2025-07-20 17:29:31,723 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:29:32,599 - sglang - INFO - [2025-07-20 17:29:32 TP0] Decode batch. #running-req: 5, #token: 13246, token usage: 0.35, gen throughput (token/s): 228.13, #queue-req: 0
- 2025-07-20 17:29:32,599 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:29:33,466 - sglang - INFO - [2025-07-20 17:29:33 TP0] Decode batch. #running-req: 4, #token: 10752, token usage: 0.28, gen throughput (token/s): 189.13, #queue-req: 0
- 2025-07-20 17:29:33,466 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:34,338 - sglang - INFO - [2025-07-20 17:29:34 TP0] Decode batch. #running-req: 4, #token: 10912, token usage: 0.29, gen throughput (token/s): 183.59, #queue-req: 0
- 2025-07-20 17:29:34,338 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:29:35,204 - sglang - INFO - [2025-07-20 17:29:35 TP0] Decode batch. #running-req: 3, #token: 8986, token usage: 0.24, gen throughput (token/s): 151.28, #queue-req: 0
- 2025-07-20 17:29:35,204 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:29:36,066 - sglang - INFO - [2025-07-20 17:29:36 TP0] Decode batch. #running-req: 3, #token: 9106, token usage: 0.24, gen throughput (token/s): 139.23, #queue-req: 0
- 2025-07-20 17:29:36,066 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:29:36,778 - __main__ - INFO - Queue remaining: 24
- 2025-07-20 17:29:36,779 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 340.18 340.18
- finished_output_tokens 77.23 77.23
- sglang_input_tokens 366.80 366.80
- sglang_output_tokens 90.58 90.58
- 2025-07-20 17:29:36,779 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 3 | 6
- 2025-07-20 17:29:36,931 - sglang - INFO - [2025-07-20 17:29:36 TP0] Decode batch. #running-req: 3, #token: 9226, token usage: 0.24, gen throughput (token/s): 138.75, #queue-req: 0
- 2025-07-20 17:29:36,931 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:29:37,796 - sglang - INFO - [2025-07-20 17:29:37 TP0] Decode batch. #running-req: 3, #token: 9346, token usage: 0.25, gen throughput (token/s): 138.72, #queue-req: 0
- 2025-07-20 17:29:37,796 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:29:38,660 - sglang - INFO - [2025-07-20 17:29:38 TP0] Decode batch. #running-req: 3, #token: 9466, token usage: 0.25, gen throughput (token/s): 138.82, #queue-req: 0
- 2025-07-20 17:29:38,660 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:29:39,525 - sglang - INFO - [2025-07-20 17:29:39 TP0] Decode batch. #running-req: 3, #token: 9586, token usage: 0.25, gen throughput (token/s): 138.79, #queue-req: 0
- 2025-07-20 17:29:39,525 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:29:40,383 - sglang - INFO - [2025-07-20 17:29:40 TP0] Decode batch. #running-req: 3, #token: 9706, token usage: 0.26, gen throughput (token/s): 139.75, #queue-req: 0
- 2025-07-20 17:29:40,384 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:29:41,243 - sglang - INFO - [2025-07-20 17:29:41 TP0] Decode batch. #running-req: 2, #token: 6529, token usage: 0.17, gen throughput (token/s): 125.58, #queue-req: 0
- 2025-07-20 17:29:41,244 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:29:42,096 - sglang - INFO - [2025-07-20 17:29:42 TP0] Decode batch. #running-req: 2, #token: 6609, token usage: 0.17, gen throughput (token/s): 93.83, #queue-req: 0
- 2025-07-20 17:29:42,096 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:29:42,945 - sglang - INFO - [2025-07-20 17:29:42 TP0] Decode batch. #running-req: 1, #token: 3444, token usage: 0.09, gen throughput (token/s): 78.92, #queue-req: 0
- 2025-07-20 17:29:42,945 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:43,786 - sglang - INFO - [2025-07-20 17:29:43 TP0] Decode batch. #running-req: 1, #token: 3484, token usage: 0.09, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:29:43,786 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:44,627 - sglang - INFO - [2025-07-20 17:29:44 TP0] Decode batch. #running-req: 1, #token: 3524, token usage: 0.09, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:29:44,627 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:45,469 - sglang - INFO - [2025-07-20 17:29:45 TP0] Decode batch. #running-req: 1, #token: 3564, token usage: 0.09, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:29:45,469 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:46,106 - __main__ - INFO - Finished TaskGroup for worker on aef98857329873e434b4b835531b5abd2cfca622
- 2025-07-20 17:29:46,106 - __main__ - INFO - Got 1 docs for aef98857329873e434b4b835531b5abd2cfca622
- 2025-07-20 17:29:46,108 - __main__ - INFO - Worker 0 processing work item 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
- 2025-07-20 17:29:46,108 - __main__ - INFO - Created all tasks for 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
- 2025-07-20 17:29:46,114 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
- 2025-07-20 17:29:46,271 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
- 2025-07-20 17:29:46,289 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
- 2025-07-20 17:29:46,311 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
- 2025-07-20 17:29:46,355 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
- 2025-07-20 17:29:46,392 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
- 2025-07-20 17:29:46,401 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
- 2025-07-20 17:29:46,417 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
- 2025-07-20 17:29:46,443 - sglang - INFO - [2025-07-20 17:29:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1786, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:29:46,443 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:29:46,780 - __main__ - INFO - Queue remaining: 23
- 2025-07-20 17:29:46,781 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 373.29 373.90
- finished_output_tokens 85.31 85.45
- sglang_input_tokens 380.52 381.15
- sglang_output_tokens 95.30 95.45
- 2025-07-20 17:29:46,781 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 7
- 2025-07-20 17:29:47,070 - sglang - INFO - [2025-07-20 17:29:47 TP0] Prefill batch. #new-seq: 6, #new-token: 14500, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-07-20 17:29:47,070 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:29:51,561 - sglang - INFO - [2025-07-20 17:29:51 TP0] Decode batch. #running-req: 7, #token: 16356, token usage: 0.43, gen throughput (token/s): 16.42, #queue-req: 0
- 2025-07-20 17:29:51,561 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:29:52,456 - sglang - INFO - [2025-07-20 17:29:52 TP0] Decode batch. #running-req: 7, #token: 16636, token usage: 0.44, gen throughput (token/s): 312.96, #queue-req: 0
- 2025-07-20 17:29:52,456 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:29:53,348 - sglang - INFO - [2025-07-20 17:29:53 TP0] Decode batch. #running-req: 7, #token: 16916, token usage: 0.45, gen throughput (token/s): 313.71, #queue-req: 0
- 2025-07-20 17:29:53,348 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:29:54,238 - sglang - INFO - [2025-07-20 17:29:54 TP0] Decode batch. #running-req: 7, #token: 17196, token usage: 0.45, gen throughput (token/s): 314.82, #queue-req: 0
- 2025-07-20 17:29:54,238 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:29:55,129 - sglang - INFO - [2025-07-20 17:29:55 TP0] Decode batch. #running-req: 7, #token: 17476, token usage: 0.46, gen throughput (token/s): 314.05, #queue-req: 0
- 2025-07-20 17:29:55,129 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:29:56,024 - sglang - INFO - [2025-07-20 17:29:56 TP0] Decode batch. #running-req: 7, #token: 17756, token usage: 0.47, gen throughput (token/s): 312.98, #queue-req: 0
- 2025-07-20 17:29:56,024 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:29:56,783 - __main__ - INFO - Queue remaining: 23
- 2025-07-20 17:29:56,783 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 361.27 373.90
- finished_output_tokens 82.57 85.45
- sglang_input_tokens 368.26 381.15
- sglang_output_tokens 92.23 95.45
- 2025-07-20 17:29:56,783 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 7
- 2025-07-20 17:29:56,923 - sglang - INFO - [2025-07-20 17:29:56 TP0] Decode batch. #running-req: 7, #token: 18036, token usage: 0.47, gen throughput (token/s): 311.45, #queue-req: 0
- 2025-07-20 17:29:56,923 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:29:57,821 - sglang - INFO - [2025-07-20 17:29:57 TP0] Decode batch. #running-req: 7, #token: 18316, token usage: 0.48, gen throughput (token/s): 311.77, #queue-req: 0
- 2025-07-20 17:29:57,821 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:29:58,720 - sglang - INFO - [2025-07-20 17:29:58 TP0] Decode batch. #running-req: 7, #token: 18596, token usage: 0.49, gen throughput (token/s): 311.35, #queue-req: 0
- 2025-07-20 17:29:58,721 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:29:59,619 - sglang - INFO - [2025-07-20 17:29:59 TP0] Decode batch. #running-req: 7, #token: 18876, token usage: 0.50, gen throughput (token/s): 311.40, #queue-req: 0
- 2025-07-20 17:29:59,620 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:30:00,518 - sglang - INFO - [2025-07-20 17:30:00 TP0] Decode batch. #running-req: 7, #token: 19156, token usage: 0.50, gen throughput (token/s): 311.43, #queue-req: 0
- 2025-07-20 17:30:00,519 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:30:01,417 - sglang - INFO - [2025-07-20 17:30:01 TP0] Decode batch. #running-req: 6, #token: 17200, token usage: 0.45, gen throughput (token/s): 296.15, #queue-req: 0
- 2025-07-20 17:30:01,417 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:02,307 - sglang - INFO - [2025-07-20 17:30:02 TP0] Decode batch. #running-req: 6, #token: 17440, token usage: 0.46, gen throughput (token/s): 269.65, #queue-req: 0
- 2025-07-20 17:30:02,307 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:03,196 - sglang - INFO - [2025-07-20 17:30:03 TP0] Decode batch. #running-req: 6, #token: 17680, token usage: 0.47, gen throughput (token/s): 269.98, #queue-req: 0
- 2025-07-20 17:30:03,196 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:04,093 - sglang - INFO - [2025-07-20 17:30:04 TP0] Decode batch. #running-req: 6, #token: 17920, token usage: 0.47, gen throughput (token/s): 267.37, #queue-req: 0
- 2025-07-20 17:30:04,094 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:04,989 - sglang - INFO - [2025-07-20 17:30:04 TP0] Decode batch. #running-req: 6, #token: 18160, token usage: 0.48, gen throughput (token/s): 267.81, #queue-req: 0
- 2025-07-20 17:30:04,989 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:05,868 - sglang - INFO - [2025-07-20 17:30:05 TP0] Decode batch. #running-req: 4, #token: 12461, token usage: 0.33, gen throughput (token/s): 196.91, #queue-req: 0
- 2025-07-20 17:30:05,868 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:30:06,742 - sglang - INFO - [2025-07-20 17:30:06 TP0] Decode batch. #running-req: 4, #token: 12621, token usage: 0.33, gen throughput (token/s): 183.02, #queue-req: 0
- 2025-07-20 17:30:06,743 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:30:06,784 - __main__ - INFO - Queue remaining: 23
- 2025-07-20 17:30:06,784 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 349.99 373.90
- finished_output_tokens 79.99 85.45
- sglang_input_tokens 376.82 402.56
- sglang_output_tokens 94.57 101.03
- 2025-07-20 17:30:06,785 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 3 | 7
- 2025-07-20 17:30:07,611 - sglang - INFO - [2025-07-20 17:30:07 TP0] Decode batch. #running-req: 3, #token: 9603, token usage: 0.25, gen throughput (token/s): 148.50, #queue-req: 0
- 2025-07-20 17:30:07,611 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:30:08,476 - sglang - INFO - [2025-07-20 17:30:08 TP0] Decode batch. #running-req: 3, #token: 9723, token usage: 0.26, gen throughput (token/s): 138.72, #queue-req: 0
- 2025-07-20 17:30:08,476 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:30:09,325 - sglang - INFO - [2025-07-20 17:30:09 TP0] Decode batch. #running-req: 2, #token: 6582, token usage: 0.17, gen throughput (token/s): 100.10, #queue-req: 0
- 2025-07-20 17:30:09,326 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:30:10,170 - sglang - INFO - [2025-07-20 17:30:10 TP0] Decode batch. #running-req: 2, #token: 6662, token usage: 0.18, gen throughput (token/s): 94.69, #queue-req: 0
- 2025-07-20 17:30:10,170 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:30:11,018 - sglang - INFO - [2025-07-20 17:30:11 TP0] Decode batch. #running-req: 2, #token: 6742, token usage: 0.18, gen throughput (token/s): 94.36, #queue-req: 0
- 2025-07-20 17:30:11,018 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:30:11,871 - sglang - INFO - [2025-07-20 17:30:11 TP0] Decode batch. #running-req: 2, #token: 6822, token usage: 0.18, gen throughput (token/s): 93.79, #queue-req: 0
- 2025-07-20 17:30:11,871 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:30:12,721 - sglang - INFO - [2025-07-20 17:30:12 TP0] Decode batch. #running-req: 2, #token: 6902, token usage: 0.18, gen throughput (token/s): 94.13, #queue-req: 0
- 2025-07-20 17:30:12,721 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:30:13,572 - sglang - INFO - [2025-07-20 17:30:13 TP0] Decode batch. #running-req: 2, #token: 6982, token usage: 0.18, gen throughput (token/s): 94.00, #queue-req: 0
- 2025-07-20 17:30:13,572 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:30:14,426 - sglang - INFO - [2025-07-20 17:30:14 TP0] Decode batch. #running-req: 2, #token: 7062, token usage: 0.19, gen throughput (token/s): 93.63, #queue-req: 0
- 2025-07-20 17:30:14,426 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:30:15,274 - sglang - INFO - [2025-07-20 17:30:15 TP0] Decode batch. #running-req: 1, #token: 3634, token usage: 0.10, gen throughput (token/s): 68.40, #queue-req: 0
- 2025-07-20 17:30:15,274 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:15,324 - __main__ - INFO - Finished TaskGroup for worker on 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
- 2025-07-20 17:30:15,324 - __main__ - INFO - Got 1 docs for 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
- 2025-07-20 17:30:15,326 - __main__ - INFO - Worker 0 processing work item f89f7b1c93bc7bae613c7002942c0c65ba3a03d7
- 2025-07-20 17:30:15,326 - __main__ - INFO - Created all tasks for f89f7b1c93bc7bae613c7002942c0c65ba3a03d7
- 2025-07-20 17:30:15,333 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010603501801.pdf in worker 0
- 2025-07-20 17:30:15,451 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-6
- 2025-07-20 17:30:15,478 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-4
- 2025-07-20 17:30:15,487 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-3
- 2025-07-20 17:30:15,508 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-5
- 2025-07-20 17:30:15,524 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-2
- 2025-07-20 17:30:15,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-1
- 2025-07-20 17:30:15,556 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-8
- 2025-07-20 17:30:15,605 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-7
- 2025-07-20 17:30:15,634 - sglang - INFO - [2025-07-20 17:30:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1553, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:30:15,634 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:30:16,234 - sglang - INFO - [2025-07-20 17:30:16 TP0] Prefill batch. #new-seq: 6, #new-token: 12982, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 1
- 2025-07-20 17:30:16,234 - __main__ - INFO - sglang running req: 1 queue req: 1
- 2025-07-20 17:30:16,786 - __main__ - INFO - Queue remaining: 22
- 2025-07-20 17:30:16,787 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 388.68 428.19
- finished_output_tokens 93.63 103.15
- sglang_input_tokens 395.25 435.43
- sglang_output_tokens 102.71 113.15
- 2025-07-20 17:30:16,787 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 8
- 2025-07-20 17:30:21,035 - sglang - INFO - [2025-07-20 17:30:21 TP0] Decode batch. #running-req: 7, #token: 14801, token usage: 0.39, gen throughput (token/s): 46.52, #queue-req: 1
- 2025-07-20 17:30:21,036 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:30:21,926 - sglang - INFO - [2025-07-20 17:30:21 TP0] Decode batch. #running-req: 7, #token: 15081, token usage: 0.40, gen throughput (token/s): 314.31, #queue-req: 1
- 2025-07-20 17:30:21,927 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:30:22,817 - sglang - INFO - [2025-07-20 17:30:22 TP0] Decode batch. #running-req: 7, #token: 15361, token usage: 0.40, gen throughput (token/s): 314.34, #queue-req: 1
- 2025-07-20 17:30:22,817 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:30:23,708 - sglang - INFO - [2025-07-20 17:30:23 TP0] Decode batch. #running-req: 7, #token: 15641, token usage: 0.41, gen throughput (token/s): 314.11, #queue-req: 1
- 2025-07-20 17:30:23,709 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:30:24,598 - sglang - INFO - [2025-07-20 17:30:24 TP0] Decode batch. #running-req: 7, #token: 15921, token usage: 0.42, gen throughput (token/s): 314.86, #queue-req: 1
- 2025-07-20 17:30:24,598 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:30:25,488 - sglang - INFO - [2025-07-20 17:30:25 TP0] Decode batch. #running-req: 7, #token: 16201, token usage: 0.43, gen throughput (token/s): 314.63, #queue-req: 1
- 2025-07-20 17:30:25,488 - __main__ - INFO - sglang running req: 7 queue req: 1
- 2025-07-20 17:30:25,555 - sglang - INFO - [2025-07-20 17:30:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2623, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.38, #running-req: 6, #queue-req: 0
- 2025-07-20 17:30:25,555 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:26,788 - __main__ - INFO - Queue remaining: 22
- 2025-07-20 17:30:26,789 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 377.26 428.19
- finished_output_tokens 90.88 103.15
- sglang_input_tokens 388.20 440.61
- sglang_output_tokens 100.40 113.96
- 2025-07-20 17:30:26,789 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 8
- 2025-07-20 17:30:27,165 - sglang - INFO - [2025-07-20 17:30:27 TP0] Decode batch. #running-req: 7, #token: 17310, token usage: 0.46, gen throughput (token/s): 166.34, #queue-req: 0
- 2025-07-20 17:30:27,165 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:30:28,057 - sglang - INFO - [2025-07-20 17:30:28 TP0] Decode batch. #running-req: 6, #token: 15265, token usage: 0.40, gen throughput (token/s): 274.66, #queue-req: 0
- 2025-07-20 17:30:28,057 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:28,948 - sglang - INFO - [2025-07-20 17:30:28 TP0] Decode batch. #running-req: 6, #token: 15505, token usage: 0.41, gen throughput (token/s): 269.43, #queue-req: 0
- 2025-07-20 17:30:28,948 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:29,838 - sglang - INFO - [2025-07-20 17:30:29 TP0] Decode batch. #running-req: 6, #token: 15745, token usage: 0.41, gen throughput (token/s): 269.63, #queue-req: 0
- 2025-07-20 17:30:29,838 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:30,727 - sglang - INFO - [2025-07-20 17:30:30 TP0] Decode batch. #running-req: 6, #token: 15985, token usage: 0.42, gen throughput (token/s): 269.91, #queue-req: 0
- 2025-07-20 17:30:30,727 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:30:31,610 - sglang - INFO - [2025-07-20 17:30:31 TP0] Decode batch. #running-req: 5, #token: 13627, token usage: 0.36, gen throughput (token/s): 229.94, #queue-req: 0
- 2025-07-20 17:30:31,610 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:30:32,489 - sglang - INFO - [2025-07-20 17:30:32 TP0] Decode batch. #running-req: 5, #token: 13827, token usage: 0.36, gen throughput (token/s): 227.56, #queue-req: 0
- 2025-07-20 17:30:32,489 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:30:33,373 - sglang - INFO - [2025-07-20 17:30:33 TP0] Decode batch. #running-req: 5, #token: 14027, token usage: 0.37, gen throughput (token/s): 226.28, #queue-req: 0
- 2025-07-20 17:30:33,373 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:30:34,261 - sglang - INFO - [2025-07-20 17:30:34 TP0] Decode batch. #running-req: 5, #token: 14227, token usage: 0.37, gen throughput (token/s): 225.06, #queue-req: 0
- 2025-07-20 17:30:34,262 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:30:35,137 - sglang - INFO - [2025-07-20 17:30:35 TP0] Decode batch. #running-req: 3, #token: 8548, token usage: 0.23, gen throughput (token/s): 188.35, #queue-req: 0
- 2025-07-20 17:30:35,138 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:30:36,000 - sglang - INFO - [2025-07-20 17:30:36 TP0] Decode batch. #running-req: 3, #token: 8668, token usage: 0.23, gen throughput (token/s): 139.04, #queue-req: 0
- 2025-07-20 17:30:36,001 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:30:36,790 - __main__ - INFO - Queue remaining: 22
- 2025-07-20 17:30:36,790 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 366.49 428.19
- finished_output_tokens 88.29 103.15
- sglang_input_tokens 414.16 483.88
- sglang_output_tokens 107.09 125.12
- 2025-07-20 17:30:36,790 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 7 | 8
- 2025-07-20 17:30:36,848 - sglang - INFO - [2025-07-20 17:30:36 TP0] Decode batch. #running-req: 1, #token: 3100, token usage: 0.08, gen throughput (token/s): 73.12, #queue-req: 0
- 2025-07-20 17:30:36,849 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:37,690 - sglang - INFO - [2025-07-20 17:30:37 TP0] Decode batch. #running-req: 1, #token: 3140, token usage: 0.08, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:30:37,690 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:38,532 - sglang - INFO - [2025-07-20 17:30:38 TP0] Decode batch. #running-req: 1, #token: 3180, token usage: 0.08, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:30:38,532 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:39,374 - sglang - INFO - [2025-07-20 17:30:39 TP0] Decode batch. #running-req: 1, #token: 3220, token usage: 0.08, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:30:39,374 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:40,207 - sglang - INFO - [2025-07-20 17:30:40 TP0] Decode batch. #running-req: 1, #token: 3260, token usage: 0.09, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:30:40,208 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:41,046 - sglang - INFO - [2025-07-20 17:30:41 TP0] Decode batch. #running-req: 1, #token: 3300, token usage: 0.09, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:30:41,047 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:41,889 - sglang - INFO - [2025-07-20 17:30:41 TP0] Decode batch. #running-req: 1, #token: 3340, token usage: 0.09, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:30:41,889 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:42,729 - sglang - INFO - [2025-07-20 17:30:42 TP0] Decode batch. #running-req: 1, #token: 3380, token usage: 0.09, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:30:42,729 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:43,569 - sglang - INFO - [2025-07-20 17:30:43 TP0] Decode batch. #running-req: 1, #token: 3420, token usage: 0.09, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:30:43,570 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:44,409 - sglang - INFO - [2025-07-20 17:30:44 TP0] Decode batch. #running-req: 1, #token: 3460, token usage: 0.09, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:30:44,410 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:45,249 - sglang - INFO - [2025-07-20 17:30:45 TP0] Decode batch. #running-req: 1, #token: 3500, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:30:45,249 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:46,088 - sglang - INFO - [2025-07-20 17:30:46 TP0] Decode batch. #running-req: 1, #token: 3540, token usage: 0.09, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:30:46,088 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:46,792 - __main__ - INFO - Queue remaining: 22
- 2025-07-20 17:30:46,792 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 356.33 428.19
- finished_output_tokens 85.84 103.15
- sglang_input_tokens 402.67 483.88
- sglang_output_tokens 104.12 125.12
- 2025-07-20 17:30:46,792 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 7 | 8
- 2025-07-20 17:30:46,921 - sglang - INFO - [2025-07-20 17:30:46 TP0] Decode batch. #running-req: 1, #token: 3580, token usage: 0.09, gen throughput (token/s): 48.03, #queue-req: 0
- 2025-07-20 17:30:46,921 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:47,751 - sglang - INFO - [2025-07-20 17:30:47 TP0] Decode batch. #running-req: 1, #token: 3620, token usage: 0.10, gen throughput (token/s): 48.16, #queue-req: 0
- 2025-07-20 17:30:47,752 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:48,588 - sglang - INFO - [2025-07-20 17:30:48 TP0] Decode batch. #running-req: 1, #token: 3660, token usage: 0.10, gen throughput (token/s): 47.79, #queue-req: 0
- 2025-07-20 17:30:48,589 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:30:49,035 - __main__ - INFO - Finished TaskGroup for worker on f89f7b1c93bc7bae613c7002942c0c65ba3a03d7
- 2025-07-20 17:30:49,035 - __main__ - INFO - Got 1 docs for f89f7b1c93bc7bae613c7002942c0c65ba3a03d7
- 2025-07-20 17:30:49,036 - __main__ - INFO - Worker 0 processing work item dbac13d5d8d14af821606b2b6fcec79288c911ad
- 2025-07-20 17:30:49,036 - __main__ - INFO - Created all tasks for dbac13d5d8d14af821606b2b6fcec79288c911ad
- 2025-07-20 17:30:49,045 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
- 2025-07-20 17:30:49,160 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
- 2025-07-20 17:30:49,165 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
- 2025-07-20 17:30:49,202 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
- 2025-07-20 17:30:49,204 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
- 2025-07-20 17:30:49,208 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
- 2025-07-20 17:30:49,251 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
- 2025-07-20 17:30:49,255 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
- 2025-07-20 17:30:49,256 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
- 2025-07-20 17:30:49,300 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
- 2025-07-20 17:30:49,331 - sglang - INFO - [2025-07-20 17:30:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2100, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:30:49,332 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:30:50,082 - sglang - INFO - [2025-07-20 17:30:50 TP0] Prefill batch. #new-seq: 6, #new-token: 12724, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.06, #running-req: 1, #queue-req: 2
- 2025-07-20 17:30:50,082 - __main__ - INFO - sglang running req: 1 queue req: 2
- 2025-07-20 17:30:54,426 - sglang - INFO - [2025-07-20 17:30:54 TP0] Decode batch. #running-req: 7, #token: 14957, token usage: 0.39, gen throughput (token/s): 26.38, #queue-req: 2
- 2025-07-20 17:30:54,426 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:30:55,307 - sglang - INFO - [2025-07-20 17:30:55 TP0] Decode batch. #running-req: 7, #token: 15237, token usage: 0.40, gen throughput (token/s): 317.59, #queue-req: 2
- 2025-07-20 17:30:55,308 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:30:56,197 - sglang - INFO - [2025-07-20 17:30:56 TP0] Decode batch. #running-req: 7, #token: 15517, token usage: 0.41, gen throughput (token/s): 314.76, #queue-req: 2
- 2025-07-20 17:30:56,197 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:30:56,794 - __main__ - INFO - Queue remaining: 21
- 2025-07-20 17:30:56,794 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 393.02 485.38
- finished_output_tokens 96.07 118.65
- sglang_input_tokens 398.88 492.63
- sglang_output_tokens 104.17 128.65
- 2025-07-20 17:30:56,794 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 9
- 2025-07-20 17:30:57,088 - sglang - INFO - [2025-07-20 17:30:57 TP0] Decode batch. #running-req: 7, #token: 15797, token usage: 0.42, gen throughput (token/s): 314.20, #queue-req: 2
- 2025-07-20 17:30:57,088 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:30:57,978 - sglang - INFO - [2025-07-20 17:30:57 TP0] Decode batch. #running-req: 7, #token: 16077, token usage: 0.42, gen throughput (token/s): 314.51, #queue-req: 2
- 2025-07-20 17:30:57,979 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:30:58,873 - sglang - INFO - [2025-07-20 17:30:58 TP0] Decode batch. #running-req: 7, #token: 16357, token usage: 0.43, gen throughput (token/s): 313.13, #queue-req: 2
- 2025-07-20 17:30:58,873 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:30:59,500 - sglang - INFO - [2025-07-20 17:30:59 TP0] Prefill batch. #new-seq: 2, #new-token: 5006, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.37, #running-req: 6, #queue-req: 0
- 2025-07-20 17:30:59,500 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:01,286 - sglang - INFO - [2025-07-20 17:31:01 TP0] Decode batch. #running-req: 8, #token: 19280, token usage: 0.51, gen throughput (token/s): 120.56, #queue-req: 0
- 2025-07-20 17:31:01,287 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:31:02,177 - sglang - INFO - [2025-07-20 17:31:02 TP0] Decode batch. #running-req: 7, #token: 17568, token usage: 0.46, gen throughput (token/s): 318.72, #queue-req: 0
- 2025-07-20 17:31:02,178 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:31:03,067 - sglang - INFO - [2025-07-20 17:31:03 TP0] Decode batch. #running-req: 7, #token: 17848, token usage: 0.47, gen throughput (token/s): 314.84, #queue-req: 0
- 2025-07-20 17:31:03,067 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:31:03,965 - sglang - INFO - [2025-07-20 17:31:03 TP0] Decode batch. #running-req: 7, #token: 18128, token usage: 0.48, gen throughput (token/s): 311.90, #queue-req: 0
- 2025-07-20 17:31:03,965 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:31:04,850 - sglang - INFO - [2025-07-20 17:31:04 TP0] Decode batch. #running-req: 5, #token: 13723, token usage: 0.36, gen throughput (token/s): 232.52, #queue-req: 0
- 2025-07-20 17:31:04,851 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:31:05,730 - sglang - INFO - [2025-07-20 17:31:05 TP0] Decode batch. #running-req: 4, #token: 11236, token usage: 0.30, gen throughput (token/s): 205.80, #queue-req: 0
- 2025-07-20 17:31:05,730 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:06,600 - sglang - INFO - [2025-07-20 17:31:06 TP0] Decode batch. #running-req: 4, #token: 11396, token usage: 0.30, gen throughput (token/s): 183.86, #queue-req: 0
- 2025-07-20 17:31:06,601 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:06,795 - __main__ - INFO - Queue remaining: 21
- 2025-07-20 17:31:06,796 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 382.69 485.38
- finished_output_tokens 93.55 118.65
- sglang_input_tokens 414.51 525.75
- sglang_output_tokens 105.95 134.38
- 2025-07-20 17:31:06,796 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 5 | 9
- 2025-07-20 17:31:07,470 - sglang - INFO - [2025-07-20 17:31:07 TP0] Decode batch. #running-req: 4, #token: 11556, token usage: 0.30, gen throughput (token/s): 183.93, #queue-req: 0
- 2025-07-20 17:31:07,470 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:08,340 - sglang - INFO - [2025-07-20 17:31:08 TP0] Decode batch. #running-req: 4, #token: 11716, token usage: 0.31, gen throughput (token/s): 183.93, #queue-req: 0
- 2025-07-20 17:31:08,340 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:09,206 - sglang - INFO - [2025-07-20 17:31:09 TP0] Decode batch. #running-req: 4, #token: 11876, token usage: 0.31, gen throughput (token/s): 184.63, #queue-req: 0
- 2025-07-20 17:31:09,207 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:10,075 - sglang - INFO - [2025-07-20 17:31:10 TP0] Decode batch. #running-req: 4, #token: 12036, token usage: 0.32, gen throughput (token/s): 184.35, #queue-req: 0
- 2025-07-20 17:31:10,075 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:10,933 - sglang - INFO - [2025-07-20 17:31:10 TP0] Decode batch. #running-req: 3, #token: 8891, token usage: 0.23, gen throughput (token/s): 142.15, #queue-req: 0
- 2025-07-20 17:31:10,933 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:31:11,786 - sglang - INFO - [2025-07-20 17:31:11 TP0] Decode batch. #running-req: 2, #token: 5990, token usage: 0.16, gen throughput (token/s): 116.02, #queue-req: 0
- 2025-07-20 17:31:11,786 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:31:12,630 - sglang - INFO - [2025-07-20 17:31:12 TP0] Decode batch. #running-req: 2, #token: 6070, token usage: 0.16, gen throughput (token/s): 94.73, #queue-req: 0
- 2025-07-20 17:31:12,631 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:31:13,471 - sglang - INFO - [2025-07-20 17:31:13 TP0] Decode batch. #running-req: 2, #token: 6150, token usage: 0.16, gen throughput (token/s): 95.19, #queue-req: 0
- 2025-07-20 17:31:13,471 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:31:14,306 - sglang - INFO - [2025-07-20 17:31:14 TP0] Decode batch. #running-req: 1, #token: 2929, token usage: 0.08, gen throughput (token/s): 73.04, #queue-req: 0
- 2025-07-20 17:31:14,307 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:15,136 - sglang - INFO - [2025-07-20 17:31:15 TP0] Decode batch. #running-req: 1, #token: 2969, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:31:15,136 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:15,965 - sglang - INFO - [2025-07-20 17:31:15 TP0] Decode batch. #running-req: 1, #token: 3009, token usage: 0.08, gen throughput (token/s): 48.23, #queue-req: 0
- 2025-07-20 17:31:15,966 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:16,794 - sglang - INFO - [2025-07-20 17:31:16 TP0] Decode batch. #running-req: 1, #token: 3049, token usage: 0.08, gen throughput (token/s): 48.24, #queue-req: 0
- 2025-07-20 17:31:16,795 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:16,797 - __main__ - INFO - Queue remaining: 21
- 2025-07-20 17:31:16,797 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 372.89 485.38
- finished_output_tokens 91.15 118.65
- sglang_input_tokens 423.30 551.00
- sglang_output_tokens 108.29 140.96
- 2025-07-20 17:31:16,798 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:31:17,624 - sglang - INFO - [2025-07-20 17:31:17 TP0] Decode batch. #running-req: 1, #token: 3089, token usage: 0.08, gen throughput (token/s): 48.20, #queue-req: 0
- 2025-07-20 17:31:17,625 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:18,455 - sglang - INFO - [2025-07-20 17:31:18 TP0] Decode batch. #running-req: 1, #token: 3129, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
- 2025-07-20 17:31:18,455 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:19,284 - sglang - INFO - [2025-07-20 17:31:19 TP0] Decode batch. #running-req: 1, #token: 3169, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:31:19,285 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:20,098 - __main__ - INFO - Finished TaskGroup for worker on dbac13d5d8d14af821606b2b6fcec79288c911ad
- 2025-07-20 17:31:20,098 - __main__ - INFO - Got 1 docs for dbac13d5d8d14af821606b2b6fcec79288c911ad
- 2025-07-20 17:31:20,099 - __main__ - INFO - Worker 0 processing work item a516ff5c967066055babccbea12ff6a88bdfe9b5
- 2025-07-20 17:31:20,099 - __main__ - INFO - Created all tasks for a516ff5c967066055babccbea12ff6a88bdfe9b5
- 2025-07-20 17:31:20,102 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900901.pdf in worker 0
- 2025-07-20 17:31:20,226 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-6
- 2025-07-20 17:31:20,246 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-1
- 2025-07-20 17:31:20,261 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-3
- 2025-07-20 17:31:20,268 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-2
- 2025-07-20 17:31:20,306 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-4
- 2025-07-20 17:31:20,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-5
- 2025-07-20 17:31:20,377 - sglang - INFO - [2025-07-20 17:31:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2595, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:31:20,377 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:31:21,176 - sglang - INFO - [2025-07-20 17:31:21 TP0] Prefill batch. #new-seq: 5, #new-token: 10243, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.07, #running-req: 1, #queue-req: 0
- 2025-07-20 17:31:21,176 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:24,384 - sglang - INFO - [2025-07-20 17:31:24 TP0] Decode batch. #running-req: 6, #token: 12844, token usage: 0.34, gen throughput (token/s): 8.82, #queue-req: 0
- 2025-07-20 17:31:24,384 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:25,254 - sglang - INFO - [2025-07-20 17:31:25 TP0] Decode batch. #running-req: 6, #token: 13084, token usage: 0.34, gen throughput (token/s): 275.75, #queue-req: 0
- 2025-07-20 17:31:25,254 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:26,127 - sglang - INFO - [2025-07-20 17:31:26 TP0] Decode batch. #running-req: 6, #token: 13324, token usage: 0.35, gen throughput (token/s): 274.84, #queue-req: 0
- 2025-07-20 17:31:26,128 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:26,800 - __main__ - INFO - Queue remaining: 20
- 2025-07-20 17:31:26,800 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 413.08 551.48
- finished_output_tokens 100.32 133.93
- sglang_input_tokens 418.51 558.73
- sglang_output_tokens 107.81 143.93
- 2025-07-20 17:31:26,800 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:31:27,003 - sglang - INFO - [2025-07-20 17:31:27 TP0] Decode batch. #running-req: 6, #token: 13564, token usage: 0.36, gen throughput (token/s): 274.19, #queue-req: 0
- 2025-07-20 17:31:27,003 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:27,877 - sglang - INFO - [2025-07-20 17:31:27 TP0] Decode batch. #running-req: 6, #token: 13804, token usage: 0.36, gen throughput (token/s): 274.54, #queue-req: 0
- 2025-07-20 17:31:27,877 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:28,752 - sglang - INFO - [2025-07-20 17:31:28 TP0] Decode batch. #running-req: 6, #token: 14044, token usage: 0.37, gen throughput (token/s): 274.35, #queue-req: 0
- 2025-07-20 17:31:28,752 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:29,626 - sglang - INFO - [2025-07-20 17:31:29 TP0] Decode batch. #running-req: 6, #token: 14284, token usage: 0.38, gen throughput (token/s): 274.36, #queue-req: 0
- 2025-07-20 17:31:29,627 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:30,502 - sglang - INFO - [2025-07-20 17:31:30 TP0] Decode batch. #running-req: 5, #token: 12651, token usage: 0.33, gen throughput (token/s): 262.59, #queue-req: 0
- 2025-07-20 17:31:30,502 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:31:31,374 - sglang - INFO - [2025-07-20 17:31:31 TP0] Decode batch. #running-req: 5, #token: 12851, token usage: 0.34, gen throughput (token/s): 229.39, #queue-req: 0
- 2025-07-20 17:31:31,374 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:31:32,246 - sglang - INFO - [2025-07-20 17:31:32 TP0] Decode batch. #running-req: 5, #token: 13051, token usage: 0.34, gen throughput (token/s): 229.50, #queue-req: 0
- 2025-07-20 17:31:32,246 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:31:33,117 - sglang - INFO - [2025-07-20 17:31:33 TP0] Decode batch. #running-req: 5, #token: 13251, token usage: 0.35, gen throughput (token/s): 229.53, #queue-req: 0
- 2025-07-20 17:31:33,117 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:31:33,989 - sglang - INFO - [2025-07-20 17:31:33 TP0] Decode batch. #running-req: 5, #token: 13451, token usage: 0.35, gen throughput (token/s): 229.43, #queue-req: 0
- 2025-07-20 17:31:33,989 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:31:34,860 - sglang - INFO - [2025-07-20 17:31:34 TP0] Decode batch. #running-req: 4, #token: 11363, token usage: 0.30, gen throughput (token/s): 216.96, #queue-req: 0
- 2025-07-20 17:31:34,860 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:35,720 - sglang - INFO - [2025-07-20 17:31:35 TP0] Decode batch. #running-req: 4, #token: 11523, token usage: 0.30, gen throughput (token/s): 185.90, #queue-req: 0
- 2025-07-20 17:31:35,721 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:36,581 - sglang - INFO - [2025-07-20 17:31:36 TP0] Decode batch. #running-req: 4, #token: 11683, token usage: 0.31, gen throughput (token/s): 186.01, #queue-req: 0
- 2025-07-20 17:31:36,581 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:36,801 - __main__ - INFO - Queue remaining: 20
- 2025-07-20 17:31:36,802 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 403.02 551.48
- finished_output_tokens 97.88 133.93
- sglang_input_tokens 416.59 514.40
- sglang_output_tokens 107.00 135.11
- 2025-07-20 17:31:36,802 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 6
- 2025-07-20 17:31:37,441 - sglang - INFO - [2025-07-20 17:31:37 TP0] Decode batch. #running-req: 4, #token: 11843, token usage: 0.31, gen throughput (token/s): 185.96, #queue-req: 0
- 2025-07-20 17:31:37,441 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:38,298 - sglang - INFO - [2025-07-20 17:31:38 TP0] Decode batch. #running-req: 3, #token: 9299, token usage: 0.24, gen throughput (token/s): 152.91, #queue-req: 0
- 2025-07-20 17:31:38,298 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:31:39,147 - sglang - INFO - [2025-07-20 17:31:39 TP0] Decode batch. #running-req: 2, #token: 6143, token usage: 0.16, gen throughput (token/s): 125.94, #queue-req: 0
- 2025-07-20 17:31:39,148 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:31:39,987 - sglang - INFO - [2025-07-20 17:31:39 TP0] Decode batch. #running-req: 2, #token: 6223, token usage: 0.16, gen throughput (token/s): 95.32, #queue-req: 0
- 2025-07-20 17:31:39,987 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:31:40,826 - sglang - INFO - [2025-07-20 17:31:40 TP0] Decode batch. #running-req: 2, #token: 6303, token usage: 0.17, gen throughput (token/s): 95.34, #queue-req: 0
- 2025-07-20 17:31:40,826 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:31:41,661 - sglang - INFO - [2025-07-20 17:31:41 TP0] Decode batch. #running-req: 1, #token: 3225, token usage: 0.08, gen throughput (token/s): 70.67, #queue-req: 0
- 2025-07-20 17:31:41,661 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:42,491 - sglang - INFO - [2025-07-20 17:31:42 TP0] Decode batch. #running-req: 1, #token: 3265, token usage: 0.09, gen throughput (token/s): 48.19, #queue-req: 0
- 2025-07-20 17:31:42,491 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:43,320 - sglang - INFO - [2025-07-20 17:31:43 TP0] Decode batch. #running-req: 1, #token: 3305, token usage: 0.09, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:31:43,321 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:44,150 - sglang - INFO - [2025-07-20 17:31:44 TP0] Decode batch. #running-req: 1, #token: 3345, token usage: 0.09, gen throughput (token/s): 48.19, #queue-req: 0
- 2025-07-20 17:31:44,151 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:44,981 - sglang - INFO - [2025-07-20 17:31:44 TP0] Decode batch. #running-req: 1, #token: 3385, token usage: 0.09, gen throughput (token/s): 48.14, #queue-req: 0
- 2025-07-20 17:31:44,982 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:45,812 - sglang - INFO - [2025-07-20 17:31:45 TP0] Decode batch. #running-req: 1, #token: 3425, token usage: 0.09, gen throughput (token/s): 48.18, #queue-req: 0
- 2025-07-20 17:31:45,812 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:46,628 - __main__ - INFO - Finished TaskGroup for worker on a516ff5c967066055babccbea12ff6a88bdfe9b5
- 2025-07-20 17:31:46,628 - __main__ - INFO - Got 1 docs for a516ff5c967066055babccbea12ff6a88bdfe9b5
- 2025-07-20 17:31:46,629 - __main__ - INFO - Worker 0 processing work item a7cda58bb6cdd49b7ffd2f6d48a871b4e1da7e62
- 2025-07-20 17:31:46,630 - __main__ - INFO - Created all tasks for a7cda58bb6cdd49b7ffd2f6d48a871b4e1da7e62
- 2025-07-20 17:31:46,635 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
- 2025-07-20 17:31:46,733 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
- 2025-07-20 17:31:46,761 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
- 2025-07-20 17:31:46,772 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
- 2025-07-20 17:31:46,776 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
- 2025-07-20 17:31:46,802 - __main__ - INFO - Queue remaining: 19
- 2025-07-20 17:31:46,803 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 423.96 594.28
- finished_output_tokens 104.70 146.76
- sglang_input_tokens 429.13 529.13
- sglang_output_tokens 111.83 139.47
- 2025-07-20 17:31:46,803 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:31:46,813 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
- 2025-07-20 17:31:46,896 - sglang - INFO - [2025-07-20 17:31:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1353, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:31:46,896 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:31:46,903 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
- 2025-07-20 17:31:47,428 - sglang - INFO - [2025-07-20 17:31:47 TP0] Prefill batch. #new-seq: 5, #new-token: 10578, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:31:47,428 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:31:50,714 - sglang - INFO - [2025-07-20 17:31:50 TP0] Decode batch. #running-req: 6, #token: 11937, token usage: 0.31, gen throughput (token/s): 9.18, #queue-req: 0
- 2025-07-20 17:31:50,714 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:51,583 - sglang - INFO - [2025-07-20 17:31:51 TP0] Decode batch. #running-req: 6, #token: 12177, token usage: 0.32, gen throughput (token/s): 276.28, #queue-req: 0
- 2025-07-20 17:31:51,583 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:52,455 - sglang - INFO - [2025-07-20 17:31:52 TP0] Decode batch. #running-req: 6, #token: 12417, token usage: 0.33, gen throughput (token/s): 275.21, #queue-req: 0
- 2025-07-20 17:31:52,455 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:53,326 - sglang - INFO - [2025-07-20 17:31:53 TP0] Decode batch. #running-req: 6, #token: 12657, token usage: 0.33, gen throughput (token/s): 275.27, #queue-req: 0
- 2025-07-20 17:31:53,327 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:54,199 - sglang - INFO - [2025-07-20 17:31:54 TP0] Decode batch. #running-req: 6, #token: 12897, token usage: 0.34, gen throughput (token/s): 275.21, #queue-req: 0
- 2025-07-20 17:31:54,199 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:55,071 - sglang - INFO - [2025-07-20 17:31:55 TP0] Decode batch. #running-req: 6, #token: 13137, token usage: 0.35, gen throughput (token/s): 275.10, #queue-req: 0
- 2025-07-20 17:31:55,071 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:31:55,942 - sglang - INFO - [2025-07-20 17:31:55 TP0] Decode batch. #running-req: 5, #token: 11783, token usage: 0.31, gen throughput (token/s): 259.32, #queue-req: 0
- 2025-07-20 17:31:55,943 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:31:56,804 - __main__ - INFO - Queue remaining: 19
- 2025-07-20 17:31:56,805 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 414.11 594.28
- finished_output_tokens 102.27 146.76
- sglang_input_tokens 426.37 539.47
- sglang_output_tokens 110.40 141.14
- 2025-07-20 17:31:56,805 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 6
- 2025-07-20 17:31:56,809 - sglang - INFO - [2025-07-20 17:31:56 TP0] Decode batch. #running-req: 4, #token: 9953, token usage: 0.26, gen throughput (token/s): 220.46, #queue-req: 0
- 2025-07-20 17:31:56,809 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:31:57,665 - sglang - INFO - [2025-07-20 17:31:57 TP0] Decode batch. #running-req: 3, #token: 7607, token usage: 0.20, gen throughput (token/s): 177.50, #queue-req: 0
- 2025-07-20 17:31:57,665 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:31:58,516 - sglang - INFO - [2025-07-20 17:31:58 TP0] Decode batch. #running-req: 2, #token: 5645, token usage: 0.15, gen throughput (token/s): 138.62, #queue-req: 0
- 2025-07-20 17:31:58,517 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:31:59,355 - sglang - INFO - [2025-07-20 17:31:59 TP0] Decode batch. #running-req: 2, #token: 5725, token usage: 0.15, gen throughput (token/s): 95.40, #queue-req: 0
- 2025-07-20 17:31:59,355 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:00,193 - sglang - INFO - [2025-07-20 17:32:00 TP0] Decode batch. #running-req: 2, #token: 5805, token usage: 0.15, gen throughput (token/s): 95.46, #queue-req: 0
- 2025-07-20 17:32:00,193 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:01,033 - sglang - INFO - [2025-07-20 17:32:01 TP0] Decode batch. #running-req: 2, #token: 5885, token usage: 0.15, gen throughput (token/s): 95.29, #queue-req: 0
- 2025-07-20 17:32:01,033 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:01,873 - sglang - INFO - [2025-07-20 17:32:01 TP0] Decode batch. #running-req: 2, #token: 5965, token usage: 0.16, gen throughput (token/s): 95.23, #queue-req: 0
- 2025-07-20 17:32:01,873 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:02,712 - sglang - INFO - [2025-07-20 17:32:02 TP0] Decode batch. #running-req: 2, #token: 6045, token usage: 0.16, gen throughput (token/s): 95.32, #queue-req: 0
- 2025-07-20 17:32:02,712 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:03,552 - sglang - INFO - [2025-07-20 17:32:03 TP0] Decode batch. #running-req: 2, #token: 6125, token usage: 0.16, gen throughput (token/s): 95.19, #queue-req: 0
- 2025-07-20 17:32:03,553 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:04,389 - sglang - INFO - [2025-07-20 17:32:04 TP0] Decode batch. #running-req: 1, #token: 3058, token usage: 0.08, gen throughput (token/s): 75.30, #queue-req: 0
- 2025-07-20 17:32:04,389 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:05,219 - sglang - INFO - [2025-07-20 17:32:05 TP0] Decode batch. #running-req: 1, #token: 3098, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:32:05,219 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:06,048 - sglang - INFO - [2025-07-20 17:32:06 TP0] Decode batch. #running-req: 1, #token: 3138, token usage: 0.08, gen throughput (token/s): 48.24, #queue-req: 0
- 2025-07-20 17:32:06,048 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:06,806 - __main__ - INFO - Queue remaining: 19
- 2025-07-20 17:32:06,806 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 404.71 594.28
- finished_output_tokens 99.94 146.76
- sglang_input_tokens 431.24 560.84
- sglang_output_tokens 110.84 145.47
- 2025-07-20 17:32:06,806 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 5 | 6
- 2025-07-20 17:32:06,878 - sglang - INFO - [2025-07-20 17:32:06 TP0] Decode batch. #running-req: 1, #token: 3178, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:32:06,878 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:07,257 - __main__ - INFO - Finished TaskGroup for worker on a7cda58bb6cdd49b7ffd2f6d48a871b4e1da7e62
- 2025-07-20 17:32:07,257 - __main__ - INFO - Got 1 docs for a7cda58bb6cdd49b7ffd2f6d48a871b4e1da7e62
- 2025-07-20 17:32:07,259 - __main__ - INFO - Worker 0 processing work item e4811c9442eb8e0a3b6177e544c95e0299d41166
- 2025-07-20 17:32:07,259 - __main__ - INFO - Created all tasks for e4811c9442eb8e0a3b6177e544c95e0299d41166
- 2025-07-20 17:32:07,265 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
- 2025-07-20 17:32:07,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
- 2025-07-20 17:32:07,388 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
- 2025-07-20 17:32:07,407 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
- 2025-07-20 17:32:07,411 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
- 2025-07-20 17:32:07,446 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
- 2025-07-20 17:32:07,506 - sglang - INFO - [2025-07-20 17:32:07 TP0] Prefill batch. #new-seq: 1, #new-token: 1353, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:32:07,506 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:32:07,526 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
- 2025-07-20 17:32:08,038 - sglang - INFO - [2025-07-20 17:32:08 TP0] Prefill batch. #new-seq: 5, #new-token: 10553, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:32:08,039 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:11,779 - sglang - INFO - [2025-07-20 17:32:11 TP0] Decode batch. #running-req: 6, #token: 12038, token usage: 0.32, gen throughput (token/s): 30.61, #queue-req: 0
- 2025-07-20 17:32:11,779 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:12,650 - sglang - INFO - [2025-07-20 17:32:12 TP0] Decode batch. #running-req: 6, #token: 12278, token usage: 0.32, gen throughput (token/s): 275.43, #queue-req: 0
- 2025-07-20 17:32:12,650 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:13,521 - sglang - INFO - [2025-07-20 17:32:13 TP0] Decode batch. #running-req: 6, #token: 12518, token usage: 0.33, gen throughput (token/s): 275.44, #queue-req: 0
- 2025-07-20 17:32:13,522 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:14,392 - sglang - INFO - [2025-07-20 17:32:14 TP0] Decode batch. #running-req: 6, #token: 12758, token usage: 0.34, gen throughput (token/s): 275.70, #queue-req: 0
- 2025-07-20 17:32:14,392 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:15,263 - sglang - INFO - [2025-07-20 17:32:15 TP0] Decode batch. #running-req: 6, #token: 12998, token usage: 0.34, gen throughput (token/s): 275.36, #queue-req: 0
- 2025-07-20 17:32:15,264 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:16,137 - sglang - INFO - [2025-07-20 17:32:16 TP0] Decode batch. #running-req: 6, #token: 13238, token usage: 0.35, gen throughput (token/s): 274.71, #queue-req: 0
- 2025-07-20 17:32:16,137 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:16,809 - __main__ - INFO - Queue remaining: 18
- 2025-07-20 17:32:16,809 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 422.21 634.05
- finished_output_tokens 103.45 155.36
- sglang_input_tokens 430.04 573.41
- sglang_output_tokens 110.62 148.84
- 2025-07-20 17:32:16,809 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 6
- 2025-07-20 17:32:17,008 - sglang - INFO - [2025-07-20 17:32:17 TP0] Decode batch. #running-req: 5, #token: 11863, token usage: 0.31, gen throughput (token/s): 238.82, #queue-req: 0
- 2025-07-20 17:32:17,008 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:32:17,866 - sglang - INFO - [2025-07-20 17:32:17 TP0] Decode batch. #running-req: 3, #token: 7537, token usage: 0.20, gen throughput (token/s): 192.24, #queue-req: 0
- 2025-07-20 17:32:17,866 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:32:18,717 - sglang - INFO - [2025-07-20 17:32:18 TP0] Decode batch. #running-req: 3, #token: 7657, token usage: 0.20, gen throughput (token/s): 141.11, #queue-req: 0
- 2025-07-20 17:32:18,717 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:32:19,563 - sglang - INFO - [2025-07-20 17:32:19 TP0] Decode batch. #running-req: 2, #token: 5691, token usage: 0.15, gen throughput (token/s): 118.20, #queue-req: 0
- 2025-07-20 17:32:19,563 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:20,401 - sglang - INFO - [2025-07-20 17:32:20 TP0] Decode batch. #running-req: 2, #token: 5771, token usage: 0.15, gen throughput (token/s): 95.39, #queue-req: 0
- 2025-07-20 17:32:20,402 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:21,240 - sglang - INFO - [2025-07-20 17:32:21 TP0] Decode batch. #running-req: 2, #token: 5851, token usage: 0.15, gen throughput (token/s): 95.43, #queue-req: 0
- 2025-07-20 17:32:21,240 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:22,078 - sglang - INFO - [2025-07-20 17:32:22 TP0] Decode batch. #running-req: 2, #token: 5931, token usage: 0.16, gen throughput (token/s): 95.38, #queue-req: 0
- 2025-07-20 17:32:22,079 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:22,918 - sglang - INFO - [2025-07-20 17:32:22 TP0] Decode batch. #running-req: 2, #token: 6011, token usage: 0.16, gen throughput (token/s): 95.24, #queue-req: 0
- 2025-07-20 17:32:22,919 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:23,758 - sglang - INFO - [2025-07-20 17:32:23 TP0] Decode batch. #running-req: 2, #token: 6091, token usage: 0.16, gen throughput (token/s): 95.33, #queue-req: 0
- 2025-07-20 17:32:23,758 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:24,597 - sglang - INFO - [2025-07-20 17:32:24 TP0] Decode batch. #running-req: 2, #token: 6171, token usage: 0.16, gen throughput (token/s): 95.35, #queue-req: 0
- 2025-07-20 17:32:24,597 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:25,427 - sglang - INFO - [2025-07-20 17:32:25 TP0] Decode batch. #running-req: 1, #token: 3081, token usage: 0.08, gen throughput (token/s): 51.77, #queue-req: 0
- 2025-07-20 17:32:25,427 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:26,257 - sglang - INFO - [2025-07-20 17:32:26 TP0] Decode batch. #running-req: 1, #token: 3121, token usage: 0.08, gen throughput (token/s): 48.20, #queue-req: 0
- 2025-07-20 17:32:26,257 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:26,810 - __main__ - INFO - Queue remaining: 18
- 2025-07-20 17:32:26,811 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 413.04 634.05
- finished_output_tokens 101.20 155.36
- sglang_input_tokens 438.36 593.28
- sglang_output_tokens 111.61 144.04
- 2025-07-20 17:32:26,811 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 5 | 6
- 2025-07-20 17:32:27,086 - sglang - INFO - [2025-07-20 17:32:27 TP0] Decode batch. #running-req: 1, #token: 3161, token usage: 0.08, gen throughput (token/s): 48.25, #queue-req: 0
- 2025-07-20 17:32:27,086 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:27,859 - __main__ - INFO - Finished TaskGroup for worker on e4811c9442eb8e0a3b6177e544c95e0299d41166
- 2025-07-20 17:32:27,859 - __main__ - INFO - Got 1 docs for e4811c9442eb8e0a3b6177e544c95e0299d41166
- 2025-07-20 17:32:27,860 - __main__ - INFO - Worker 0 processing work item 95eb6113ad117cc5bc5c734f7ca31625e117229d
- 2025-07-20 17:32:27,861 - __main__ - INFO - Created all tasks for 95eb6113ad117cc5bc5c734f7ca31625e117229d
- 2025-07-20 17:32:27,867 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901002.pdf in worker 0
- 2025-07-20 17:32:27,973 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-6
- 2025-07-20 17:32:28,026 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-3
- 2025-07-20 17:32:28,042 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-2
- 2025-07-20 17:32:28,048 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-1
- 2025-07-20 17:32:28,109 - sglang - INFO - [2025-07-20 17:32:28 TP0] Prefill batch. #new-seq: 1, #new-token: 1457, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:32:28,109 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:32:28,110 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-5
- 2025-07-20 17:32:28,118 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-4
- 2025-07-20 17:32:28,684 - sglang - INFO - [2025-07-20 17:32:28 TP0] Prefill batch. #new-seq: 5, #new-token: 11322, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:32:28,684 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:32,162 - sglang - INFO - [2025-07-20 17:32:32 TP0] Decode batch. #running-req: 6, #token: 12797, token usage: 0.34, gen throughput (token/s): 10.84, #queue-req: 0
- 2025-07-20 17:32:32,162 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:33,033 - sglang - INFO - [2025-07-20 17:32:33 TP0] Decode batch. #running-req: 6, #token: 13037, token usage: 0.34, gen throughput (token/s): 275.55, #queue-req: 0
- 2025-07-20 17:32:33,033 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:33,906 - sglang - INFO - [2025-07-20 17:32:33 TP0] Decode batch. #running-req: 6, #token: 13277, token usage: 0.35, gen throughput (token/s): 274.78, #queue-req: 0
- 2025-07-20 17:32:33,906 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:34,780 - sglang - INFO - [2025-07-20 17:32:34 TP0] Decode batch. #running-req: 6, #token: 13517, token usage: 0.36, gen throughput (token/s): 274.42, #queue-req: 0
- 2025-07-20 17:32:34,781 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:35,656 - sglang - INFO - [2025-07-20 17:32:35 TP0] Decode batch. #running-req: 6, #token: 13757, token usage: 0.36, gen throughput (token/s): 274.22, #queue-req: 0
- 2025-07-20 17:32:35,656 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:36,532 - sglang - INFO - [2025-07-20 17:32:36 TP0] Decode batch. #running-req: 6, #token: 13997, token usage: 0.37, gen throughput (token/s): 274.01, #queue-req: 0
- 2025-07-20 17:32:36,532 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:36,812 - __main__ - INFO - Queue remaining: 17
- 2025-07-20 17:32:36,812 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 429.56 673.73
- finished_output_tokens 104.52 163.93
- sglang_input_tokens 434.18 601.34
- sglang_output_tokens 110.89 146.64
- 2025-07-20 17:32:36,812 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:32:37,408 - sglang - INFO - [2025-07-20 17:32:37 TP0] Decode batch. #running-req: 6, #token: 14237, token usage: 0.37, gen throughput (token/s): 273.87, #queue-req: 0
- 2025-07-20 17:32:37,408 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:38,284 - sglang - INFO - [2025-07-20 17:32:38 TP0] Decode batch. #running-req: 5, #token: 12737, token usage: 0.34, gen throughput (token/s): 255.77, #queue-req: 0
- 2025-07-20 17:32:38,284 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:32:39,156 - sglang - INFO - [2025-07-20 17:32:39 TP0] Decode batch. #running-req: 5, #token: 12937, token usage: 0.34, gen throughput (token/s): 229.36, #queue-req: 0
- 2025-07-20 17:32:39,156 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:32:40,027 - sglang - INFO - [2025-07-20 17:32:40 TP0] Decode batch. #running-req: 5, #token: 13137, token usage: 0.35, gen throughput (token/s): 229.43, #queue-req: 0
- 2025-07-20 17:32:40,028 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:32:40,899 - sglang - INFO - [2025-07-20 17:32:40 TP0] Decode batch. #running-req: 5, #token: 13337, token usage: 0.35, gen throughput (token/s): 229.41, #queue-req: 0
- 2025-07-20 17:32:40,899 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:32:41,772 - sglang - INFO - [2025-07-20 17:32:41 TP0] Decode batch. #running-req: 5, #token: 13537, token usage: 0.36, gen throughput (token/s): 229.15, #queue-req: 0
- 2025-07-20 17:32:41,772 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:32:42,645 - sglang - INFO - [2025-07-20 17:32:42 TP0] Decode batch. #running-req: 5, #token: 13737, token usage: 0.36, gen throughput (token/s): 228.96, #queue-req: 0
- 2025-07-20 17:32:42,646 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:32:43,521 - sglang - INFO - [2025-07-20 17:32:43 TP0] Decode batch. #running-req: 5, #token: 13937, token usage: 0.37, gen throughput (token/s): 228.48, #queue-req: 0
- 2025-07-20 17:32:43,521 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:32:44,393 - sglang - INFO - [2025-07-20 17:32:44 TP0] Decode batch. #running-req: 4, #token: 11695, token usage: 0.31, gen throughput (token/s): 218.86, #queue-req: 0
- 2025-07-20 17:32:44,394 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:32:45,252 - sglang - INFO - [2025-07-20 17:32:45 TP0] Decode batch. #running-req: 3, #token: 9152, token usage: 0.24, gen throughput (token/s): 173.49, #queue-req: 0
- 2025-07-20 17:32:45,253 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:32:46,106 - sglang - INFO - [2025-07-20 17:32:46 TP0] Decode batch. #running-req: 3, #token: 9272, token usage: 0.24, gen throughput (token/s): 140.60, #queue-req: 0
- 2025-07-20 17:32:46,106 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:32:46,814 - __main__ - INFO - Queue remaining: 17
- 2025-07-20 17:32:46,815 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 420.62 594.10
- finished_output_tokens 102.34 144.93
- sglang_input_tokens 441.81 614.34
- sglang_output_tokens 112.88 150.94
- 2025-07-20 17:32:46,815 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 6
- 2025-07-20 17:32:46,948 - sglang - INFO - [2025-07-20 17:32:46 TP0] Decode batch. #running-req: 2, #token: 6137, token usage: 0.16, gen throughput (token/s): 100.89, #queue-req: 0
- 2025-07-20 17:32:46,949 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:47,788 - sglang - INFO - [2025-07-20 17:32:47 TP0] Decode batch. #running-req: 2, #token: 6217, token usage: 0.16, gen throughput (token/s): 95.26, #queue-req: 0
- 2025-07-20 17:32:47,788 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:48,627 - sglang - INFO - [2025-07-20 17:32:48 TP0] Decode batch. #running-req: 2, #token: 6297, token usage: 0.17, gen throughput (token/s): 95.31, #queue-req: 0
- 2025-07-20 17:32:48,628 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:49,467 - sglang - INFO - [2025-07-20 17:32:49 TP0] Decode batch. #running-req: 2, #token: 6377, token usage: 0.17, gen throughput (token/s): 95.27, #queue-req: 0
- 2025-07-20 17:32:49,467 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:50,308 - sglang - INFO - [2025-07-20 17:32:50 TP0] Decode batch. #running-req: 2, #token: 6457, token usage: 0.17, gen throughput (token/s): 95.11, #queue-req: 0
- 2025-07-20 17:32:50,309 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:32:51,143 - sglang - INFO - [2025-07-20 17:32:51 TP0] Decode batch. #running-req: 1, #token: 3307, token usage: 0.09, gen throughput (token/s): 63.52, #queue-req: 0
- 2025-07-20 17:32:51,143 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:51,973 - sglang - INFO - [2025-07-20 17:32:51 TP0] Decode batch. #running-req: 1, #token: 3347, token usage: 0.09, gen throughput (token/s): 48.18, #queue-req: 0
- 2025-07-20 17:32:51,973 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:52,804 - sglang - INFO - [2025-07-20 17:32:52 TP0] Decode batch. #running-req: 1, #token: 3387, token usage: 0.09, gen throughput (token/s): 48.14, #queue-req: 0
- 2025-07-20 17:32:52,804 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:53,620 - __main__ - INFO - Finished TaskGroup for worker on 95eb6113ad117cc5bc5c734f7ca31625e117229d
- 2025-07-20 17:32:53,620 - __main__ - INFO - Got 1 docs for 95eb6113ad117cc5bc5c734f7ca31625e117229d
- 2025-07-20 17:32:53,622 - __main__ - INFO - Worker 0 processing work item f5bd195da84dc4c9a132080ffb1a40239bb6d12b
- 2025-07-20 17:32:53,622 - __main__ - INFO - Created all tasks for f5bd195da84dc4c9a132080ffb1a40239bb6d12b
- 2025-07-20 17:32:53,628 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013002.pdf in worker 0
- 2025-07-20 17:32:53,724 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-6
- 2025-07-20 17:32:53,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-4
- 2025-07-20 17:32:53,764 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-2
- 2025-07-20 17:32:53,769 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-3
- 2025-07-20 17:32:53,805 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-1
- 2025-07-20 17:32:53,884 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-5
- 2025-07-20 17:32:53,890 - sglang - INFO - [2025-07-20 17:32:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1749, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:32:53,890 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:32:54,517 - sglang - INFO - [2025-07-20 17:32:54 TP0] Prefill batch. #new-seq: 5, #new-token: 10159, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-07-20 17:32:54,517 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:32:56,816 - __main__ - INFO - Queue remaining: 16
- 2025-07-20 17:32:56,816 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 438.10 636.69
- finished_output_tokens 108.26 158.01
- sglang_input_tokens 442.53 600.97
- sglang_output_tokens 114.37 151.97
- 2025-07-20 17:32:56,816 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:32:57,709 - sglang - INFO - [2025-07-20 17:32:57 TP0] Decode batch. #running-req: 6, #token: 11914, token usage: 0.31, gen throughput (token/s): 9.17, #queue-req: 0
- 2025-07-20 17:32:57,709 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:58,578 - sglang - INFO - [2025-07-20 17:32:58 TP0] Decode batch. #running-req: 6, #token: 12154, token usage: 0.32, gen throughput (token/s): 276.27, #queue-req: 0
- 2025-07-20 17:32:58,578 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:32:59,452 - sglang - INFO - [2025-07-20 17:32:59 TP0] Decode batch. #running-req: 6, #token: 12394, token usage: 0.33, gen throughput (token/s): 274.68, #queue-req: 0
- 2025-07-20 17:32:59,452 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:33:00,324 - sglang - INFO - [2025-07-20 17:33:00 TP0] Decode batch. #running-req: 6, #token: 12634, token usage: 0.33, gen throughput (token/s): 275.24, #queue-req: 0
- 2025-07-20 17:33:00,324 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:33:01,196 - sglang - INFO - [2025-07-20 17:33:01 TP0] Decode batch. #running-req: 6, #token: 12874, token usage: 0.34, gen throughput (token/s): 275.08, #queue-req: 0
- 2025-07-20 17:33:01,196 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:33:02,069 - sglang - INFO - [2025-07-20 17:33:02 TP0] Decode batch. #running-req: 6, #token: 13114, token usage: 0.35, gen throughput (token/s): 275.10, #queue-req: 0
- 2025-07-20 17:33:02,069 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:33:02,940 - sglang - INFO - [2025-07-20 17:33:02 TP0] Decode batch. #running-req: 5, #token: 11760, token usage: 0.31, gen throughput (token/s): 262.67, #queue-req: 0
- 2025-07-20 17:33:02,941 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:03,810 - sglang - INFO - [2025-07-20 17:33:03 TP0] Decode batch. #running-req: 5, #token: 11960, token usage: 0.31, gen throughput (token/s): 230.04, #queue-req: 0
- 2025-07-20 17:33:03,810 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:04,666 - sglang - INFO - [2025-07-20 17:33:04 TP0] Decode batch. #running-req: 3, #token: 7608, token usage: 0.20, gen throughput (token/s): 163.60, #queue-req: 0
- 2025-07-20 17:33:04,666 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:33:05,517 - sglang - INFO - [2025-07-20 17:33:05 TP0] Decode batch. #running-req: 2, #token: 5646, token usage: 0.15, gen throughput (token/s): 139.67, #queue-req: 0
- 2025-07-20 17:33:05,518 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:06,356 - sglang - INFO - [2025-07-20 17:33:06 TP0] Decode batch. #running-req: 2, #token: 5726, token usage: 0.15, gen throughput (token/s): 95.40, #queue-req: 0
- 2025-07-20 17:33:06,356 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:06,817 - __main__ - INFO - Queue remaining: 16
- 2025-07-20 17:33:06,818 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 429.34 636.69
- finished_output_tokens 106.09 158.01
- sglang_input_tokens 447.64 599.13
- sglang_output_tokens 114.44 148.91
- 2025-07-20 17:33:06,818 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 6
- 2025-07-20 17:33:07,195 - sglang - INFO - [2025-07-20 17:33:07 TP0] Decode batch. #running-req: 2, #token: 5806, token usage: 0.15, gen throughput (token/s): 95.41, #queue-req: 0
- 2025-07-20 17:33:07,195 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:08,034 - sglang - INFO - [2025-07-20 17:33:08 TP0] Decode batch. #running-req: 2, #token: 5886, token usage: 0.15, gen throughput (token/s): 95.26, #queue-req: 0
- 2025-07-20 17:33:08,035 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:08,874 - sglang - INFO - [2025-07-20 17:33:08 TP0] Decode batch. #running-req: 2, #token: 5966, token usage: 0.16, gen throughput (token/s): 95.25, #queue-req: 0
- 2025-07-20 17:33:08,875 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:09,714 - sglang - INFO - [2025-07-20 17:33:09 TP0] Decode batch. #running-req: 2, #token: 6046, token usage: 0.16, gen throughput (token/s): 95.26, #queue-req: 0
- 2025-07-20 17:33:09,714 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:10,553 - sglang - INFO - [2025-07-20 17:33:10 TP0] Decode batch. #running-req: 2, #token: 6126, token usage: 0.16, gen throughput (token/s): 95.32, #queue-req: 0
- 2025-07-20 17:33:10,553 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:11,390 - sglang - INFO - [2025-07-20 17:33:11 TP0] Decode batch. #running-req: 1, #token: 3058, token usage: 0.08, gen throughput (token/s): 78.85, #queue-req: 0
- 2025-07-20 17:33:11,391 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:12,221 - sglang - INFO - [2025-07-20 17:33:12 TP0] Decode batch. #running-req: 1, #token: 3098, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
- 2025-07-20 17:33:12,221 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:13,051 - sglang - INFO - [2025-07-20 17:33:13 TP0] Decode batch. #running-req: 1, #token: 3138, token usage: 0.08, gen throughput (token/s): 48.19, #queue-req: 0
- 2025-07-20 17:33:13,051 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:13,881 - sglang - INFO - [2025-07-20 17:33:13 TP0] Decode batch. #running-req: 1, #token: 3178, token usage: 0.08, gen throughput (token/s): 48.20, #queue-req: 0
- 2025-07-20 17:33:13,881 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:14,260 - __main__ - INFO - Finished TaskGroup for worker on f5bd195da84dc4c9a132080ffb1a40239bb6d12b
- 2025-07-20 17:33:14,260 - __main__ - INFO - Got 1 docs for f5bd195da84dc4c9a132080ffb1a40239bb6d12b
- 2025-07-20 17:33:14,262 - __main__ - INFO - Worker 0 processing work item 7815bd6305410d3cbbea8287ed60dae1462e6e65
- 2025-07-20 17:33:14,262 - __main__ - INFO - Created all tasks for 7815bd6305410d3cbbea8287ed60dae1462e6e65
- 2025-07-20 17:33:14,267 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106013001.pdf in worker 0
- 2025-07-20 17:33:14,383 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-5
- 2025-07-20 17:33:14,411 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-2
- 2025-07-20 17:33:14,417 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-3
- 2025-07-20 17:33:14,445 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-1
- 2025-07-20 17:33:14,530 - sglang - INFO - [2025-07-20 17:33:14 TP0] Prefill batch. #new-seq: 1, #new-token: 1537, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:33:14,531 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:33:14,536 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-4
- 2025-07-20 17:33:15,118 - sglang - INFO - [2025-07-20 17:33:15 TP0] Prefill batch. #new-seq: 4, #new-token: 8980, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:33:15,118 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:16,819 - __main__ - INFO - Queue remaining: 15
- 2025-07-20 17:33:16,819 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 444.26 607.82
- finished_output_tokens 109.08 150.63
- sglang_input_tokens 448.51 607.82
- sglang_output_tokens 114.95 150.63
- 2025-07-20 17:33:16,819 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-07-20 17:33:18,331 - sglang - INFO - [2025-07-20 17:33:18 TP0] Decode batch. #running-req: 5, #token: 10627, token usage: 0.28, gen throughput (token/s): 28.76, #queue-req: 0
- 2025-07-20 17:33:18,331 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:19,198 - sglang - INFO - [2025-07-20 17:33:19 TP0] Decode batch. #running-req: 5, #token: 10827, token usage: 0.29, gen throughput (token/s): 230.59, #queue-req: 0
- 2025-07-20 17:33:19,199 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:20,065 - sglang - INFO - [2025-07-20 17:33:20 TP0] Decode batch. #running-req: 5, #token: 11027, token usage: 0.29, gen throughput (token/s): 230.83, #queue-req: 0
- 2025-07-20 17:33:20,065 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:20,931 - sglang - INFO - [2025-07-20 17:33:20 TP0] Decode batch. #running-req: 5, #token: 11227, token usage: 0.30, gen throughput (token/s): 230.80, #queue-req: 0
- 2025-07-20 17:33:20,931 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:21,799 - sglang - INFO - [2025-07-20 17:33:21 TP0] Decode batch. #running-req: 5, #token: 11427, token usage: 0.30, gen throughput (token/s): 230.41, #queue-req: 0
- 2025-07-20 17:33:21,800 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:22,670 - sglang - INFO - [2025-07-20 17:33:22 TP0] Decode batch. #running-req: 5, #token: 11627, token usage: 0.31, gen throughput (token/s): 229.64, #queue-req: 0
- 2025-07-20 17:33:22,670 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:23,541 - sglang - INFO - [2025-07-20 17:33:23 TP0] Decode batch. #running-req: 5, #token: 11827, token usage: 0.31, gen throughput (token/s): 229.61, #queue-req: 0
- 2025-07-20 17:33:23,542 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:24,402 - sglang - INFO - [2025-07-20 17:33:24 TP0] Decode batch. #running-req: 4, #token: 10188, token usage: 0.27, gen throughput (token/s): 197.42, #queue-req: 0
- 2025-07-20 17:33:24,403 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:33:25,260 - sglang - INFO - [2025-07-20 17:33:25 TP0] Decode batch. #running-req: 3, #token: 7677, token usage: 0.20, gen throughput (token/s): 175.99, #queue-req: 0
- 2025-07-20 17:33:25,261 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:33:26,110 - sglang - INFO - [2025-07-20 17:33:26 TP0] Decode batch. #running-req: 2, #token: 5641, token usage: 0.15, gen throughput (token/s): 129.51, #queue-req: 0
- 2025-07-20 17:33:26,110 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:26,820 - __main__ - INFO - Queue remaining: 15
- 2025-07-20 17:33:26,821 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 435.72 607.82
- finished_output_tokens 106.98 150.63
- sglang_input_tokens 450.73 602.50
- sglang_output_tokens 114.63 149.60
- 2025-07-20 17:33:26,821 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 3 | 5
- 2025-07-20 17:33:26,949 - sglang - INFO - [2025-07-20 17:33:26 TP0] Decode batch. #running-req: 2, #token: 5721, token usage: 0.15, gen throughput (token/s): 95.32, #queue-req: 0
- 2025-07-20 17:33:26,949 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:27,788 - sglang - INFO - [2025-07-20 17:33:27 TP0] Decode batch. #running-req: 2, #token: 5801, token usage: 0.15, gen throughput (token/s): 95.35, #queue-req: 0
- 2025-07-20 17:33:27,788 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:28,627 - sglang - INFO - [2025-07-20 17:33:28 TP0] Decode batch. #running-req: 2, #token: 5881, token usage: 0.15, gen throughput (token/s): 95.31, #queue-req: 0
- 2025-07-20 17:33:28,628 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:29,467 - sglang - INFO - [2025-07-20 17:33:29 TP0] Decode batch. #running-req: 2, #token: 5961, token usage: 0.16, gen throughput (token/s): 95.26, #queue-req: 0
- 2025-07-20 17:33:29,467 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:30,308 - sglang - INFO - [2025-07-20 17:33:30 TP0] Decode batch. #running-req: 2, #token: 6041, token usage: 0.16, gen throughput (token/s): 95.20, #queue-req: 0
- 2025-07-20 17:33:30,308 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:31,147 - sglang - INFO - [2025-07-20 17:33:31 TP0] Decode batch. #running-req: 2, #token: 6121, token usage: 0.16, gen throughput (token/s): 95.30, #queue-req: 0
- 2025-07-20 17:33:31,147 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:31,979 - sglang - INFO - [2025-07-20 17:33:31 TP0] Decode batch. #running-req: 1, #token: 3032, token usage: 0.08, gen throughput (token/s): 55.29, #queue-req: 0
- 2025-07-20 17:33:31,979 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:32,809 - sglang - INFO - [2025-07-20 17:33:32 TP0] Decode batch. #running-req: 1, #token: 3072, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
- 2025-07-20 17:33:32,809 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:33,640 - sglang - INFO - [2025-07-20 17:33:33 TP0] Decode batch. #running-req: 1, #token: 3112, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
- 2025-07-20 17:33:33,640 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:34,471 - sglang - INFO - [2025-07-20 17:33:34 TP0] Decode batch. #running-req: 1, #token: 3152, token usage: 0.08, gen throughput (token/s): 48.14, #queue-req: 0
- 2025-07-20 17:33:34,471 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:34,870 - __main__ - INFO - Finished TaskGroup for worker on 7815bd6305410d3cbbea8287ed60dae1462e6e65
- 2025-07-20 17:33:34,871 - __main__ - INFO - Got 1 docs for 7815bd6305410d3cbbea8287ed60dae1462e6e65
- 2025-07-20 17:33:34,872 - __main__ - INFO - Worker 0 processing work item 1cbf4da516b0dca0de138db476a8a65d2dbc5aab
- 2025-07-20 17:33:34,872 - __main__ - INFO - Created all tasks for 1cbf4da516b0dca0de138db476a8a65d2dbc5aab
- 2025-07-20 17:33:34,878 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106004000.pdf in worker 0
- 2025-07-20 17:33:35,014 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-4
- 2025-07-20 17:33:35,050 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-3
- 2025-07-20 17:33:35,061 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-5
- 2025-07-20 17:33:35,064 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-1
- 2025-07-20 17:33:35,071 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-2
- 2025-07-20 17:33:35,080 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-7
- 2025-07-20 17:33:35,089 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-6
- 2025-07-20 17:33:35,185 - sglang - INFO - [2025-07-20 17:33:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2027, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:33:35,186 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:33:35,858 - sglang - INFO - [2025-07-20 17:33:35 TP0] Prefill batch. #new-seq: 6, #new-token: 13399, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-07-20 17:33:35,859 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:33:36,822 - __main__ - INFO - Queue remaining: 14
- 2025-07-20 17:33:36,823 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 447.33 642.87
- finished_output_tokens 109.51 158.66
- sglang_input_tokens 451.43 579.61
- sglang_output_tokens 115.16 146.10
- 2025-07-20 17:33:36,823 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 7
- 2025-07-20 17:33:40,386 - sglang - INFO - [2025-07-20 17:33:40 TP0] Decode batch. #running-req: 7, #token: 15573, token usage: 0.41, gen throughput (token/s): 28.06, #queue-req: 0
- 2025-07-20 17:33:40,386 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:33:41,267 - sglang - INFO - [2025-07-20 17:33:41 TP0] Decode batch. #running-req: 7, #token: 15853, token usage: 0.42, gen throughput (token/s): 317.60, #queue-req: 0
- 2025-07-20 17:33:41,268 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:33:42,150 - sglang - INFO - [2025-07-20 17:33:42 TP0] Decode batch. #running-req: 7, #token: 16133, token usage: 0.42, gen throughput (token/s): 317.42, #queue-req: 0
- 2025-07-20 17:33:42,150 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:33:43,031 - sglang - INFO - [2025-07-20 17:33:43 TP0] Decode batch. #running-req: 7, #token: 16413, token usage: 0.43, gen throughput (token/s): 317.57, #queue-req: 0
- 2025-07-20 17:33:43,032 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:33:43,915 - sglang - INFO - [2025-07-20 17:33:43 TP0] Decode batch. #running-req: 7, #token: 16693, token usage: 0.44, gen throughput (token/s): 316.84, #queue-req: 0
- 2025-07-20 17:33:43,915 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:33:44,801 - sglang - INFO - [2025-07-20 17:33:44 TP0] Decode batch. #running-req: 7, #token: 16973, token usage: 0.45, gen throughput (token/s): 315.96, #queue-req: 0
- 2025-07-20 17:33:44,801 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:33:45,687 - sglang - INFO - [2025-07-20 17:33:45 TP0] Decode batch. #running-req: 7, #token: 17253, token usage: 0.45, gen throughput (token/s): 316.16, #queue-req: 0
- 2025-07-20 17:33:45,687 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:33:46,572 - sglang - INFO - [2025-07-20 17:33:46 TP0] Decode batch. #running-req: 7, #token: 17533, token usage: 0.46, gen throughput (token/s): 316.17, #queue-req: 0
- 2025-07-20 17:33:46,573 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:33:46,823 - __main__ - INFO - Queue remaining: 14
- 2025-07-20 17:33:46,824 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 439.05 642.87
- finished_output_tokens 107.48 158.66
- sglang_input_tokens 443.07 579.61
- sglang_output_tokens 113.03 146.10
- 2025-07-20 17:33:46,824 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 7
- 2025-07-20 17:33:47,458 - sglang - INFO - [2025-07-20 17:33:47 TP0] Decode batch. #running-req: 7, #token: 17813, token usage: 0.47, gen throughput (token/s): 316.31, #queue-req: 0
- 2025-07-20 17:33:47,458 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:33:48,342 - sglang - INFO - [2025-07-20 17:33:48 TP0] Decode batch. #running-req: 6, #token: 15685, token usage: 0.41, gen throughput (token/s): 301.99, #queue-req: 0
- 2025-07-20 17:33:48,342 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:33:49,223 - sglang - INFO - [2025-07-20 17:33:49 TP0] Decode batch. #running-req: 6, #token: 15925, token usage: 0.42, gen throughput (token/s): 272.47, #queue-req: 0
- 2025-07-20 17:33:49,223 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:33:50,103 - sglang - INFO - [2025-07-20 17:33:50 TP0] Decode batch. #running-req: 5, #token: 13191, token usage: 0.35, gen throughput (token/s): 262.42, #queue-req: 0
- 2025-07-20 17:33:50,103 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:50,976 - sglang - INFO - [2025-07-20 17:33:50 TP0] Decode batch. #running-req: 5, #token: 13391, token usage: 0.35, gen throughput (token/s): 229.06, #queue-req: 0
- 2025-07-20 17:33:50,976 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:33:51,841 - sglang - INFO - [2025-07-20 17:33:51 TP0] Decode batch. #running-req: 3, #token: 8173, token usage: 0.22, gen throughput (token/s): 198.80, #queue-req: 0
- 2025-07-20 17:33:51,841 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:33:52,684 - sglang - INFO - [2025-07-20 17:33:52 TP0] Decode batch. #running-req: 2, #token: 5725, token usage: 0.15, gen throughput (token/s): 105.60, #queue-req: 0
- 2025-07-20 17:33:52,684 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:33:53,503 - __main__ - INFO - Finished TaskGroup for worker on 1cbf4da516b0dca0de138db476a8a65d2dbc5aab
- 2025-07-20 17:33:53,503 - __main__ - INFO - Got 1 docs for 1cbf4da516b0dca0de138db476a8a65d2dbc5aab
- 2025-07-20 17:33:53,505 - __main__ - INFO - Worker 0 processing work item 03f19a67ca1619f854740bd806a32d7112c3c315
- 2025-07-20 17:33:53,505 - __main__ - INFO - Created all tasks for 03f19a67ca1619f854740bd806a32d7112c3c315
- 2025-07-20 17:33:53,512 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
- 2025-07-20 17:33:53,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
- 2025-07-20 17:33:53,655 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
- 2025-07-20 17:33:53,664 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
- 2025-07-20 17:33:53,668 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
- 2025-07-20 17:33:53,691 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
- 2025-07-20 17:33:53,693 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
- 2025-07-20 17:33:53,697 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
- 2025-07-20 17:33:53,701 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
- 2025-07-20 17:33:53,720 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
- 2025-07-20 17:33:53,788 - sglang - INFO - [2025-07-20 17:33:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1928, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:33:53,788 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:33:54,473 - sglang - INFO - [2025-07-20 17:33:54 TP0] Prefill batch. #new-seq: 6, #new-token: 13089, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 2
- 2025-07-20 17:33:54,473 - __main__ - INFO - sglang running req: 1 queue req: 2
- 2025-07-20 17:33:56,825 - __main__ - INFO - Queue remaining: 13
- 2025-07-20 17:33:56,825 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 459.10 605.68
- finished_output_tokens 112.15 150.76
- sglang_input_tokens 463.04 592.35
- sglang_output_tokens 117.60 148.92
- 2025-07-20 17:33:56,826 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 9
- 2025-07-20 17:33:58,471 - sglang - INFO - [2025-07-20 17:33:58 TP0] Decode batch. #running-req: 7, #token: 15024, token usage: 0.40, gen throughput (token/s): 10.54, #queue-req: 2
- 2025-07-20 17:33:58,471 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:33:59,347 - sglang - INFO - [2025-07-20 17:33:59 TP0] Decode batch. #running-req: 7, #token: 15304, token usage: 0.40, gen throughput (token/s): 319.30, #queue-req: 2
- 2025-07-20 17:33:59,348 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:34:00,228 - sglang - INFO - [2025-07-20 17:34:00 TP0] Decode batch. #running-req: 7, #token: 15584, token usage: 0.41, gen throughput (token/s): 317.85, #queue-req: 2
- 2025-07-20 17:34:00,229 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:34:01,109 - sglang - INFO - [2025-07-20 17:34:01 TP0] Decode batch. #running-req: 7, #token: 15864, token usage: 0.42, gen throughput (token/s): 317.85, #queue-req: 2
- 2025-07-20 17:34:01,109 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:34:01,990 - sglang - INFO - [2025-07-20 17:34:01 TP0] Decode batch. #running-req: 7, #token: 16144, token usage: 0.42, gen throughput (token/s): 317.83, #queue-req: 2
- 2025-07-20 17:34:01,990 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:34:02,871 - sglang - INFO - [2025-07-20 17:34:02 TP0] Decode batch. #running-req: 7, #token: 16424, token usage: 0.43, gen throughput (token/s): 317.77, #queue-req: 2
- 2025-07-20 17:34:02,872 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:34:03,753 - sglang - INFO - [2025-07-20 17:34:03 TP0] Decode batch. #running-req: 7, #token: 16704, token usage: 0.44, gen throughput (token/s): 317.49, #queue-req: 2
- 2025-07-20 17:34:03,753 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:34:04,573 - sglang - INFO - [2025-07-20 17:34:04 TP0] Prefill batch. #new-seq: 2, #new-token: 4265, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.38, #running-req: 6, #queue-req: 0
- 2025-07-20 17:34:04,573 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:05,970 - sglang - INFO - [2025-07-20 17:34:05 TP0] Decode batch. #running-req: 8, #token: 18762, token usage: 0.49, gen throughput (token/s): 127.19, #queue-req: 0
- 2025-07-20 17:34:05,971 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:34:06,826 - __main__ - INFO - Queue remaining: 13
- 2025-07-20 17:34:06,827 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 450.91 546.64
- finished_output_tokens 110.15 138.33
- sglang_input_tokens 466.07 567.73
- sglang_output_tokens 117.10 141.32
- 2025-07-20 17:34:06,827 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 3 | 9
- 2025-07-20 17:34:06,855 - sglang - INFO - [2025-07-20 17:34:06 TP0] Decode batch. #running-req: 6, #token: 14326, token usage: 0.38, gen throughput (token/s): 332.22, #queue-req: 0
- 2025-07-20 17:34:06,855 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:07,733 - sglang - INFO - [2025-07-20 17:34:07 TP0] Decode batch. #running-req: 6, #token: 14566, token usage: 0.38, gen throughput (token/s): 273.46, #queue-req: 0
- 2025-07-20 17:34:07,733 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:08,612 - sglang - INFO - [2025-07-20 17:34:08 TP0] Decode batch. #running-req: 6, #token: 14806, token usage: 0.39, gen throughput (token/s): 273.18, #queue-req: 0
- 2025-07-20 17:34:08,612 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:09,491 - sglang - INFO - [2025-07-20 17:34:09 TP0] Decode batch. #running-req: 6, #token: 15046, token usage: 0.40, gen throughput (token/s): 272.84, #queue-req: 0
- 2025-07-20 17:34:09,491 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:10,373 - sglang - INFO - [2025-07-20 17:34:10 TP0] Decode batch. #running-req: 6, #token: 15286, token usage: 0.40, gen throughput (token/s): 272.06, #queue-req: 0
- 2025-07-20 17:34:10,374 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:11,256 - sglang - INFO - [2025-07-20 17:34:11 TP0] Decode batch. #running-req: 5, #token: 12947, token usage: 0.34, gen throughput (token/s): 258.40, #queue-req: 0
- 2025-07-20 17:34:11,256 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:34:12,125 - sglang - INFO - [2025-07-20 17:34:12 TP0] Decode batch. #running-req: 4, #token: 10132, token usage: 0.27, gen throughput (token/s): 205.84, #queue-req: 0
- 2025-07-20 17:34:12,125 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:34:12,986 - sglang - INFO - [2025-07-20 17:34:12 TP0] Decode batch. #running-req: 3, #token: 7479, token usage: 0.20, gen throughput (token/s): 170.85, #queue-req: 0
- 2025-07-20 17:34:12,986 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:13,838 - sglang - INFO - [2025-07-20 17:34:13 TP0] Decode batch. #running-req: 3, #token: 7599, token usage: 0.20, gen throughput (token/s): 140.82, #queue-req: 0
- 2025-07-20 17:34:13,838 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:14,677 - sglang - INFO - [2025-07-20 17:34:14 TP0] Decode batch. #running-req: 2, #token: 5071, token usage: 0.13, gen throughput (token/s): 103.62, #queue-req: 0
- 2025-07-20 17:34:14,678 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:15,514 - sglang - INFO - [2025-07-20 17:34:15 TP0] Decode batch. #running-req: 2, #token: 5151, token usage: 0.14, gen throughput (token/s): 95.64, #queue-req: 0
- 2025-07-20 17:34:15,514 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:16,353 - sglang - INFO - [2025-07-20 17:34:16 TP0] Decode batch. #running-req: 2, #token: 5231, token usage: 0.14, gen throughput (token/s): 95.34, #queue-req: 0
- 2025-07-20 17:34:16,353 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:16,827 - __main__ - INFO - Queue remaining: 13
- 2025-07-20 17:34:16,828 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 443.00 546.64
- finished_output_tokens 108.22 138.33
- sglang_input_tokens 473.13 593.02
- sglang_output_tokens 119.06 148.84
- 2025-07-20 17:34:16,828 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 7 | 9
- 2025-07-20 17:34:17,193 - sglang - INFO - [2025-07-20 17:34:17 TP0] Decode batch. #running-req: 2, #token: 5311, token usage: 0.14, gen throughput (token/s): 95.26, #queue-req: 0
- 2025-07-20 17:34:17,193 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:18,031 - sglang - INFO - [2025-07-20 17:34:18 TP0] Decode batch. #running-req: 2, #token: 5391, token usage: 0.14, gen throughput (token/s): 95.38, #queue-req: 0
- 2025-07-20 17:34:18,032 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:18,864 - sglang - INFO - [2025-07-20 17:34:18 TP0] Decode batch. #running-req: 1, #token: 2838, token usage: 0.07, gen throughput (token/s): 63.66, #queue-req: 0
- 2025-07-20 17:34:18,864 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:34:19,098 - __main__ - INFO - Finished TaskGroup for worker on 03f19a67ca1619f854740bd806a32d7112c3c315
- 2025-07-20 17:34:19,098 - __main__ - INFO - Got 1 docs for 03f19a67ca1619f854740bd806a32d7112c3c315
- 2025-07-20 17:34:19,100 - __main__ - INFO - Worker 0 processing work item 2b4bbfbba141c9173ab5abba31f4a4c140a0fd85
- 2025-07-20 17:34:19,100 - __main__ - INFO - Created all tasks for 2b4bbfbba141c9173ab5abba31f4a4c140a0fd85
- 2025-07-20 17:34:19,106 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106041000.pdf in worker 0
- 2025-07-20 17:34:19,202 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-6
- 2025-07-20 17:34:19,233 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-3
- 2025-07-20 17:34:19,239 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-2
- 2025-07-20 17:34:19,247 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-4
- 2025-07-20 17:34:19,292 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-1
- 2025-07-20 17:34:19,316 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-5
- 2025-07-20 17:34:19,357 - sglang - INFO - [2025-07-20 17:34:19 TP0] Prefill batch. #new-seq: 1, #new-token: 1350, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:34:19,357 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:34:19,888 - sglang - INFO - [2025-07-20 17:34:19 TP0] Prefill batch. #new-seq: 5, #new-token: 10360, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:34:19,888 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:34:23,706 - sglang - INFO - [2025-07-20 17:34:23 TP0] Decode batch. #running-req: 6, #token: 11884, token usage: 0.31, gen throughput (token/s): 38.21, #queue-req: 0
- 2025-07-20 17:34:23,706 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:24,580 - sglang - INFO - [2025-07-20 17:34:24 TP0] Decode batch. #running-req: 6, #token: 12124, token usage: 0.32, gen throughput (token/s): 274.65, #queue-req: 0
- 2025-07-20 17:34:24,580 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:25,453 - sglang - INFO - [2025-07-20 17:34:25 TP0] Decode batch. #running-req: 6, #token: 12364, token usage: 0.33, gen throughput (token/s): 274.75, #queue-req: 0
- 2025-07-20 17:34:25,454 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:26,327 - sglang - INFO - [2025-07-20 17:34:26 TP0] Decode batch. #running-req: 6, #token: 12604, token usage: 0.33, gen throughput (token/s): 274.72, #queue-req: 0
- 2025-07-20 17:34:26,327 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:26,829 - __main__ - INFO - Queue remaining: 12
- 2025-07-20 17:34:26,829 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 468.58 610.91
- finished_output_tokens 113.90 152.92
- sglang_input_tokens 472.33 599.56
- sglang_output_tokens 119.06 151.26
- 2025-07-20 17:34:26,830 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:34:27,203 - sglang - INFO - [2025-07-20 17:34:27 TP0] Decode batch. #running-req: 6, #token: 12844, token usage: 0.34, gen throughput (token/s): 273.85, #queue-req: 0
- 2025-07-20 17:34:27,203 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:28,077 - sglang - INFO - [2025-07-20 17:34:28 TP0] Decode batch. #running-req: 5, #token: 11505, token usage: 0.30, gen throughput (token/s): 271.08, #queue-req: 0
- 2025-07-20 17:34:28,077 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:34:28,941 - sglang - INFO - [2025-07-20 17:34:28 TP0] Decode batch. #running-req: 4, #token: 9568, token usage: 0.25, gen throughput (token/s): 210.83, #queue-req: 0
- 2025-07-20 17:34:28,941 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:34:29,797 - sglang - INFO - [2025-07-20 17:34:29 TP0] Decode batch. #running-req: 4, #token: 9728, token usage: 0.26, gen throughput (token/s): 186.85, #queue-req: 0
- 2025-07-20 17:34:29,797 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:34:30,650 - sglang - INFO - [2025-07-20 17:34:30 TP0] Decode batch. #running-req: 3, #token: 7580, token usage: 0.20, gen throughput (token/s): 168.78, #queue-req: 0
- 2025-07-20 17:34:30,650 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:31,500 - sglang - INFO - [2025-07-20 17:34:31 TP0] Decode batch. #running-req: 3, #token: 7700, token usage: 0.20, gen throughput (token/s): 141.18, #queue-req: 0
- 2025-07-20 17:34:31,500 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:32,348 - sglang - INFO - [2025-07-20 17:34:32 TP0] Decode batch. #running-req: 2, #token: 5534, token usage: 0.15, gen throughput (token/s): 123.86, #queue-req: 0
- 2025-07-20 17:34:32,348 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:33,188 - sglang - INFO - [2025-07-20 17:34:33 TP0] Decode batch. #running-req: 2, #token: 5614, token usage: 0.15, gen throughput (token/s): 95.19, #queue-req: 0
- 2025-07-20 17:34:33,189 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:34,027 - sglang - INFO - [2025-07-20 17:34:34 TP0] Decode batch. #running-req: 2, #token: 5694, token usage: 0.15, gen throughput (token/s): 95.43, #queue-req: 0
- 2025-07-20 17:34:34,027 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:34,865 - sglang - INFO - [2025-07-20 17:34:34 TP0] Decode batch. #running-req: 2, #token: 5774, token usage: 0.15, gen throughput (token/s): 95.39, #queue-req: 0
- 2025-07-20 17:34:34,866 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:35,705 - sglang - INFO - [2025-07-20 17:34:35 TP0] Decode batch. #running-req: 2, #token: 5854, token usage: 0.15, gen throughput (token/s): 95.30, #queue-req: 0
- 2025-07-20 17:34:35,705 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:34:36,542 - sglang - INFO - [2025-07-20 17:34:36 TP0] Decode batch. #running-req: 1, #token: 2781, token usage: 0.07, gen throughput (token/s): 81.26, #queue-req: 0
- 2025-07-20 17:34:36,542 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:34:36,831 - __main__ - INFO - Queue remaining: 12
- 2025-07-20 17:34:36,831 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 460.65 577.38
- finished_output_tokens 111.97 145.62
- sglang_input_tokens 480.51 590.70
- sglang_output_tokens 120.17 148.85
- 2025-07-20 17:34:36,831 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 5 | 6
- 2025-07-20 17:34:37,370 - sglang - INFO - [2025-07-20 17:34:37 TP0] Decode batch. #running-req: 1, #token: 2821, token usage: 0.07, gen throughput (token/s): 48.25, #queue-req: 0
- 2025-07-20 17:34:37,371 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:34:37,874 - __main__ - INFO - Finished TaskGroup for worker on 2b4bbfbba141c9173ab5abba31f4a4c140a0fd85
- 2025-07-20 17:34:37,874 - __main__ - INFO - Got 1 docs for 2b4bbfbba141c9173ab5abba31f4a4c140a0fd85
- 2025-07-20 17:34:37,875 - __main__ - INFO - Worker 0 processing work item 225426c1e59a9bf843a4d1088c3c98aa0321642c
- 2025-07-20 17:34:37,875 - __main__ - INFO - Created all tasks for 225426c1e59a9bf843a4d1088c3c98aa0321642c
- 2025-07-20 17:34:37,881 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900701.pdf in worker 0
- 2025-07-20 17:34:37,995 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-6
- 2025-07-20 17:34:38,030 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-2
- 2025-07-20 17:34:38,074 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-1
- 2025-07-20 17:34:38,079 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-3
- 2025-07-20 17:34:38,135 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-4
- 2025-07-20 17:34:38,140 - sglang - INFO - [2025-07-20 17:34:38 TP0] Prefill batch. #new-seq: 1, #new-token: 1496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:34:38,140 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:34:38,147 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-5
- 2025-07-20 17:34:38,720 - sglang - INFO - [2025-07-20 17:34:38 TP0] Prefill batch. #new-seq: 5, #new-token: 11856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:34:38,721 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:34:42,572 - sglang - INFO - [2025-07-20 17:34:42 TP0] Decode batch. #running-req: 6, #token: 13448, token usage: 0.35, gen throughput (token/s): 23.07, #queue-req: 0
- 2025-07-20 17:34:42,572 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:43,449 - sglang - INFO - [2025-07-20 17:34:43 TP0] Decode batch. #running-req: 6, #token: 13688, token usage: 0.36, gen throughput (token/s): 273.76, #queue-req: 0
- 2025-07-20 17:34:43,449 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:44,324 - sglang - INFO - [2025-07-20 17:34:44 TP0] Decode batch. #running-req: 6, #token: 13928, token usage: 0.37, gen throughput (token/s): 274.12, #queue-req: 0
- 2025-07-20 17:34:44,325 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:45,201 - sglang - INFO - [2025-07-20 17:34:45 TP0] Decode batch. #running-req: 6, #token: 14168, token usage: 0.37, gen throughput (token/s): 273.73, #queue-req: 0
- 2025-07-20 17:34:45,201 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:46,080 - sglang - INFO - [2025-07-20 17:34:46 TP0] Decode batch. #running-req: 6, #token: 14408, token usage: 0.38, gen throughput (token/s): 273.10, #queue-req: 0
- 2025-07-20 17:34:46,080 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:46,833 - __main__ - INFO - Queue remaining: 11
- 2025-07-20 17:34:46,834 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 472.47 616.41
- finished_output_tokens 114.33 154.09
- sglang_input_tokens 476.09 597.88
- sglang_output_tokens 119.33 151.16
- 2025-07-20 17:34:46,834 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:34:46,958 - sglang - INFO - [2025-07-20 17:34:46 TP0] Decode batch. #running-req: 6, #token: 14648, token usage: 0.39, gen throughput (token/s): 273.23, #queue-req: 0
- 2025-07-20 17:34:46,958 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:34:47,832 - sglang - INFO - [2025-07-20 17:34:47 TP0] Decode batch. #running-req: 5, #token: 13136, token usage: 0.35, gen throughput (token/s): 239.11, #queue-req: 0
- 2025-07-20 17:34:47,832 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:34:48,700 - sglang - INFO - [2025-07-20 17:34:48 TP0] Decode batch. #running-req: 4, #token: 10664, token usage: 0.28, gen throughput (token/s): 214.36, #queue-req: 0
- 2025-07-20 17:34:48,700 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:34:49,559 - sglang - INFO - [2025-07-20 17:34:49 TP0] Decode batch. #running-req: 4, #token: 10824, token usage: 0.28, gen throughput (token/s): 186.35, #queue-req: 0
- 2025-07-20 17:34:49,559 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:34:50,418 - sglang - INFO - [2025-07-20 17:34:50 TP0] Decode batch. #running-req: 3, #token: 8920, token usage: 0.23, gen throughput (token/s): 173.40, #queue-req: 0
- 2025-07-20 17:34:50,418 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:51,272 - sglang - INFO - [2025-07-20 17:34:51 TP0] Decode batch. #running-req: 3, #token: 9040, token usage: 0.24, gen throughput (token/s): 140.49, #queue-req: 0
- 2025-07-20 17:34:51,272 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:52,126 - sglang - INFO - [2025-07-20 17:34:52 TP0] Decode batch. #running-req: 3, #token: 9160, token usage: 0.24, gen throughput (token/s): 140.50, #queue-req: 0
- 2025-07-20 17:34:52,126 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:52,979 - sglang - INFO - [2025-07-20 17:34:52 TP0] Decode batch. #running-req: 3, #token: 9280, token usage: 0.24, gen throughput (token/s): 140.61, #queue-req: 0
- 2025-07-20 17:34:52,980 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:53,834 - sglang - INFO - [2025-07-20 17:34:53 TP0] Decode batch. #running-req: 3, #token: 9400, token usage: 0.25, gen throughput (token/s): 140.39, #queue-req: 0
- 2025-07-20 17:34:53,834 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:54,690 - sglang - INFO - [2025-07-20 17:34:54 TP0] Decode batch. #running-req: 3, #token: 9520, token usage: 0.25, gen throughput (token/s): 140.16, #queue-req: 0
- 2025-07-20 17:34:54,691 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:55,545 - sglang - INFO - [2025-07-20 17:34:55 TP0] Decode batch. #running-req: 3, #token: 9640, token usage: 0.25, gen throughput (token/s): 140.44, #queue-req: 0
- 2025-07-20 17:34:55,545 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:34:56,399 - sglang - INFO - [2025-07-20 17:34:56 TP0] Decode batch. #running-req: 1, #token: 3342, token usage: 0.09, gen throughput (token/s): 135.77, #queue-req: 0
- 2025-07-20 17:34:56,399 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:34:56,834 - __main__ - INFO - Queue remaining: 11
- 2025-07-20 17:34:56,834 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 464.73 571.90
- finished_output_tokens 112.46 143.42
- sglang_input_tokens 485.76 607.46
- sglang_output_tokens 120.95 150.70
- 2025-07-20 17:34:56,834 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 5 | 6
- 2025-07-20 17:34:57,230 - sglang - INFO - [2025-07-20 17:34:57 TP0] Decode batch. #running-req: 1, #token: 3382, token usage: 0.09, gen throughput (token/s): 48.12, #queue-req: 0
- 2025-07-20 17:34:57,231 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:34:58,062 - sglang - INFO - [2025-07-20 17:34:58 TP0] Decode batch. #running-req: 1, #token: 3422, token usage: 0.09, gen throughput (token/s): 48.10, #queue-req: 0
- 2025-07-20 17:34:58,062 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:34:58,893 - sglang - INFO - [2025-07-20 17:34:58 TP0] Decode batch. #running-req: 1, #token: 3462, token usage: 0.09, gen throughput (token/s): 48.14, #queue-req: 0
- 2025-07-20 17:34:58,893 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:34:59,724 - sglang - INFO - [2025-07-20 17:34:59 TP0] Decode batch. #running-req: 1, #token: 3502, token usage: 0.09, gen throughput (token/s): 48.15, #queue-req: 0
- 2025-07-20 17:34:59,724 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:35:00,556 - sglang - INFO - [2025-07-20 17:35:00 TP0] Decode batch. #running-req: 1, #token: 3542, token usage: 0.09, gen throughput (token/s): 48.08, #queue-req: 0
- 2025-07-20 17:35:00,556 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:35:00,958 - __main__ - INFO - Finished TaskGroup for worker on 225426c1e59a9bf843a4d1088c3c98aa0321642c
- 2025-07-20 17:35:00,958 - __main__ - INFO - Got 1 docs for 225426c1e59a9bf843a4d1088c3c98aa0321642c
- 2025-07-20 17:35:00,959 - __main__ - INFO - Worker 0 processing work item 398aeb9cc239880a7222603994af5c4016796381
- 2025-07-20 17:35:00,959 - __main__ - INFO - Created all tasks for 398aeb9cc239880a7222603994af5c4016796381
- 2025-07-20 17:35:00,964 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
- 2025-07-20 17:35:01,021 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
- 2025-07-20 17:35:01,076 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
- 2025-07-20 17:35:01,103 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
- 2025-07-20 17:35:01,151 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
- 2025-07-20 17:35:01,158 - sglang - INFO - [2025-07-20 17:35:01 TP0] Prefill batch. #new-seq: 1, #new-token: 1102, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:35:01,158 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:35:01,186 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
- 2025-07-20 17:35:01,653 - sglang - INFO - [2025-07-20 17:35:01 TP0] Prefill batch. #new-seq: 4, #new-token: 8057, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.03, #running-req: 1, #queue-req: 0
- 2025-07-20 17:35:01,653 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:35:04,619 - sglang - INFO - [2025-07-20 17:35:04 TP0] Decode batch. #running-req: 5, #token: 9264, token usage: 0.24, gen throughput (token/s): 30.52, #queue-req: 0
- 2025-07-20 17:35:04,619 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:35:05,477 - sglang - INFO - [2025-07-20 17:35:05 TP0] Decode batch. #running-req: 4, #token: 8301, token usage: 0.22, gen throughput (token/s): 200.47, #queue-req: 0
- 2025-07-20 17:35:05,477 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:35:06,329 - sglang - INFO - [2025-07-20 17:35:06 TP0] Decode batch. #running-req: 4, #token: 8461, token usage: 0.22, gen throughput (token/s): 187.63, #queue-req: 0
- 2025-07-20 17:35:06,329 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:35:06,835 - __main__ - INFO - Queue remaining: 10
- 2025-07-20 17:35:06,836 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 478.76 616.41
- finished_output_tokens 115.58 153.62
- sglang_input_tokens 484.04 614.13
- sglang_output_tokens 120.47 152.28
- 2025-07-20 17:35:06,836 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 5
- 2025-07-20 17:35:07,182 - sglang - INFO - [2025-07-20 17:35:07 TP0] Decode batch. #running-req: 4, #token: 8621, token usage: 0.23, gen throughput (token/s): 187.71, #queue-req: 0
- 2025-07-20 17:35:07,182 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:35:08,035 - sglang - INFO - [2025-07-20 17:35:08 TP0] Decode batch. #running-req: 3, #token: 7222, token usage: 0.19, gen throughput (token/s): 170.04, #queue-req: 0
- 2025-07-20 17:35:08,035 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:08,885 - sglang - INFO - [2025-07-20 17:35:08 TP0] Decode batch. #running-req: 3, #token: 7342, token usage: 0.19, gen throughput (token/s): 141.06, #queue-req: 0
- 2025-07-20 17:35:08,885 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:09,735 - sglang - INFO - [2025-07-20 17:35:09 TP0] Decode batch. #running-req: 3, #token: 7462, token usage: 0.20, gen throughput (token/s): 141.23, #queue-req: 0
- 2025-07-20 17:35:09,735 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:10,584 - sglang - INFO - [2025-07-20 17:35:10 TP0] Decode batch. #running-req: 3, #token: 7582, token usage: 0.20, gen throughput (token/s): 141.31, #queue-req: 0
- 2025-07-20 17:35:10,584 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:11,435 - sglang - INFO - [2025-07-20 17:35:11 TP0] Decode batch. #running-req: 3, #token: 7702, token usage: 0.20, gen throughput (token/s): 140.98, #queue-req: 0
- 2025-07-20 17:35:11,435 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:12,288 - sglang - INFO - [2025-07-20 17:35:12 TP0] Decode batch. #running-req: 3, #token: 7822, token usage: 0.21, gen throughput (token/s): 140.76, #queue-req: 0
- 2025-07-20 17:35:12,288 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:13,139 - sglang - INFO - [2025-07-20 17:35:13 TP0] Decode batch. #running-req: 3, #token: 7942, token usage: 0.21, gen throughput (token/s): 140.97, #queue-req: 0
- 2025-07-20 17:35:13,139 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:13,990 - sglang - INFO - [2025-07-20 17:35:13 TP0] Decode batch. #running-req: 3, #token: 8062, token usage: 0.21, gen throughput (token/s): 141.02, #queue-req: 0
- 2025-07-20 17:35:13,990 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:14,843 - sglang - INFO - [2025-07-20 17:35:14 TP0] Decode batch. #running-req: 3, #token: 8182, token usage: 0.22, gen throughput (token/s): 140.76, #queue-req: 0
- 2025-07-20 17:35:14,843 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:15,695 - sglang - INFO - [2025-07-20 17:35:15 TP0] Decode batch. #running-req: 3, #token: 8302, token usage: 0.22, gen throughput (token/s): 140.80, #queue-req: 0
- 2025-07-20 17:35:15,695 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:16,547 - sglang - INFO - [2025-07-20 17:35:16 TP0] Decode batch. #running-req: 3, #token: 8422, token usage: 0.22, gen throughput (token/s): 140.78, #queue-req: 0
- 2025-07-20 17:35:16,547 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:16,837 - __main__ - INFO - Queue remaining: 10
- 2025-07-20 17:35:16,837 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 471.17 616.41
- finished_output_tokens 113.75 153.62
- sglang_input_tokens 478.55 595.10
- sglang_output_tokens 118.82 146.39
- 2025-07-20 17:35:16,838 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 5
- 2025-07-20 17:35:17,396 - sglang - INFO - [2025-07-20 17:35:17 TP0] Decode batch. #running-req: 2, #token: 5339, token usage: 0.14, gen throughput (token/s): 124.85, #queue-req: 0
- 2025-07-20 17:35:17,396 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:35:18,228 - sglang - INFO - [2025-07-20 17:35:18 TP0] Decode batch. #running-req: 1, #token: 2858, token usage: 0.08, gen throughput (token/s): 54.10, #queue-req: 0
- 2025-07-20 17:35:18,228 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:35:19,058 - sglang - INFO - [2025-07-20 17:35:19 TP0] Decode batch. #running-req: 1, #token: 2898, token usage: 0.08, gen throughput (token/s): 48.22, #queue-req: 0
- 2025-07-20 17:35:19,058 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:35:19,886 - sglang - INFO - [2025-07-20 17:35:19 TP0] Decode batch. #running-req: 1, #token: 2938, token usage: 0.08, gen throughput (token/s): 48.27, #queue-req: 0
- 2025-07-20 17:35:19,886 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:35:20,716 - sglang - INFO - [2025-07-20 17:35:20 TP0] Decode batch. #running-req: 1, #token: 2978, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:35:20,716 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:35:21,114 - __main__ - INFO - Finished TaskGroup for worker on 398aeb9cc239880a7222603994af5c4016796381
- 2025-07-20 17:35:21,114 - __main__ - INFO - Got 1 docs for 398aeb9cc239880a7222603994af5c4016796381
- 2025-07-20 17:35:21,115 - __main__ - INFO - Worker 0 processing work item 06798e8f7cc26525f138f26354ffab7c63074f2c
- 2025-07-20 17:35:21,115 - __main__ - INFO - Created all tasks for 06798e8f7cc26525f138f26354ffab7c63074f2c
- 2025-07-20 17:35:21,121 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
- 2025-07-20 17:35:21,301 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
- 2025-07-20 17:35:21,305 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
- 2025-07-20 17:35:21,316 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
- 2025-07-20 17:35:21,338 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
- 2025-07-20 17:35:21,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
- 2025-07-20 17:35:21,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
- 2025-07-20 17:35:21,350 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
- 2025-07-20 17:35:21,357 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
- 2025-07-20 17:35:21,360 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
- 2025-07-20 17:35:21,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
- 2025-07-20 17:35:21,374 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
- 2025-07-20 17:35:21,380 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
- 2025-07-20 17:35:21,433 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
- 2025-07-20 17:35:21,451 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
- 2025-07-20 17:35:21,476 - sglang - INFO - [2025-07-20 17:35:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2043, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:35:21,476 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:35:22,178 - sglang - INFO - [2025-07-20 17:35:22 TP0] Prefill batch. #new-seq: 6, #new-token: 12919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 7
- 2025-07-20 17:35:22,178 - __main__ - INFO - sglang running req: 1 queue req: 7
- 2025-07-20 17:35:26,676 - sglang - INFO - [2025-07-20 17:35:26 TP0] Decode batch. #running-req: 7, #token: 15109, token usage: 0.40, gen throughput (token/s): 27.85, #queue-req: 7
- 2025-07-20 17:35:26,676 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:35:26,838 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:35:26,838 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 478.11 592.65
- finished_output_tokens 115.46 143.38
- sglang_input_tokens 481.50 592.65
- sglang_output_tokens 120.15 143.38
- 2025-07-20 17:35:26,839 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 14
- 2025-07-20 17:35:27,557 - sglang - INFO - [2025-07-20 17:35:27 TP0] Decode batch. #running-req: 7, #token: 15389, token usage: 0.41, gen throughput (token/s): 317.87, #queue-req: 7
- 2025-07-20 17:35:27,557 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:35:28,438 - sglang - INFO - [2025-07-20 17:35:28 TP0] Decode batch. #running-req: 7, #token: 15669, token usage: 0.41, gen throughput (token/s): 317.80, #queue-req: 7
- 2025-07-20 17:35:28,438 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:35:29,320 - sglang - INFO - [2025-07-20 17:35:29 TP0] Decode batch. #running-req: 7, #token: 15949, token usage: 0.42, gen throughput (token/s): 317.45, #queue-req: 7
- 2025-07-20 17:35:29,320 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:35:30,213 - sglang - INFO - [2025-07-20 17:35:30 TP0] Decode batch. #running-req: 7, #token: 16229, token usage: 0.43, gen throughput (token/s): 313.54, #queue-req: 7
- 2025-07-20 17:35:30,213 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:35:31,106 - sglang - INFO - [2025-07-20 17:35:31 TP0] Decode batch. #running-req: 7, #token: 16509, token usage: 0.43, gen throughput (token/s): 313.70, #queue-req: 7
- 2025-07-20 17:35:31,106 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:35:31,996 - sglang - INFO - [2025-07-20 17:35:31 TP0] Decode batch. #running-req: 7, #token: 16789, token usage: 0.44, gen throughput (token/s): 314.42, #queue-req: 7
- 2025-07-20 17:35:31,996 - __main__ - INFO - sglang running req: 7 queue req: 7
- 2025-07-20 17:35:32,063 - sglang - INFO - [2025-07-20 17:35:32 TP0] Prefill batch. #new-seq: 2, #new-token: 4940, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 5
- 2025-07-20 17:35:32,064 - __main__ - INFO - sglang running req: 6 queue req: 5
- 2025-07-20 17:35:34,348 - sglang - INFO - [2025-07-20 17:35:34 TP0] Decode batch. #running-req: 8, #token: 19967, token usage: 0.53, gen throughput (token/s): 134.34, #queue-req: 5
- 2025-07-20 17:35:34,349 - __main__ - INFO - sglang running req: 8 queue req: 5
- 2025-07-20 17:35:35,130 - sglang - INFO - [2025-07-20 17:35:35 TP0] Prefill batch. #new-seq: 2, #new-token: 4920, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.47, #running-req: 7, #queue-req: 3
- 2025-07-20 17:35:35,131 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:35:36,712 - sglang - INFO - [2025-07-20 17:35:36 TP0] Decode batch. #running-req: 9, #token: 22833, token usage: 0.60, gen throughput (token/s): 137.09, #queue-req: 3
- 2025-07-20 17:35:36,712 - __main__ - INFO - sglang running req: 9 queue req: 3
- 2025-07-20 17:35:36,841 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:35:36,841 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 470.76 592.65
- finished_output_tokens 113.69 143.38
- sglang_input_tokens 480.03 571.24
- sglang_output_tokens 119.22 138.01
- 2025-07-20 17:35:36,841 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 14
- 2025-07-20 17:35:37,662 - sglang - INFO - [2025-07-20 17:35:37 TP0] Decode batch. #running-req: 9, #token: 23193, token usage: 0.61, gen throughput (token/s): 378.99, #queue-req: 3
- 2025-07-20 17:35:37,662 - __main__ - INFO - sglang running req: 9 queue req: 3
- 2025-07-20 17:35:37,851 - sglang - INFO - [2025-07-20 17:35:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2576, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 8, #queue-req: 2
- 2025-07-20 17:35:37,851 - __main__ - INFO - sglang running req: 8 queue req: 2
- 2025-07-20 17:35:39,390 - sglang - INFO - [2025-07-20 17:35:39 TP0] Decode batch. #running-req: 9, #token: 23266, token usage: 0.61, gen throughput (token/s): 207.69, #queue-req: 2
- 2025-07-20 17:35:39,390 - __main__ - INFO - sglang running req: 9 queue req: 2
- 2025-07-20 17:35:40,104 - sglang - INFO - [2025-07-20 17:35:40 TP0] Prefill batch. #new-seq: 2, #new-token: 4634, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.55, #running-req: 8, #queue-req: 0
- 2025-07-20 17:35:40,104 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:35:41,764 - sglang - INFO - [2025-07-20 17:35:41 TP0] Decode batch. #running-req: 10, #token: 25674, token usage: 0.68, gen throughput (token/s): 155.45, #queue-req: 0
- 2025-07-20 17:35:41,764 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:35:42,724 - sglang - INFO - [2025-07-20 17:35:42 TP0] Decode batch. #running-req: 10, #token: 26074, token usage: 0.69, gen throughput (token/s): 416.59, #queue-req: 0
- 2025-07-20 17:35:42,724 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:35:43,686 - sglang - INFO - [2025-07-20 17:35:43 TP0] Decode batch. #running-req: 10, #token: 26474, token usage: 0.70, gen throughput (token/s): 415.76, #queue-req: 0
- 2025-07-20 17:35:43,687 - __main__ - INFO - sglang running req: 10 queue req: 0
- 2025-07-20 17:35:44,646 - sglang - INFO - [2025-07-20 17:35:44 TP0] Decode batch. #running-req: 9, #token: 24245, token usage: 0.64, gen throughput (token/s): 405.16, #queue-req: 0
- 2025-07-20 17:35:44,647 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:35:45,555 - sglang - INFO - [2025-07-20 17:35:45 TP0] Decode batch. #running-req: 8, #token: 21644, token usage: 0.57, gen throughput (token/s): 363.08, #queue-req: 0
- 2025-07-20 17:35:45,556 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:35:46,451 - sglang - INFO - [2025-07-20 17:35:46 TP0] Decode batch. #running-req: 8, #token: 21964, token usage: 0.58, gen throughput (token/s): 357.28, #queue-req: 0
- 2025-07-20 17:35:46,451 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:35:46,842 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:35:46,842 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 463.63 592.65
- finished_output_tokens 111.97 143.38
- sglang_input_tokens 486.40 587.09
- sglang_output_tokens 120.45 140.09
- 2025-07-20 17:35:46,843 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 6 | 14
- 2025-07-20 17:35:47,349 - sglang - INFO - [2025-07-20 17:35:47 TP0] Decode batch. #running-req: 8, #token: 22284, token usage: 0.59, gen throughput (token/s): 356.29, #queue-req: 0
- 2025-07-20 17:35:47,349 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:35:48,249 - sglang - INFO - [2025-07-20 17:35:48 TP0] Decode batch. #running-req: 8, #token: 22604, token usage: 0.60, gen throughput (token/s): 355.46, #queue-req: 0
- 2025-07-20 17:35:48,250 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:35:49,150 - sglang - INFO - [2025-07-20 17:35:49 TP0] Decode batch. #running-req: 8, #token: 22924, token usage: 0.60, gen throughput (token/s): 355.28, #queue-req: 0
- 2025-07-20 17:35:49,150 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:35:50,052 - sglang - INFO - [2025-07-20 17:35:50 TP0] Decode batch. #running-req: 7, #token: 20558, token usage: 0.54, gen throughput (token/s): 352.64, #queue-req: 0
- 2025-07-20 17:35:50,052 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:35:50,939 - sglang - INFO - [2025-07-20 17:35:50 TP0] Decode batch. #running-req: 6, #token: 17866, token usage: 0.47, gen throughput (token/s): 279.44, #queue-req: 0
- 2025-07-20 17:35:50,940 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:35:51,826 - sglang - INFO - [2025-07-20 17:35:51 TP0] Decode batch. #running-req: 6, #token: 18106, token usage: 0.48, gen throughput (token/s): 270.53, #queue-req: 0
- 2025-07-20 17:35:51,827 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:35:52,701 - sglang - INFO - [2025-07-20 17:35:52 TP0] Decode batch. #running-req: 3, #token: 8974, token usage: 0.24, gen throughput (token/s): 184.05, #queue-req: 0
- 2025-07-20 17:35:52,701 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:53,563 - sglang - INFO - [2025-07-20 17:35:53 TP0] Decode batch. #running-req: 3, #token: 9094, token usage: 0.24, gen throughput (token/s): 139.29, #queue-req: 0
- 2025-07-20 17:35:53,563 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:54,421 - sglang - INFO - [2025-07-20 17:35:54 TP0] Decode batch. #running-req: 3, #token: 9214, token usage: 0.24, gen throughput (token/s): 139.75, #queue-req: 0
- 2025-07-20 17:35:54,422 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:55,275 - sglang - INFO - [2025-07-20 17:35:55 TP0] Decode batch. #running-req: 3, #token: 9334, token usage: 0.25, gen throughput (token/s): 140.55, #queue-req: 0
- 2025-07-20 17:35:55,275 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:35:56,120 - sglang - INFO - [2025-07-20 17:35:56 TP0] Decode batch. #running-req: 2, #token: 6166, token usage: 0.16, gen throughput (token/s): 108.85, #queue-req: 0
- 2025-07-20 17:35:56,120 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:35:56,844 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:35:56,844 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 456.72 535.46
- finished_output_tokens 110.30 127.88
- sglang_input_tokens 501.14 627.52
- sglang_output_tokens 123.77 148.00
- 2025-07-20 17:35:56,844 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 12 | 14
- 2025-07-20 17:35:56,961 - sglang - INFO - [2025-07-20 17:35:56 TP0] Decode batch. #running-req: 2, #token: 6246, token usage: 0.16, gen throughput (token/s): 95.20, #queue-req: 0
- 2025-07-20 17:35:56,961 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:35:57,801 - sglang - INFO - [2025-07-20 17:35:57 TP0] Decode batch. #running-req: 2, #token: 6326, token usage: 0.17, gen throughput (token/s): 95.15, #queue-req: 0
- 2025-07-20 17:35:57,802 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:35:58,643 - sglang - INFO - [2025-07-20 17:35:58 TP0] Decode batch. #running-req: 2, #token: 6406, token usage: 0.17, gen throughput (token/s): 95.10, #queue-req: 0
- 2025-07-20 17:35:58,643 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:35:59,485 - sglang - INFO - [2025-07-20 17:35:59 TP0] Decode batch. #running-req: 2, #token: 6486, token usage: 0.17, gen throughput (token/s): 94.96, #queue-req: 0
- 2025-07-20 17:35:59,485 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:36:00,326 - sglang - INFO - [2025-07-20 17:36:00 TP0] Decode batch. #running-req: 2, #token: 6566, token usage: 0.17, gen throughput (token/s): 95.13, #queue-req: 0
- 2025-07-20 17:36:00,326 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:36:01,167 - sglang - INFO - [2025-07-20 17:36:01 TP0] Decode batch. #running-req: 2, #token: 6646, token usage: 0.17, gen throughput (token/s): 95.12, #queue-req: 0
- 2025-07-20 17:36:01,167 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:36:01,997 - sglang - INFO - [2025-07-20 17:36:01 TP0] Decode batch. #running-req: 1, #token: 3478, token usage: 0.09, gen throughput (token/s): 49.37, #queue-req: 0
- 2025-07-20 17:36:01,998 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:02,828 - sglang - INFO - [2025-07-20 17:36:02 TP0] Decode batch. #running-req: 1, #token: 3518, token usage: 0.09, gen throughput (token/s): 48.14, #queue-req: 0
- 2025-07-20 17:36:02,829 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:03,660 - sglang - INFO - [2025-07-20 17:36:03 TP0] Decode batch. #running-req: 1, #token: 3558, token usage: 0.09, gen throughput (token/s): 48.10, #queue-req: 0
- 2025-07-20 17:36:03,660 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:04,491 - sglang - INFO - [2025-07-20 17:36:04 TP0] Decode batch. #running-req: 1, #token: 3598, token usage: 0.09, gen throughput (token/s): 48.15, #queue-req: 0
- 2025-07-20 17:36:04,491 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:05,321 - sglang - INFO - [2025-07-20 17:36:05 TP0] Decode batch. #running-req: 1, #token: 3638, token usage: 0.10, gen throughput (token/s): 48.17, #queue-req: 0
- 2025-07-20 17:36:05,321 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:06,153 - sglang - INFO - [2025-07-20 17:36:06 TP0] Decode batch. #running-req: 1, #token: 3678, token usage: 0.10, gen throughput (token/s): 48.10, #queue-req: 0
- 2025-07-20 17:36:06,153 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:06,845 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:36:06,845 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 450.00 535.46
- finished_output_tokens 108.68 127.88
- sglang_input_tokens 497.18 628.15
- sglang_output_tokens 123.26 150.15
- 2025-07-20 17:36:06,845 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:36:06,985 - sglang - INFO - [2025-07-20 17:36:06 TP0] Decode batch. #running-req: 1, #token: 3718, token usage: 0.10, gen throughput (token/s): 48.09, #queue-req: 0
- 2025-07-20 17:36:06,985 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:07,816 - sglang - INFO - [2025-07-20 17:36:07 TP0] Decode batch. #running-req: 1, #token: 3758, token usage: 0.10, gen throughput (token/s): 48.12, #queue-req: 0
- 2025-07-20 17:36:07,816 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:08,647 - sglang - INFO - [2025-07-20 17:36:08 TP0] Decode batch. #running-req: 1, #token: 3798, token usage: 0.10, gen throughput (token/s): 48.13, #queue-req: 0
- 2025-07-20 17:36:08,647 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:09,479 - sglang - INFO - [2025-07-20 17:36:09 TP0] Decode batch. #running-req: 1, #token: 3838, token usage: 0.10, gen throughput (token/s): 48.07, #queue-req: 0
- 2025-07-20 17:36:09,479 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:10,311 - sglang - INFO - [2025-07-20 17:36:10 TP0] Decode batch. #running-req: 1, #token: 3878, token usage: 0.10, gen throughput (token/s): 48.11, #queue-req: 0
- 2025-07-20 17:36:10,311 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:11,142 - sglang - INFO - [2025-07-20 17:36:11 TP0] Decode batch. #running-req: 1, #token: 3918, token usage: 0.10, gen throughput (token/s): 48.11, #queue-req: 0
- 2025-07-20 17:36:11,142 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:11,973 - sglang - INFO - [2025-07-20 17:36:11 TP0] Decode batch. #running-req: 1, #token: 3958, token usage: 0.10, gen throughput (token/s): 48.12, #queue-req: 0
- 2025-07-20 17:36:11,973 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:12,805 - sglang - INFO - [2025-07-20 17:36:12 TP0] Decode batch. #running-req: 1, #token: 3998, token usage: 0.11, gen throughput (token/s): 48.08, #queue-req: 0
- 2025-07-20 17:36:12,805 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:13,637 - sglang - INFO - [2025-07-20 17:36:13 TP0] Decode batch. #running-req: 1, #token: 4038, token usage: 0.11, gen throughput (token/s): 48.08, #queue-req: 0
- 2025-07-20 17:36:13,638 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:14,470 - sglang - INFO - [2025-07-20 17:36:14 TP0] Decode batch. #running-req: 1, #token: 4078, token usage: 0.11, gen throughput (token/s): 48.06, #queue-req: 0
- 2025-07-20 17:36:14,470 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:15,302 - sglang - INFO - [2025-07-20 17:36:15 TP0] Decode batch. #running-req: 1, #token: 4118, token usage: 0.11, gen throughput (token/s): 48.05, #queue-req: 0
- 2025-07-20 17:36:15,302 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:16,135 - sglang - INFO - [2025-07-20 17:36:16 TP0] Decode batch. #running-req: 1, #token: 4158, token usage: 0.11, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:36:16,135 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:16,847 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:36:16,847 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 443.49 535.46
- finished_output_tokens 107.10 127.88
- sglang_input_tokens 489.98 628.15
- sglang_output_tokens 121.48 150.15
- 2025-07-20 17:36:16,847 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:36:16,967 - sglang - INFO - [2025-07-20 17:36:16 TP0] Decode batch. #running-req: 1, #token: 4198, token usage: 0.11, gen throughput (token/s): 48.03, #queue-req: 0
- 2025-07-20 17:36:16,968 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:17,801 - sglang - INFO - [2025-07-20 17:36:17 TP0] Decode batch. #running-req: 1, #token: 4238, token usage: 0.11, gen throughput (token/s): 48.01, #queue-req: 0
- 2025-07-20 17:36:17,801 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:18,634 - sglang - INFO - [2025-07-20 17:36:18 TP0] Decode batch. #running-req: 1, #token: 4278, token usage: 0.11, gen throughput (token/s): 48.02, #queue-req: 0
- 2025-07-20 17:36:18,634 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:19,467 - sglang - INFO - [2025-07-20 17:36:19 TP0] Decode batch. #running-req: 1, #token: 4318, token usage: 0.11, gen throughput (token/s): 48.02, #queue-req: 0
- 2025-07-20 17:36:19,467 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:20,300 - sglang - INFO - [2025-07-20 17:36:20 TP0] Decode batch. #running-req: 1, #token: 4358, token usage: 0.11, gen throughput (token/s): 48.02, #queue-req: 0
- 2025-07-20 17:36:20,300 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:21,133 - sglang - INFO - [2025-07-20 17:36:21 TP0] Decode batch. #running-req: 1, #token: 4398, token usage: 0.12, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:36:21,133 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:21,966 - sglang - INFO - [2025-07-20 17:36:21 TP0] Decode batch. #running-req: 1, #token: 4438, token usage: 0.12, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:36:21,967 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:22,800 - sglang - INFO - [2025-07-20 17:36:22 TP0] Decode batch. #running-req: 1, #token: 4478, token usage: 0.12, gen throughput (token/s): 47.97, #queue-req: 0
- 2025-07-20 17:36:22,801 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:23,634 - sglang - INFO - [2025-07-20 17:36:23 TP0] Decode batch. #running-req: 1, #token: 4518, token usage: 0.12, gen throughput (token/s): 47.98, #queue-req: 0
- 2025-07-20 17:36:23,634 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:24,468 - sglang - INFO - [2025-07-20 17:36:24 TP0] Decode batch. #running-req: 1, #token: 4558, token usage: 0.12, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:36:24,468 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:25,303 - sglang - INFO - [2025-07-20 17:36:25 TP0] Decode batch. #running-req: 1, #token: 4598, token usage: 0.12, gen throughput (token/s): 47.90, #queue-req: 0
- 2025-07-20 17:36:25,303 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:26,138 - sglang - INFO - [2025-07-20 17:36:26 TP0] Decode batch. #running-req: 1, #token: 4638, token usage: 0.12, gen throughput (token/s): 47.88, #queue-req: 0
- 2025-07-20 17:36:26,138 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:26,849 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:36:26,850 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 437.15 535.46
- finished_output_tokens 105.57 127.88
- sglang_input_tokens 482.99 628.15
- sglang_output_tokens 119.74 150.15
- 2025-07-20 17:36:26,850 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:36:26,973 - sglang - INFO - [2025-07-20 17:36:26 TP0] Decode batch. #running-req: 1, #token: 4678, token usage: 0.12, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:36:26,973 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:27,808 - sglang - INFO - [2025-07-20 17:36:27 TP0] Decode batch. #running-req: 1, #token: 4718, token usage: 0.12, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:36:27,808 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:28,642 - sglang - INFO - [2025-07-20 17:36:28 TP0] Decode batch. #running-req: 1, #token: 4758, token usage: 0.13, gen throughput (token/s): 47.93, #queue-req: 0
- 2025-07-20 17:36:28,643 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:29,478 - sglang - INFO - [2025-07-20 17:36:29 TP0] Decode batch. #running-req: 1, #token: 4798, token usage: 0.13, gen throughput (token/s): 47.89, #queue-req: 0
- 2025-07-20 17:36:29,478 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:30,313 - sglang - INFO - [2025-07-20 17:36:30 TP0] Decode batch. #running-req: 1, #token: 4838, token usage: 0.13, gen throughput (token/s): 47.90, #queue-req: 0
- 2025-07-20 17:36:30,313 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:31,147 - sglang - INFO - [2025-07-20 17:36:31 TP0] Decode batch. #running-req: 1, #token: 4878, token usage: 0.13, gen throughput (token/s): 47.92, #queue-req: 0
- 2025-07-20 17:36:31,148 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:31,982 - sglang - INFO - [2025-07-20 17:36:31 TP0] Decode batch. #running-req: 1, #token: 4918, token usage: 0.13, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:36:31,982 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:32,818 - sglang - INFO - [2025-07-20 17:36:32 TP0] Decode batch. #running-req: 1, #token: 4958, token usage: 0.13, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-07-20 17:36:32,818 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:33,653 - sglang - INFO - [2025-07-20 17:36:33 TP0] Decode batch. #running-req: 1, #token: 4998, token usage: 0.13, gen throughput (token/s): 47.88, #queue-req: 0
- 2025-07-20 17:36:33,653 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:34,488 - sglang - INFO - [2025-07-20 17:36:34 TP0] Decode batch. #running-req: 1, #token: 5038, token usage: 0.13, gen throughput (token/s): 47.90, #queue-req: 0
- 2025-07-20 17:36:34,489 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:35,324 - sglang - INFO - [2025-07-20 17:36:35 TP0] Decode batch. #running-req: 1, #token: 5078, token usage: 0.13, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-07-20 17:36:35,324 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:35,707 - __main__ - WARNING - JSON decode error on attempt 0 for test_pdf/1144520000702630XG344010604301201.pdf-10: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:36:35,843 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
- 2025-07-20 17:36:36,014 - sglang - INFO - [2025-07-20 17:36:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2097, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:36:36,015 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:36:36,852 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:36:36,852 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 431.00 469.36
- finished_output_tokens 104.09 112.60
- sglang_input_tokens 479.14 564.80
- sglang_output_tokens 122.28 143.21
- 2025-07-20 17:36:36,852 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:36:37,146 - sglang - INFO - [2025-07-20 17:36:37 TP0] Decode batch. #running-req: 1, #token: 2119, token usage: 0.06, gen throughput (token/s): 21.96, #queue-req: 0
- 2025-07-20 17:36:37,146 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:37,972 - sglang - INFO - [2025-07-20 17:36:37 TP0] Decode batch. #running-req: 1, #token: 2159, token usage: 0.06, gen throughput (token/s): 48.37, #queue-req: 0
- 2025-07-20 17:36:37,973 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:38,799 - sglang - INFO - [2025-07-20 17:36:38 TP0] Decode batch. #running-req: 1, #token: 2199, token usage: 0.06, gen throughput (token/s): 48.43, #queue-req: 0
- 2025-07-20 17:36:38,799 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:39,626 - sglang - INFO - [2025-07-20 17:36:39 TP0] Decode batch. #running-req: 1, #token: 2239, token usage: 0.06, gen throughput (token/s): 48.31, #queue-req: 0
- 2025-07-20 17:36:39,627 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:40,454 - sglang - INFO - [2025-07-20 17:36:40 TP0] Decode batch. #running-req: 1, #token: 2279, token usage: 0.06, gen throughput (token/s): 48.33, #queue-req: 0
- 2025-07-20 17:36:40,454 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:41,281 - sglang - INFO - [2025-07-20 17:36:41 TP0] Decode batch. #running-req: 1, #token: 2319, token usage: 0.06, gen throughput (token/s): 48.41, #queue-req: 0
- 2025-07-20 17:36:41,281 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:42,107 - sglang - INFO - [2025-07-20 17:36:42 TP0] Decode batch. #running-req: 1, #token: 2359, token usage: 0.06, gen throughput (token/s): 48.38, #queue-req: 0
- 2025-07-20 17:36:42,108 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:42,935 - sglang - INFO - [2025-07-20 17:36:42 TP0] Decode batch. #running-req: 1, #token: 2399, token usage: 0.06, gen throughput (token/s): 48.34, #queue-req: 0
- 2025-07-20 17:36:42,935 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:43,762 - sglang - INFO - [2025-07-20 17:36:43 TP0] Decode batch. #running-req: 1, #token: 2439, token usage: 0.06, gen throughput (token/s): 48.37, #queue-req: 0
- 2025-07-20 17:36:43,762 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:44,589 - sglang - INFO - [2025-07-20 17:36:44 TP0] Decode batch. #running-req: 1, #token: 2479, token usage: 0.07, gen throughput (token/s): 48.38, #queue-req: 0
- 2025-07-20 17:36:44,589 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:45,416 - sglang - INFO - [2025-07-20 17:36:45 TP0] Decode batch. #running-req: 1, #token: 2519, token usage: 0.07, gen throughput (token/s): 48.32, #queue-req: 0
- 2025-07-20 17:36:45,417 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:46,244 - sglang - INFO - [2025-07-20 17:36:46 TP0] Decode batch. #running-req: 1, #token: 2559, token usage: 0.07, gen throughput (token/s): 48.34, #queue-req: 0
- 2025-07-20 17:36:46,244 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:46,854 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:36:46,855 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 425.02 469.36
- finished_output_tokens 102.64 112.60
- sglang_input_tokens 472.49 564.80
- sglang_output_tokens 120.58 143.21
- 2025-07-20 17:36:46,855 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:36:47,071 - sglang - INFO - [2025-07-20 17:36:47 TP0] Decode batch. #running-req: 1, #token: 2599, token usage: 0.07, gen throughput (token/s): 48.36, #queue-req: 0
- 2025-07-20 17:36:47,071 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:47,899 - sglang - INFO - [2025-07-20 17:36:47 TP0] Decode batch. #running-req: 1, #token: 2639, token usage: 0.07, gen throughput (token/s): 48.31, #queue-req: 0
- 2025-07-20 17:36:47,899 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:48,727 - sglang - INFO - [2025-07-20 17:36:48 TP0] Decode batch. #running-req: 1, #token: 2679, token usage: 0.07, gen throughput (token/s): 48.31, #queue-req: 0
- 2025-07-20 17:36:48,727 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:49,554 - sglang - INFO - [2025-07-20 17:36:49 TP0] Decode batch. #running-req: 1, #token: 2719, token usage: 0.07, gen throughput (token/s): 48.35, #queue-req: 0
- 2025-07-20 17:36:49,555 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:50,383 - sglang - INFO - [2025-07-20 17:36:50 TP0] Decode batch. #running-req: 1, #token: 2759, token usage: 0.07, gen throughput (token/s): 48.30, #queue-req: 0
- 2025-07-20 17:36:50,383 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:51,211 - sglang - INFO - [2025-07-20 17:36:51 TP0] Decode batch. #running-req: 1, #token: 2799, token usage: 0.07, gen throughput (token/s): 48.28, #queue-req: 0
- 2025-07-20 17:36:51,211 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:52,038 - sglang - INFO - [2025-07-20 17:36:52 TP0] Decode batch. #running-req: 1, #token: 2839, token usage: 0.07, gen throughput (token/s): 48.34, #queue-req: 0
- 2025-07-20 17:36:52,039 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:52,867 - sglang - INFO - [2025-07-20 17:36:52 TP0] Decode batch. #running-req: 1, #token: 2879, token usage: 0.08, gen throughput (token/s): 48.31, #queue-req: 0
- 2025-07-20 17:36:52,867 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:53,695 - sglang - INFO - [2025-07-20 17:36:53 TP0] Decode batch. #running-req: 1, #token: 2919, token usage: 0.08, gen throughput (token/s): 48.26, #queue-req: 0
- 2025-07-20 17:36:53,696 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:54,524 - sglang - INFO - [2025-07-20 17:36:54 TP0] Decode batch. #running-req: 1, #token: 2959, token usage: 0.08, gen throughput (token/s): 48.28, #queue-req: 0
- 2025-07-20 17:36:54,524 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:55,352 - sglang - INFO - [2025-07-20 17:36:55 TP0] Decode batch. #running-req: 1, #token: 2999, token usage: 0.08, gen throughput (token/s): 48.30, #queue-req: 0
- 2025-07-20 17:36:55,352 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:56,182 - sglang - INFO - [2025-07-20 17:36:56 TP0] Decode batch. #running-req: 1, #token: 3039, token usage: 0.08, gen throughput (token/s): 48.19, #queue-req: 0
- 2025-07-20 17:36:56,182 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:56,856 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:36:56,857 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 419.20 469.36
- finished_output_tokens 101.24 112.60
- sglang_input_tokens 466.02 564.80
- sglang_output_tokens 118.93 143.21
- 2025-07-20 17:36:56,857 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:36:57,012 - sglang - INFO - [2025-07-20 17:36:57 TP0] Decode batch. #running-req: 1, #token: 3079, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:36:57,012 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:57,840 - sglang - INFO - [2025-07-20 17:36:57 TP0] Decode batch. #running-req: 1, #token: 3119, token usage: 0.08, gen throughput (token/s): 48.27, #queue-req: 0
- 2025-07-20 17:36:57,841 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:58,670 - sglang - INFO - [2025-07-20 17:36:58 TP0] Decode batch. #running-req: 1, #token: 3159, token usage: 0.08, gen throughput (token/s): 48.24, #queue-req: 0
- 2025-07-20 17:36:58,670 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:36:59,500 - sglang - INFO - [2025-07-20 17:36:59 TP0] Decode batch. #running-req: 1, #token: 3199, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
- 2025-07-20 17:36:59,500 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:00,330 - sglang - INFO - [2025-07-20 17:37:00 TP0] Decode batch. #running-req: 1, #token: 3239, token usage: 0.09, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:37:00,330 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:01,159 - sglang - INFO - [2025-07-20 17:37:01 TP0] Decode batch. #running-req: 1, #token: 3279, token usage: 0.09, gen throughput (token/s): 48.24, #queue-req: 0
- 2025-07-20 17:37:01,159 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:01,988 - sglang - INFO - [2025-07-20 17:37:01 TP0] Decode batch. #running-req: 1, #token: 3319, token usage: 0.09, gen throughput (token/s): 48.25, #queue-req: 0
- 2025-07-20 17:37:01,988 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:02,819 - sglang - INFO - [2025-07-20 17:37:02 TP0] Decode batch. #running-req: 1, #token: 3359, token usage: 0.09, gen throughput (token/s): 48.15, #queue-req: 0
- 2025-07-20 17:37:02,819 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:03,649 - sglang - INFO - [2025-07-20 17:37:03 TP0] Decode batch. #running-req: 1, #token: 3399, token usage: 0.09, gen throughput (token/s): 48.16, #queue-req: 0
- 2025-07-20 17:37:03,649 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:04,479 - sglang - INFO - [2025-07-20 17:37:04 TP0] Decode batch. #running-req: 1, #token: 3439, token usage: 0.09, gen throughput (token/s): 48.22, #queue-req: 0
- 2025-07-20 17:37:04,479 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:05,309 - sglang - INFO - [2025-07-20 17:37:05 TP0] Decode batch. #running-req: 1, #token: 3479, token usage: 0.09, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:37:05,309 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:06,140 - sglang - INFO - [2025-07-20 17:37:06 TP0] Decode batch. #running-req: 1, #token: 3519, token usage: 0.09, gen throughput (token/s): 48.11, #queue-req: 0
- 2025-07-20 17:37:06,140 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:06,858 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:37:06,859 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 413.54 469.36
- finished_output_tokens 99.87 112.60
- sglang_input_tokens 459.72 564.80
- sglang_output_tokens 117.32 143.21
- 2025-07-20 17:37:06,859 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:37:06,971 - sglang - INFO - [2025-07-20 17:37:06 TP0] Decode batch. #running-req: 1, #token: 3559, token usage: 0.09, gen throughput (token/s): 48.13, #queue-req: 0
- 2025-07-20 17:37:06,971 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:07,802 - sglang - INFO - [2025-07-20 17:37:07 TP0] Decode batch. #running-req: 1, #token: 3599, token usage: 0.09, gen throughput (token/s): 48.17, #queue-req: 0
- 2025-07-20 17:37:07,802 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:08,632 - sglang - INFO - [2025-07-20 17:37:08 TP0] Decode batch. #running-req: 1, #token: 3639, token usage: 0.10, gen throughput (token/s): 48.16, #queue-req: 0
- 2025-07-20 17:37:08,633 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:09,464 - sglang - INFO - [2025-07-20 17:37:09 TP0] Decode batch. #running-req: 1, #token: 3679, token usage: 0.10, gen throughput (token/s): 48.07, #queue-req: 0
- 2025-07-20 17:37:09,465 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:10,296 - sglang - INFO - [2025-07-20 17:37:10 TP0] Decode batch. #running-req: 1, #token: 3719, token usage: 0.10, gen throughput (token/s): 48.09, #queue-req: 0
- 2025-07-20 17:37:10,296 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:11,127 - sglang - INFO - [2025-07-20 17:37:11 TP0] Decode batch. #running-req: 1, #token: 3759, token usage: 0.10, gen throughput (token/s): 48.16, #queue-req: 0
- 2025-07-20 17:37:11,127 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:11,958 - sglang - INFO - [2025-07-20 17:37:11 TP0] Decode batch. #running-req: 1, #token: 3799, token usage: 0.10, gen throughput (token/s): 48.10, #queue-req: 0
- 2025-07-20 17:37:11,959 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:12,791 - sglang - INFO - [2025-07-20 17:37:12 TP0] Decode batch. #running-req: 1, #token: 3839, token usage: 0.10, gen throughput (token/s): 48.06, #queue-req: 0
- 2025-07-20 17:37:12,791 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:13,623 - sglang - INFO - [2025-07-20 17:37:13 TP0] Decode batch. #running-req: 1, #token: 3879, token usage: 0.10, gen throughput (token/s): 48.09, #queue-req: 0
- 2025-07-20 17:37:13,623 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:14,454 - sglang - INFO - [2025-07-20 17:37:14 TP0] Decode batch. #running-req: 1, #token: 3919, token usage: 0.10, gen throughput (token/s): 48.11, #queue-req: 0
- 2025-07-20 17:37:14,454 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:15,287 - sglang - INFO - [2025-07-20 17:37:15 TP0] Decode batch. #running-req: 1, #token: 3959, token usage: 0.10, gen throughput (token/s): 48.05, #queue-req: 0
- 2025-07-20 17:37:15,287 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:16,119 - sglang - INFO - [2025-07-20 17:37:16 TP0] Decode batch. #running-req: 1, #token: 3999, token usage: 0.11, gen throughput (token/s): 48.05, #queue-req: 0
- 2025-07-20 17:37:16,119 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:16,860 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:37:16,861 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 408.03 469.36
- finished_output_tokens 98.54 112.60
- sglang_input_tokens 453.60 564.80
- sglang_output_tokens 115.76 143.21
- 2025-07-20 17:37:16,861 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:37:16,950 - sglang - INFO - [2025-07-20 17:37:16 TP0] Decode batch. #running-req: 1, #token: 4039, token usage: 0.11, gen throughput (token/s): 48.12, #queue-req: 0
- 2025-07-20 17:37:16,951 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:17,782 - sglang - INFO - [2025-07-20 17:37:17 TP0] Decode batch. #running-req: 1, #token: 4079, token usage: 0.11, gen throughput (token/s): 48.11, #queue-req: 0
- 2025-07-20 17:37:17,782 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:18,615 - sglang - INFO - [2025-07-20 17:37:18 TP0] Decode batch. #running-req: 1, #token: 4119, token usage: 0.11, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:37:18,615 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:19,449 - sglang - INFO - [2025-07-20 17:37:19 TP0] Decode batch. #running-req: 1, #token: 4159, token usage: 0.11, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:37:19,449 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:20,282 - sglang - INFO - [2025-07-20 17:37:20 TP0] Decode batch. #running-req: 1, #token: 4199, token usage: 0.11, gen throughput (token/s): 48.02, #queue-req: 0
- 2025-07-20 17:37:20,282 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:21,114 - sglang - INFO - [2025-07-20 17:37:21 TP0] Decode batch. #running-req: 1, #token: 4239, token usage: 0.11, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:37:21,114 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:21,947 - sglang - INFO - [2025-07-20 17:37:21 TP0] Decode batch. #running-req: 1, #token: 4279, token usage: 0.11, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:37:21,948 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:22,781 - sglang - INFO - [2025-07-20 17:37:22 TP0] Decode batch. #running-req: 1, #token: 4319, token usage: 0.11, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:37:22,781 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:23,614 - sglang - INFO - [2025-07-20 17:37:23 TP0] Decode batch. #running-req: 1, #token: 4359, token usage: 0.11, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:37:23,614 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:24,447 - sglang - INFO - [2025-07-20 17:37:24 TP0] Decode batch. #running-req: 1, #token: 4399, token usage: 0.12, gen throughput (token/s): 47.98, #queue-req: 0
- 2025-07-20 17:37:24,447 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:25,281 - sglang - INFO - [2025-07-20 17:37:25 TP0] Decode batch. #running-req: 1, #token: 4439, token usage: 0.12, gen throughput (token/s): 47.97, #queue-req: 0
- 2025-07-20 17:37:25,281 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:26,116 - sglang - INFO - [2025-07-20 17:37:26 TP0] Decode batch. #running-req: 1, #token: 4479, token usage: 0.12, gen throughput (token/s): 47.93, #queue-req: 0
- 2025-07-20 17:37:26,116 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:26,863 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:37:26,863 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 402.66 469.36
- finished_output_tokens 97.24 112.60
- sglang_input_tokens 447.63 564.80
- sglang_output_tokens 114.24 143.21
- 2025-07-20 17:37:26,864 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:37:26,950 - sglang - INFO - [2025-07-20 17:37:26 TP0] Decode batch. #running-req: 1, #token: 4519, token usage: 0.12, gen throughput (token/s): 47.94, #queue-req: 0
- 2025-07-20 17:37:26,950 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:27,785 - sglang - INFO - [2025-07-20 17:37:27 TP0] Decode batch. #running-req: 1, #token: 4559, token usage: 0.12, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:37:27,785 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:28,619 - sglang - INFO - [2025-07-20 17:37:28 TP0] Decode batch. #running-req: 1, #token: 4599, token usage: 0.12, gen throughput (token/s): 47.94, #queue-req: 0
- 2025-07-20 17:37:28,620 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:29,453 - sglang - INFO - [2025-07-20 17:37:29 TP0] Decode batch. #running-req: 1, #token: 4639, token usage: 0.12, gen throughput (token/s): 47.96, #queue-req: 0
- 2025-07-20 17:37:29,453 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:30,289 - sglang - INFO - [2025-07-20 17:37:30 TP0] Decode batch. #running-req: 1, #token: 4679, token usage: 0.12, gen throughput (token/s): 47.89, #queue-req: 0
- 2025-07-20 17:37:30,289 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:31,123 - sglang - INFO - [2025-07-20 17:37:31 TP0] Decode batch. #running-req: 1, #token: 4719, token usage: 0.12, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:37:31,124 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:31,958 - sglang - INFO - [2025-07-20 17:37:31 TP0] Decode batch. #running-req: 1, #token: 4759, token usage: 0.13, gen throughput (token/s): 47.94, #queue-req: 0
- 2025-07-20 17:37:31,958 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:32,794 - sglang - INFO - [2025-07-20 17:37:32 TP0] Decode batch. #running-req: 1, #token: 4799, token usage: 0.13, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-07-20 17:37:32,794 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:33,629 - sglang - INFO - [2025-07-20 17:37:33 TP0] Decode batch. #running-req: 1, #token: 4839, token usage: 0.13, gen throughput (token/s): 47.89, #queue-req: 0
- 2025-07-20 17:37:33,629 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:34,464 - sglang - INFO - [2025-07-20 17:37:34 TP0] Decode batch. #running-req: 1, #token: 4879, token usage: 0.13, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:37:34,464 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:35,299 - sglang - INFO - [2025-07-20 17:37:35 TP0] Decode batch. #running-req: 1, #token: 4919, token usage: 0.13, gen throughput (token/s): 47.90, #queue-req: 0
- 2025-07-20 17:37:35,299 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:36,135 - sglang - INFO - [2025-07-20 17:37:36 TP0] Decode batch. #running-req: 1, #token: 4959, token usage: 0.13, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:37:36,135 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:36,865 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:37:36,866 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 397.43 469.36
- finished_output_tokens 95.98 112.60
- sglang_input_tokens 441.82 564.80
- sglang_output_tokens 112.75 143.21
- 2025-07-20 17:37:36,866 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:37:36,971 - sglang - INFO - [2025-07-20 17:37:36 TP0] Decode batch. #running-req: 1, #token: 4999, token usage: 0.13, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:37:36,971 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:37,806 - sglang - INFO - [2025-07-20 17:37:37 TP0] Decode batch. #running-req: 1, #token: 5039, token usage: 0.13, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:37:37,807 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:38,642 - sglang - INFO - [2025-07-20 17:37:38 TP0] Decode batch. #running-req: 1, #token: 5079, token usage: 0.13, gen throughput (token/s): 47.85, #queue-req: 0
- 2025-07-20 17:37:38,643 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:39,005 - __main__ - WARNING - JSON decode error on attempt 1 for test_pdf/1144520000702630XG344010604301201.pdf-10: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:37:39,139 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
- 2025-07-20 17:37:39,321 - sglang - INFO - [2025-07-20 17:37:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2097, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:37:39,321 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:37:40,474 - sglang - INFO - [2025-07-20 17:37:40 TP0] Decode batch. #running-req: 1, #token: 2120, token usage: 0.06, gen throughput (token/s): 21.83, #queue-req: 0
- 2025-07-20 17:37:40,475 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:41,301 - sglang - INFO - [2025-07-20 17:37:41 TP0] Decode batch. #running-req: 1, #token: 2160, token usage: 0.06, gen throughput (token/s): 48.39, #queue-req: 0
- 2025-07-20 17:37:41,301 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:42,128 - sglang - INFO - [2025-07-20 17:37:42 TP0] Decode batch. #running-req: 1, #token: 2200, token usage: 0.06, gen throughput (token/s): 48.36, #queue-req: 0
- 2025-07-20 17:37:42,128 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:42,956 - sglang - INFO - [2025-07-20 17:37:42 TP0] Decode batch. #running-req: 1, #token: 2240, token usage: 0.06, gen throughput (token/s): 48.32, #queue-req: 0
- 2025-07-20 17:37:42,956 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:43,784 - sglang - INFO - [2025-07-20 17:37:43 TP0] Decode batch. #running-req: 1, #token: 2280, token usage: 0.06, gen throughput (token/s): 48.31, #queue-req: 0
- 2025-07-20 17:37:43,784 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:44,611 - sglang - INFO - [2025-07-20 17:37:44 TP0] Decode batch. #running-req: 1, #token: 2320, token usage: 0.06, gen throughput (token/s): 48.36, #queue-req: 0
- 2025-07-20 17:37:44,611 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:45,438 - sglang - INFO - [2025-07-20 17:37:45 TP0] Decode batch. #running-req: 1, #token: 2360, token usage: 0.06, gen throughput (token/s): 48.36, #queue-req: 0
- 2025-07-20 17:37:45,438 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:46,266 - sglang - INFO - [2025-07-20 17:37:46 TP0] Decode batch. #running-req: 1, #token: 2400, token usage: 0.06, gen throughput (token/s): 48.31, #queue-req: 0
- 2025-07-20 17:37:46,266 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:46,867 - __main__ - INFO - Queue remaining: 9
- 2025-07-20 17:37:46,868 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 392.34 347.11
- finished_output_tokens 94.75 82.61
- sglang_input_tokens 438.85 456.02
- sglang_output_tokens 115.15 124.80
- 2025-07-20 17:37:46,868 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 13 | 14
- 2025-07-20 17:37:47,095 - sglang - INFO - [2025-07-20 17:37:47 TP0] Decode batch. #running-req: 1, #token: 2440, token usage: 0.06, gen throughput (token/s): 48.28, #queue-req: 0
- 2025-07-20 17:37:47,095 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:47,576 - __main__ - INFO - Finished TaskGroup for worker on 06798e8f7cc26525f138f26354ffab7c63074f2c
- 2025-07-20 17:37:47,576 - __main__ - INFO - Got 1 docs for 06798e8f7cc26525f138f26354ffab7c63074f2c
- 2025-07-20 17:37:47,578 - __main__ - INFO - Worker 0 processing work item c07c41e4c78e5049d035d0059223ac0adc60be49
- 2025-07-20 17:37:47,578 - __main__ - INFO - Created all tasks for c07c41e4c78e5049d035d0059223ac0adc60be49
- 2025-07-20 17:37:47,584 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106011000.pdf in worker 0
- 2025-07-20 17:37:47,712 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-2
- 2025-07-20 17:37:47,718 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-3
- 2025-07-20 17:37:47,723 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-4
- 2025-07-20 17:37:47,727 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-6
- 2025-07-20 17:37:47,763 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-1
- 2025-07-20 17:37:47,814 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-5
- 2025-07-20 17:37:47,888 - sglang - INFO - [2025-07-20 17:37:47 TP0] Prefill batch. #new-seq: 1, #new-token: 1735, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:37:47,889 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:37:48,506 - sglang - INFO - [2025-07-20 17:37:48 TP0] Prefill batch. #new-seq: 5, #new-token: 10414, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-07-20 17:37:48,506 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:37:52,075 - sglang - INFO - [2025-07-20 17:37:52 TP0] Decode batch. #running-req: 6, #token: 12251, token usage: 0.32, gen throughput (token/s): 25.10, #queue-req: 0
- 2025-07-20 17:37:52,075 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:37:52,947 - sglang - INFO - [2025-07-20 17:37:52 TP0] Decode batch. #running-req: 6, #token: 12491, token usage: 0.33, gen throughput (token/s): 275.05, #queue-req: 0
- 2025-07-20 17:37:52,947 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:37:53,819 - sglang - INFO - [2025-07-20 17:37:53 TP0] Decode batch. #running-req: 6, #token: 12731, token usage: 0.34, gen throughput (token/s): 275.14, #queue-req: 0
- 2025-07-20 17:37:53,820 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:37:54,691 - sglang - INFO - [2025-07-20 17:37:54 TP0] Decode batch. #running-req: 6, #token: 12971, token usage: 0.34, gen throughput (token/s): 275.26, #queue-req: 0
- 2025-07-20 17:37:54,691 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:37:55,565 - sglang - INFO - [2025-07-20 17:37:55 TP0] Decode batch. #running-req: 6, #token: 13211, token usage: 0.35, gen throughput (token/s): 274.74, #queue-req: 0
- 2025-07-20 17:37:55,565 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:37:56,439 - sglang - INFO - [2025-07-20 17:37:56 TP0] Decode batch. #running-req: 6, #token: 13451, token usage: 0.35, gen throughput (token/s): 274.53, #queue-req: 0
- 2025-07-20 17:37:56,439 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:37:56,869 - __main__ - INFO - Queue remaining: 8
- 2025-07-20 17:37:56,869 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 427.89 411.29
- finished_output_tokens 102.78 93.83
- sglang_input_tokens 437.79 430.13
- sglang_output_tokens 114.45 114.60
- 2025-07-20 17:37:56,870 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 6
- 2025-07-20 17:37:57,311 - sglang - INFO - [2025-07-20 17:37:57 TP0] Decode batch. #running-req: 5, #token: 11975, token usage: 0.32, gen throughput (token/s): 244.21, #queue-req: 0
- 2025-07-20 17:37:57,312 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:37:58,178 - sglang - INFO - [2025-07-20 17:37:58 TP0] Decode batch. #running-req: 4, #token: 9769, token usage: 0.26, gen throughput (token/s): 215.78, #queue-req: 0
- 2025-07-20 17:37:58,178 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:37:59,035 - sglang - INFO - [2025-07-20 17:37:59 TP0] Decode batch. #running-req: 4, #token: 9929, token usage: 0.26, gen throughput (token/s): 186.75, #queue-req: 0
- 2025-07-20 17:37:59,035 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:37:59,889 - sglang - INFO - [2025-07-20 17:37:59 TP0] Decode batch. #running-req: 2, #token: 5608, token usage: 0.15, gen throughput (token/s): 156.80, #queue-req: 0
- 2025-07-20 17:37:59,890 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:00,729 - sglang - INFO - [2025-07-20 17:38:00 TP0] Decode batch. #running-req: 2, #token: 5688, token usage: 0.15, gen throughput (token/s): 95.28, #queue-req: 0
- 2025-07-20 17:38:00,729 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:01,567 - sglang - INFO - [2025-07-20 17:38:01 TP0] Decode batch. #running-req: 2, #token: 5768, token usage: 0.15, gen throughput (token/s): 95.41, #queue-req: 0
- 2025-07-20 17:38:01,568 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:02,406 - sglang - INFO - [2025-07-20 17:38:02 TP0] Decode batch. #running-req: 2, #token: 5848, token usage: 0.15, gen throughput (token/s): 95.38, #queue-req: 0
- 2025-07-20 17:38:02,406 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:03,246 - sglang - INFO - [2025-07-20 17:38:03 TP0] Decode batch. #running-req: 2, #token: 5928, token usage: 0.16, gen throughput (token/s): 95.27, #queue-req: 0
- 2025-07-20 17:38:03,246 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:04,086 - sglang - INFO - [2025-07-20 17:38:04 TP0] Decode batch. #running-req: 2, #token: 6008, token usage: 0.16, gen throughput (token/s): 95.17, #queue-req: 0
- 2025-07-20 17:38:04,087 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:04,926 - sglang - INFO - [2025-07-20 17:38:04 TP0] Decode batch. #running-req: 2, #token: 6088, token usage: 0.16, gen throughput (token/s): 95.32, #queue-req: 0
- 2025-07-20 17:38:04,926 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:05,756 - sglang - INFO - [2025-07-20 17:38:05 TP0] Decode batch. #running-req: 1, #token: 2983, token usage: 0.08, gen throughput (token/s): 50.61, #queue-req: 0
- 2025-07-20 17:38:05,756 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:06,585 - sglang - INFO - [2025-07-20 17:38:06 TP0] Decode batch. #running-req: 1, #token: 3023, token usage: 0.08, gen throughput (token/s): 48.20, #queue-req: 0
- 2025-07-20 17:38:06,586 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:06,872 - __main__ - INFO - Queue remaining: 8
- 2025-07-20 17:38:06,872 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 422.55 411.29
- finished_output_tokens 101.49 93.83
- sglang_input_tokens 442.77 440.47
- sglang_output_tokens 115.06 117.34
- 2025-07-20 17:38:06,873 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 5 | 6
- 2025-07-20 17:38:07,421 - sglang - INFO - [2025-07-20 17:38:07 TP0] Decode batch. #running-req: 1, #token: 3063, token usage: 0.08, gen throughput (token/s): 47.88, #queue-req: 0
- 2025-07-20 17:38:07,421 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:08,254 - sglang - INFO - [2025-07-20 17:38:08 TP0] Decode batch. #running-req: 1, #token: 3103, token usage: 0.08, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:38:08,254 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:09,083 - sglang - INFO - [2025-07-20 17:38:09 TP0] Decode batch. #running-req: 1, #token: 3143, token usage: 0.08, gen throughput (token/s): 48.22, #queue-req: 0
- 2025-07-20 17:38:09,084 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:09,588 - __main__ - INFO - Finished TaskGroup for worker on c07c41e4c78e5049d035d0059223ac0adc60be49
- 2025-07-20 17:38:09,589 - __main__ - INFO - Got 1 docs for c07c41e4c78e5049d035d0059223ac0adc60be49
- 2025-07-20 17:38:09,590 - __main__ - INFO - Worker 0 processing work item 8450bc4e95932e232e795c885ec59ab601993cab
- 2025-07-20 17:38:09,590 - __main__ - INFO - Created all tasks for 8450bc4e95932e232e795c885ec59ab601993cab
- 2025-07-20 17:38:09,598 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901001.pdf in worker 0
- 2025-07-20 17:38:09,706 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-6
- 2025-07-20 17:38:09,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-3
- 2025-07-20 17:38:09,767 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-2
- 2025-07-20 17:38:09,776 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-1
- 2025-07-20 17:38:09,839 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-4
- 2025-07-20 17:38:09,847 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-5
- 2025-07-20 17:38:09,874 - sglang - INFO - [2025-07-20 17:38:09 TP0] Prefill batch. #new-seq: 1, #new-token: 1457, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:38:09,874 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:38:10,451 - sglang - INFO - [2025-07-20 17:38:10 TP0] Prefill batch. #new-seq: 5, #new-token: 11323, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:38:10,452 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:14,207 - sglang - INFO - [2025-07-20 17:38:14 TP0] Decode batch. #running-req: 6, #token: 12876, token usage: 0.34, gen throughput (token/s): 23.42, #queue-req: 0
- 2025-07-20 17:38:14,207 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:38:15,080 - sglang - INFO - [2025-07-20 17:38:15 TP0] Decode batch. #running-req: 6, #token: 13116, token usage: 0.35, gen throughput (token/s): 274.58, #queue-req: 0
- 2025-07-20 17:38:15,081 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:38:15,955 - sglang - INFO - [2025-07-20 17:38:15 TP0] Decode batch. #running-req: 6, #token: 13356, token usage: 0.35, gen throughput (token/s): 274.58, #queue-req: 0
- 2025-07-20 17:38:15,955 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:38:16,828 - sglang - INFO - [2025-07-20 17:38:16 TP0] Decode batch. #running-req: 6, #token: 13596, token usage: 0.36, gen throughput (token/s): 274.75, #queue-req: 0
- 2025-07-20 17:38:16,828 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:38:16,874 - __main__ - INFO - Queue remaining: 7
- 2025-07-20 17:38:16,874 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 432.32 451.78
- finished_output_tokens 103.58 102.86
- sglang_input_tokens 440.18 442.48
- sglang_output_tokens 114.68 118.94
- 2025-07-20 17:38:16,874 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:38:17,704 - sglang - INFO - [2025-07-20 17:38:17 TP0] Decode batch. #running-req: 6, #token: 13836, token usage: 0.36, gen throughput (token/s): 274.17, #queue-req: 0
- 2025-07-20 17:38:17,704 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:38:18,580 - sglang - INFO - [2025-07-20 17:38:18 TP0] Decode batch. #running-req: 6, #token: 14076, token usage: 0.37, gen throughput (token/s): 273.80, #queue-req: 0
- 2025-07-20 17:38:18,580 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:38:19,458 - sglang - INFO - [2025-07-20 17:38:19 TP0] Decode batch. #running-req: 5, #token: 11713, token usage: 0.31, gen throughput (token/s): 268.91, #queue-req: 0
- 2025-07-20 17:38:19,458 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:38:20,319 - sglang - INFO - [2025-07-20 17:38:20 TP0] Decode batch. #running-req: 4, #token: 10160, token usage: 0.27, gen throughput (token/s): 198.44, #queue-req: 0
- 2025-07-20 17:38:20,320 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:21,177 - sglang - INFO - [2025-07-20 17:38:21 TP0] Decode batch. #running-req: 4, #token: 10320, token usage: 0.27, gen throughput (token/s): 186.49, #queue-req: 0
- 2025-07-20 17:38:21,178 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:22,035 - sglang - INFO - [2025-07-20 17:38:22 TP0] Decode batch. #running-req: 4, #token: 10480, token usage: 0.28, gen throughput (token/s): 186.46, #queue-req: 0
- 2025-07-20 17:38:22,036 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:22,895 - sglang - INFO - [2025-07-20 17:38:22 TP0] Decode batch. #running-req: 4, #token: 10640, token usage: 0.28, gen throughput (token/s): 186.12, #queue-req: 0
- 2025-07-20 17:38:22,895 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:23,755 - sglang - INFO - [2025-07-20 17:38:23 TP0] Decode batch. #running-req: 4, #token: 10800, token usage: 0.28, gen throughput (token/s): 186.09, #queue-req: 0
- 2025-07-20 17:38:23,755 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:24,614 - sglang - INFO - [2025-07-20 17:38:24 TP0] Decode batch. #running-req: 4, #token: 10960, token usage: 0.29, gen throughput (token/s): 186.30, #queue-req: 0
- 2025-07-20 17:38:24,614 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:25,473 - sglang - INFO - [2025-07-20 17:38:25 TP0] Decode batch. #running-req: 4, #token: 11120, token usage: 0.29, gen throughput (token/s): 186.27, #queue-req: 0
- 2025-07-20 17:38:25,473 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:26,321 - sglang - INFO - [2025-07-20 17:38:26 TP0] Decode batch. #running-req: 2, #token: 6149, token usage: 0.16, gen throughput (token/s): 129.65, #queue-req: 0
- 2025-07-20 17:38:26,321 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:26,875 - __main__ - INFO - Queue remaining: 7
- 2025-07-20 17:38:26,875 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 427.05 412.09
- finished_output_tokens 102.32 94.25
- sglang_input_tokens 444.30 439.13
- sglang_output_tokens 115.27 117.64
- 2025-07-20 17:38:26,876 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 6
- 2025-07-20 17:38:27,162 - sglang - INFO - [2025-07-20 17:38:27 TP0] Decode batch. #running-req: 2, #token: 6229, token usage: 0.16, gen throughput (token/s): 95.16, #queue-req: 0
- 2025-07-20 17:38:27,162 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:28,002 - sglang - INFO - [2025-07-20 17:38:28 TP0] Decode batch. #running-req: 2, #token: 6309, token usage: 0.17, gen throughput (token/s): 95.19, #queue-req: 0
- 2025-07-20 17:38:28,003 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:28,835 - sglang - INFO - [2025-07-20 17:38:28 TP0] Decode batch. #running-req: 1, #token: 3120, token usage: 0.08, gen throughput (token/s): 60.02, #queue-req: 0
- 2025-07-20 17:38:28,836 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:29,665 - sglang - INFO - [2025-07-20 17:38:29 TP0] Decode batch. #running-req: 1, #token: 3160, token usage: 0.08, gen throughput (token/s): 48.22, #queue-req: 0
- 2025-07-20 17:38:29,665 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:30,496 - sglang - INFO - [2025-07-20 17:38:30 TP0] Decode batch. #running-req: 1, #token: 3200, token usage: 0.08, gen throughput (token/s): 48.12, #queue-req: 0
- 2025-07-20 17:38:30,497 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:30,751 - __main__ - INFO - Finished TaskGroup for worker on 8450bc4e95932e232e795c885ec59ab601993cab
- 2025-07-20 17:38:30,751 - __main__ - INFO - Got 1 docs for 8450bc4e95932e232e795c885ec59ab601993cab
- 2025-07-20 17:38:30,753 - __main__ - INFO - Worker 0 processing work item 662cdaa711447efb75b7c325ea177326afc2747b
- 2025-07-20 17:38:30,753 - __main__ - INFO - Created all tasks for 662cdaa711447efb75b7c325ea177326afc2747b
- 2025-07-20 17:38:30,761 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
- 2025-07-20 17:38:30,870 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
- 2025-07-20 17:38:30,881 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
- 2025-07-20 17:38:30,891 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
- 2025-07-20 17:38:30,905 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
- 2025-07-20 17:38:30,910 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
- 2025-07-20 17:38:30,914 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
- 2025-07-20 17:38:30,919 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
- 2025-07-20 17:38:30,942 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
- 2025-07-20 17:38:30,960 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
- 2025-07-20 17:38:31,000 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
- 2025-07-20 17:38:31,051 - sglang - INFO - [2025-07-20 17:38:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1488, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:38:31,051 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:38:31,639 - sglang - INFO - [2025-07-20 17:38:31 TP0] Prefill batch. #new-seq: 6, #new-token: 11875, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 3
- 2025-07-20 17:38:31,640 - __main__ - INFO - sglang running req: 1 queue req: 3
- 2025-07-20 17:38:35,961 - sglang - INFO - [2025-07-20 17:38:35 TP0] Decode batch. #running-req: 7, #token: 13559, token usage: 0.36, gen throughput (token/s): 38.06, #queue-req: 3
- 2025-07-20 17:38:35,961 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:38:36,837 - sglang - INFO - [2025-07-20 17:38:36 TP0] Decode batch. #running-req: 7, #token: 13839, token usage: 0.36, gen throughput (token/s): 319.49, #queue-req: 3
- 2025-07-20 17:38:36,838 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:38:36,877 - __main__ - INFO - Queue remaining: 6
- 2025-07-20 17:38:36,877 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 437.30 454.69
- finished_output_tokens 104.80 104.52
- sglang_input_tokens 444.96 449.87
- sglang_output_tokens 115.63 121.25
- 2025-07-20 17:38:36,877 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 10
- 2025-07-20 17:38:37,715 - sglang - INFO - [2025-07-20 17:38:37 TP0] Decode batch. #running-req: 7, #token: 14119, token usage: 0.37, gen throughput (token/s): 319.15, #queue-req: 3
- 2025-07-20 17:38:37,715 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:38:38,592 - sglang - INFO - [2025-07-20 17:38:38 TP0] Decode batch. #running-req: 7, #token: 14399, token usage: 0.38, gen throughput (token/s): 319.03, #queue-req: 3
- 2025-07-20 17:38:38,593 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:38:39,470 - sglang - INFO - [2025-07-20 17:38:39 TP0] Decode batch. #running-req: 7, #token: 14679, token usage: 0.39, gen throughput (token/s): 319.13, #queue-req: 3
- 2025-07-20 17:38:39,470 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:38:40,261 - sglang - INFO - [2025-07-20 17:38:40 TP0] Prefill batch. #new-seq: 3, #new-token: 6692, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.34, #running-req: 6, #queue-req: 0
- 2025-07-20 17:38:40,261 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:38:42,417 - sglang - INFO - [2025-07-20 17:38:42 TP0] Decode batch. #running-req: 9, #token: 19574, token usage: 0.52, gen throughput (token/s): 97.37, #queue-req: 0
- 2025-07-20 17:38:42,418 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:38:43,359 - sglang - INFO - [2025-07-20 17:38:43 TP0] Decode batch. #running-req: 9, #token: 18178, token usage: 0.48, gen throughput (token/s): 382.21, #queue-req: 0
- 2025-07-20 17:38:43,360 - __main__ - INFO - sglang running req: 9 queue req: 0
- 2025-07-20 17:38:44,241 - sglang - INFO - [2025-07-20 17:38:44 TP0] Decode batch. #running-req: 6, #token: 14263, token usage: 0.38, gen throughput (token/s): 316.43, #queue-req: 0
- 2025-07-20 17:38:44,241 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:38:45,119 - sglang - INFO - [2025-07-20 17:38:45 TP0] Decode batch. #running-req: 6, #token: 14503, token usage: 0.38, gen throughput (token/s): 273.35, #queue-req: 0
- 2025-07-20 17:38:45,119 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:38:45,998 - sglang - INFO - [2025-07-20 17:38:45 TP0] Decode batch. #running-req: 5, #token: 12142, token usage: 0.32, gen throughput (token/s): 269.58, #queue-req: 0
- 2025-07-20 17:38:45,998 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:38:46,856 - sglang - INFO - [2025-07-20 17:38:46 TP0] Decode batch. #running-req: 4, #token: 9743, token usage: 0.26, gen throughput (token/s): 192.32, #queue-req: 0
- 2025-07-20 17:38:46,856 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:46,879 - __main__ - INFO - Queue remaining: 6
- 2025-07-20 17:38:46,879 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 432.09 419.63
- finished_output_tokens 103.55 96.48
- sglang_input_tokens 453.17 471.45
- sglang_output_tokens 116.46 122.65
- 2025-07-20 17:38:46,879 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 6 | 10
- 2025-07-20 17:38:47,712 - sglang - INFO - [2025-07-20 17:38:47 TP0] Decode batch. #running-req: 4, #token: 9903, token usage: 0.26, gen throughput (token/s): 186.97, #queue-req: 0
- 2025-07-20 17:38:47,712 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:48,568 - sglang - INFO - [2025-07-20 17:38:48 TP0] Decode batch. #running-req: 4, #token: 10063, token usage: 0.26, gen throughput (token/s): 186.87, #queue-req: 0
- 2025-07-20 17:38:48,568 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:49,426 - sglang - INFO - [2025-07-20 17:38:49 TP0] Decode batch. #running-req: 4, #token: 10223, token usage: 0.27, gen throughput (token/s): 186.52, #queue-req: 0
- 2025-07-20 17:38:49,426 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:38:50,280 - sglang - INFO - [2025-07-20 17:38:50 TP0] Decode batch. #running-req: 3, #token: 7784, token usage: 0.20, gen throughput (token/s): 153.36, #queue-req: 0
- 2025-07-20 17:38:50,280 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:38:51,131 - sglang - INFO - [2025-07-20 17:38:51 TP0] Decode batch. #running-req: 3, #token: 7904, token usage: 0.21, gen throughput (token/s): 141.07, #queue-req: 0
- 2025-07-20 17:38:51,131 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:38:51,977 - sglang - INFO - [2025-07-20 17:38:51 TP0] Decode batch. #running-req: 2, #token: 5697, token usage: 0.15, gen throughput (token/s): 120.60, #queue-req: 0
- 2025-07-20 17:38:51,977 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:52,815 - sglang - INFO - [2025-07-20 17:38:52 TP0] Decode batch. #running-req: 2, #token: 5777, token usage: 0.15, gen throughput (token/s): 95.39, #queue-req: 0
- 2025-07-20 17:38:52,815 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:53,655 - sglang - INFO - [2025-07-20 17:38:53 TP0] Decode batch. #running-req: 2, #token: 5857, token usage: 0.15, gen throughput (token/s): 95.20, #queue-req: 0
- 2025-07-20 17:38:53,656 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:54,495 - sglang - INFO - [2025-07-20 17:38:54 TP0] Decode batch. #running-req: 2, #token: 5937, token usage: 0.16, gen throughput (token/s): 95.26, #queue-req: 0
- 2025-07-20 17:38:54,496 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:55,334 - sglang - INFO - [2025-07-20 17:38:55 TP0] Decode batch. #running-req: 2, #token: 6017, token usage: 0.16, gen throughput (token/s): 95.37, #queue-req: 0
- 2025-07-20 17:38:55,334 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:38:56,171 - sglang - INFO - [2025-07-20 17:38:56 TP0] Decode batch. #running-req: 1, #token: 2906, token usage: 0.08, gen throughput (token/s): 81.24, #queue-req: 0
- 2025-07-20 17:38:56,171 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:56,880 - __main__ - INFO - Queue remaining: 6
- 2025-07-20 17:38:56,881 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 427.01 368.21
- finished_output_tokens 102.33 84.32
- sglang_input_tokens 455.42 441.50
- sglang_output_tokens 116.99 115.89
- 2025-07-20 17:38:56,881 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 9 | 10
- 2025-07-20 17:38:57,002 - sglang - INFO - [2025-07-20 17:38:57 TP0] Decode batch. #running-req: 1, #token: 2946, token usage: 0.08, gen throughput (token/s): 48.15, #queue-req: 0
- 2025-07-20 17:38:57,002 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:57,832 - sglang - INFO - [2025-07-20 17:38:57 TP0] Decode batch. #running-req: 1, #token: 2986, token usage: 0.08, gen throughput (token/s): 48.18, #queue-req: 0
- 2025-07-20 17:38:57,832 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:58,662 - sglang - INFO - [2025-07-20 17:38:58 TP0] Decode batch. #running-req: 1, #token: 3026, token usage: 0.08, gen throughput (token/s): 48.22, #queue-req: 0
- 2025-07-20 17:38:58,662 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:59,491 - sglang - INFO - [2025-07-20 17:38:59 TP0] Decode batch. #running-req: 1, #token: 3066, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
- 2025-07-20 17:38:59,492 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:38:59,955 - __main__ - INFO - Finished TaskGroup for worker on 662cdaa711447efb75b7c325ea177326afc2747b
- 2025-07-20 17:38:59,956 - __main__ - INFO - Got 1 docs for 662cdaa711447efb75b7c325ea177326afc2747b
- 2025-07-20 17:38:59,957 - __main__ - INFO - Worker 0 processing work item c1e2b4f5c6c4bb6407c21dcae6a8dccdc2ad0e74
- 2025-07-20 17:38:59,957 - __main__ - INFO - Created all tasks for c1e2b4f5c6c4bb6407c21dcae6a8dccdc2ad0e74
- 2025-07-20 17:38:59,966 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG344010604300102.pdf in worker 0
- 2025-07-20 17:39:00,096 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-10
- 2025-07-20 17:39:00,115 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-3
- 2025-07-20 17:39:00,134 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-6
- 2025-07-20 17:39:00,140 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-4
- 2025-07-20 17:39:00,149 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-7
- 2025-07-20 17:39:00,154 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-5
- 2025-07-20 17:39:00,156 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-2
- 2025-07-20 17:39:00,173 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-1
- 2025-07-20 17:39:00,217 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-8
- 2025-07-20 17:39:00,221 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-9
- 2025-07-20 17:39:00,282 - sglang - INFO - [2025-07-20 17:39:00 TP0] Prefill batch. #new-seq: 1, #new-token: 1732, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:39:00,283 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:39:00,918 - sglang - INFO - [2025-07-20 17:39:00 TP0] Prefill batch. #new-seq: 6, #new-token: 14500, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 3
- 2025-07-20 17:39:00,918 - __main__ - INFO - sglang running req: 1 queue req: 3
- 2025-07-20 17:39:05,575 - sglang - INFO - [2025-07-20 17:39:05 TP0] Decode batch. #running-req: 7, #token: 16358, token usage: 0.43, gen throughput (token/s): 24.33, #queue-req: 3
- 2025-07-20 17:39:05,575 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:39:06,458 - sglang - INFO - [2025-07-20 17:39:06 TP0] Decode batch. #running-req: 7, #token: 16638, token usage: 0.44, gen throughput (token/s): 316.94, #queue-req: 3
- 2025-07-20 17:39:06,458 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:39:06,882 - __main__ - INFO - Queue remaining: 5
- 2025-07-20 17:39:06,882 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 445.35 435.06
- finished_output_tokens 106.13 98.65
- sglang_input_tokens 452.75 449.04
- sglang_output_tokens 116.59 118.65
- 2025-07-20 17:39:06,883 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 10
- 2025-07-20 17:39:07,342 - sglang - INFO - [2025-07-20 17:39:07 TP0] Decode batch. #running-req: 7, #token: 16918, token usage: 0.45, gen throughput (token/s): 316.93, #queue-req: 3
- 2025-07-20 17:39:07,342 - __main__ - INFO - sglang running req: 7 queue req: 3
- 2025-07-20 17:39:07,674 - sglang - INFO - [2025-07-20 17:39:07 TP0] Prefill batch. #new-seq: 2, #new-token: 4506, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.38, #running-req: 6, #queue-req: 1
- 2025-07-20 17:39:07,675 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-07-20 17:39:09,632 - sglang - INFO - [2025-07-20 17:39:09 TP0] Decode batch. #running-req: 8, #token: 19254, token usage: 0.51, gen throughput (token/s): 132.75, #queue-req: 1
- 2025-07-20 17:39:09,632 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 17:39:10,526 - sglang - INFO - [2025-07-20 17:39:10 TP0] Decode batch. #running-req: 8, #token: 19574, token usage: 0.52, gen throughput (token/s): 357.91, #queue-req: 1
- 2025-07-20 17:39:10,526 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 17:39:11,418 - sglang - INFO - [2025-07-20 17:39:11 TP0] Decode batch. #running-req: 8, #token: 19894, token usage: 0.52, gen throughput (token/s): 358.68, #queue-req: 1
- 2025-07-20 17:39:11,418 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 17:39:12,310 - sglang - INFO - [2025-07-20 17:39:12 TP0] Decode batch. #running-req: 8, #token: 20214, token usage: 0.53, gen throughput (token/s): 358.86, #queue-req: 1
- 2025-07-20 17:39:12,310 - __main__ - INFO - sglang running req: 8 queue req: 1
- 2025-07-20 17:39:12,467 - sglang - INFO - [2025-07-20 17:39:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2315, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.48, #running-req: 7, #queue-req: 0
- 2025-07-20 17:39:12,467 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:39:13,948 - sglang - INFO - [2025-07-20 17:39:13 TP0] Decode batch. #running-req: 8, #token: 20852, token usage: 0.55, gen throughput (token/s): 194.71, #queue-req: 0
- 2025-07-20 17:39:13,948 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:39:14,844 - sglang - INFO - [2025-07-20 17:39:14 TP0] Decode batch. #running-req: 8, #token: 21172, token usage: 0.56, gen throughput (token/s): 357.41, #queue-req: 0
- 2025-07-20 17:39:14,844 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:39:15,739 - sglang - INFO - [2025-07-20 17:39:15 TP0] Decode batch. #running-req: 7, #token: 18815, token usage: 0.50, gen throughput (token/s): 350.61, #queue-req: 0
- 2025-07-20 17:39:15,739 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:39:16,628 - sglang - INFO - [2025-07-20 17:39:16 TP0] Decode batch. #running-req: 7, #token: 19095, token usage: 0.50, gen throughput (token/s): 314.88, #queue-req: 0
- 2025-07-20 17:39:16,629 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:39:16,884 - __main__ - INFO - Queue remaining: 5
- 2025-07-20 17:39:16,885 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 440.24 435.06
- finished_output_tokens 104.92 98.65
- sglang_input_tokens 454.89 420.30
- sglang_output_tokens 116.12 110.53
- 2025-07-20 17:39:16,885 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 3 | 10
- 2025-07-20 17:39:17,516 - sglang - INFO - [2025-07-20 17:39:17 TP0] Decode batch. #running-req: 6, #token: 16938, token usage: 0.45, gen throughput (token/s): 305.12, #queue-req: 0
- 2025-07-20 17:39:17,517 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:18,409 - sglang - INFO - [2025-07-20 17:39:18 TP0] Decode batch. #running-req: 6, #token: 17178, token usage: 0.45, gen throughput (token/s): 268.88, #queue-req: 0
- 2025-07-20 17:39:18,409 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:19,307 - sglang - INFO - [2025-07-20 17:39:19 TP0] Decode batch. #running-req: 6, #token: 17418, token usage: 0.46, gen throughput (token/s): 267.34, #queue-req: 0
- 2025-07-20 17:39:19,307 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:20,206 - sglang - INFO - [2025-07-20 17:39:20 TP0] Decode batch. #running-req: 6, #token: 17658, token usage: 0.46, gen throughput (token/s): 266.96, #queue-req: 0
- 2025-07-20 17:39:20,206 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:21,101 - sglang - INFO - [2025-07-20 17:39:21 TP0] Decode batch. #running-req: 5, #token: 11556, token usage: 0.30, gen throughput (token/s): 244.71, #queue-req: 0
- 2025-07-20 17:39:21,101 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:39:21,970 - sglang - INFO - [2025-07-20 17:39:21 TP0] Decode batch. #running-req: 3, #token: 9008, token usage: 0.24, gen throughput (token/s): 155.35, #queue-req: 0
- 2025-07-20 17:39:21,970 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:39:22,836 - sglang - INFO - [2025-07-20 17:39:22 TP0] Decode batch. #running-req: 2, #token: 6354, token usage: 0.17, gen throughput (token/s): 137.39, #queue-req: 0
- 2025-07-20 17:39:22,836 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:39:23,688 - sglang - INFO - [2025-07-20 17:39:23 TP0] Decode batch. #running-req: 2, #token: 6434, token usage: 0.17, gen throughput (token/s): 93.90, #queue-req: 0
- 2025-07-20 17:39:23,688 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:39:24,539 - sglang - INFO - [2025-07-20 17:39:24 TP0] Decode batch. #running-req: 2, #token: 6514, token usage: 0.17, gen throughput (token/s): 93.97, #queue-req: 0
- 2025-07-20 17:39:24,540 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:39:25,384 - sglang - INFO - [2025-07-20 17:39:25 TP0] Decode batch. #running-req: 1, #token: 3022, token usage: 0.08, gen throughput (token/s): 62.72, #queue-req: 0
- 2025-07-20 17:39:25,384 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:26,226 - sglang - INFO - [2025-07-20 17:39:26 TP0] Decode batch. #running-req: 1, #token: 3062, token usage: 0.08, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:39:26,226 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:26,887 - __main__ - INFO - Queue remaining: 5
- 2025-07-20 17:39:26,887 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 435.24 370.79
- finished_output_tokens 103.72 84.06
- sglang_input_tokens 466.01 453.89
- sglang_output_tokens 118.67 117.94
- 2025-07-20 17:39:26,887 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 9 | 10
- 2025-07-20 17:39:27,067 - sglang - INFO - [2025-07-20 17:39:27 TP0] Decode batch. #running-req: 1, #token: 3102, token usage: 0.08, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:39:27,067 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:27,910 - sglang - INFO - [2025-07-20 17:39:27 TP0] Decode batch. #running-req: 1, #token: 3142, token usage: 0.08, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:39:27,910 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:28,753 - sglang - INFO - [2025-07-20 17:39:28 TP0] Decode batch. #running-req: 1, #token: 3182, token usage: 0.08, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:39:28,753 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:29,307 - __main__ - INFO - Finished TaskGroup for worker on c1e2b4f5c6c4bb6407c21dcae6a8dccdc2ad0e74
- 2025-07-20 17:39:29,307 - __main__ - INFO - Got 1 docs for c1e2b4f5c6c4bb6407c21dcae6a8dccdc2ad0e74
- 2025-07-20 17:39:29,309 - __main__ - INFO - Worker 0 processing work item 5da3510f60e4d62bb38dbf36fb90d4a0034727fa
- 2025-07-20 17:39:29,309 - __main__ - INFO - Created all tasks for 5da3510f60e4d62bb38dbf36fb90d4a0034727fa
- 2025-07-20 17:39:29,315 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900902.pdf in worker 0
- 2025-07-20 17:39:29,436 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-6
- 2025-07-20 17:39:29,454 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-2
- 2025-07-20 17:39:29,471 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-3
- 2025-07-20 17:39:29,500 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-1
- 2025-07-20 17:39:29,558 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-4
- 2025-07-20 17:39:29,575 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-5
- 2025-07-20 17:39:29,736 - sglang - INFO - [2025-07-20 17:39:29 TP0] Prefill batch. #new-seq: 1, #new-token: 1592, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:39:29,737 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:39:30,331 - sglang - INFO - [2025-07-20 17:39:30 TP0] Prefill batch. #new-seq: 5, #new-token: 11246, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:39:30,331 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:34,039 - sglang - INFO - [2025-07-20 17:39:34 TP0] Decode batch. #running-req: 6, #token: 12922, token usage: 0.34, gen throughput (token/s): 20.81, #queue-req: 0
- 2025-07-20 17:39:34,039 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:34,926 - sglang - INFO - [2025-07-20 17:39:34 TP0] Decode batch. #running-req: 6, #token: 13162, token usage: 0.35, gen throughput (token/s): 270.61, #queue-req: 0
- 2025-07-20 17:39:34,926 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:35,814 - sglang - INFO - [2025-07-20 17:39:35 TP0] Decode batch. #running-req: 6, #token: 13402, token usage: 0.35, gen throughput (token/s): 270.25, #queue-req: 0
- 2025-07-20 17:39:35,814 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:36,702 - sglang - INFO - [2025-07-20 17:39:36 TP0] Decode batch. #running-req: 6, #token: 13642, token usage: 0.36, gen throughput (token/s): 270.18, #queue-req: 0
- 2025-07-20 17:39:36,702 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:36,888 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 17:39:36,888 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 456.23 447.63
- finished_output_tokens 108.24 100.91
- sglang_input_tokens 463.38 450.89
- sglang_output_tokens 118.34 119.31
- 2025-07-20 17:39:36,889 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:39:37,590 - sglang - INFO - [2025-07-20 17:39:37 TP0] Decode batch. #running-req: 6, #token: 13882, token usage: 0.37, gen throughput (token/s): 270.39, #queue-req: 0
- 2025-07-20 17:39:37,590 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:38,477 - sglang - INFO - [2025-07-20 17:39:38 TP0] Decode batch. #running-req: 6, #token: 14122, token usage: 0.37, gen throughput (token/s): 270.52, #queue-req: 0
- 2025-07-20 17:39:38,477 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:39,364 - sglang - INFO - [2025-07-20 17:39:39 TP0] Decode batch. #running-req: 6, #token: 14362, token usage: 0.38, gen throughput (token/s): 270.54, #queue-req: 0
- 2025-07-20 17:39:39,364 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:39:40,252 - sglang - INFO - [2025-07-20 17:39:40 TP0] Decode batch. #running-req: 5, #token: 12716, token usage: 0.33, gen throughput (token/s): 244.43, #queue-req: 0
- 2025-07-20 17:39:40,252 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:39:41,137 - sglang - INFO - [2025-07-20 17:39:41 TP0] Decode batch. #running-req: 5, #token: 12916, token usage: 0.34, gen throughput (token/s): 225.95, #queue-req: 0
- 2025-07-20 17:39:41,137 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:39:42,023 - sglang - INFO - [2025-07-20 17:39:42 TP0] Decode batch. #running-req: 5, #token: 13116, token usage: 0.35, gen throughput (token/s): 225.82, #queue-req: 0
- 2025-07-20 17:39:42,023 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:39:42,907 - sglang - INFO - [2025-07-20 17:39:42 TP0] Decode batch. #running-req: 5, #token: 13316, token usage: 0.35, gen throughput (token/s): 226.20, #queue-req: 0
- 2025-07-20 17:39:42,907 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:39:43,785 - sglang - INFO - [2025-07-20 17:39:43 TP0] Decode batch. #running-req: 5, #token: 13516, token usage: 0.36, gen throughput (token/s): 227.87, #queue-req: 0
- 2025-07-20 17:39:43,785 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:39:44,659 - sglang - INFO - [2025-07-20 17:39:44 TP0] Decode batch. #running-req: 3, #token: 8858, token usage: 0.23, gen throughput (token/s): 166.94, #queue-req: 0
- 2025-07-20 17:39:44,660 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:39:45,527 - sglang - INFO - [2025-07-20 17:39:45 TP0] Decode batch. #running-req: 3, #token: 8978, token usage: 0.24, gen throughput (token/s): 138.33, #queue-req: 0
- 2025-07-20 17:39:45,527 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:39:46,376 - sglang - INFO - [2025-07-20 17:39:46 TP0] Decode batch. #running-req: 2, #token: 6167, token usage: 0.16, gen throughput (token/s): 103.56, #queue-req: 0
- 2025-07-20 17:39:46,377 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:39:46,890 - __main__ - INFO - Queue remaining: 4
- 2025-07-20 17:39:46,890 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 451.17 408.60
- finished_output_tokens 107.03 92.44
- sglang_input_tokens 466.92 448.64
- sglang_output_tokens 118.97 118.28
- 2025-07-20 17:39:46,890 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 6
- 2025-07-20 17:39:47,220 - sglang - INFO - [2025-07-20 17:39:47 TP0] Decode batch. #running-req: 2, #token: 6247, token usage: 0.16, gen throughput (token/s): 94.78, #queue-req: 0
- 2025-07-20 17:39:47,221 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:39:48,066 - sglang - INFO - [2025-07-20 17:39:48 TP0] Decode batch. #running-req: 2, #token: 6327, token usage: 0.17, gen throughput (token/s): 94.64, #queue-req: 0
- 2025-07-20 17:39:48,066 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:39:48,912 - sglang - INFO - [2025-07-20 17:39:48 TP0] Decode batch. #running-req: 1, #token: 3118, token usage: 0.08, gen throughput (token/s): 55.54, #queue-req: 0
- 2025-07-20 17:39:48,912 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:49,761 - sglang - INFO - [2025-07-20 17:39:49 TP0] Decode batch. #running-req: 1, #token: 3158, token usage: 0.08, gen throughput (token/s): 47.12, #queue-req: 0
- 2025-07-20 17:39:49,761 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:50,605 - sglang - INFO - [2025-07-20 17:39:50 TP0] Decode batch. #running-req: 1, #token: 3198, token usage: 0.08, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:39:50,605 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:51,463 - sglang - INFO - [2025-07-20 17:39:51 TP0] Decode batch. #running-req: 1, #token: 3238, token usage: 0.09, gen throughput (token/s): 46.61, #queue-req: 0
- 2025-07-20 17:39:51,464 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:52,314 - sglang - INFO - [2025-07-20 17:39:52 TP0] Decode batch. #running-req: 1, #token: 3278, token usage: 0.09, gen throughput (token/s): 47.04, #queue-req: 0
- 2025-07-20 17:39:52,314 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:53,166 - sglang - INFO - [2025-07-20 17:39:53 TP0] Decode batch. #running-req: 1, #token: 3318, token usage: 0.09, gen throughput (token/s): 46.96, #queue-req: 0
- 2025-07-20 17:39:53,166 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:53,405 - __main__ - INFO - Finished TaskGroup for worker on 5da3510f60e4d62bb38dbf36fb90d4a0034727fa
- 2025-07-20 17:39:53,406 - __main__ - INFO - Got 1 docs for 5da3510f60e4d62bb38dbf36fb90d4a0034727fa
- 2025-07-20 17:39:53,407 - __main__ - INFO - Worker 0 processing work item 7e7415b1a884dd4b422626d1f93cc9d5ff33301c
- 2025-07-20 17:39:53,408 - __main__ - INFO - Created all tasks for 7e7415b1a884dd4b422626d1f93cc9d5ff33301c
- 2025-07-20 17:39:53,413 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029008.pdf in worker 0
- 2025-07-20 17:39:53,528 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-3
- 2025-07-20 17:39:53,559 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-4
- 2025-07-20 17:39:53,573 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-7
- 2025-07-20 17:39:53,580 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-2
- 2025-07-20 17:39:53,608 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-1
- 2025-07-20 17:39:53,658 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-5
- 2025-07-20 17:39:53,676 - sglang - INFO - [2025-07-20 17:39:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1499, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:39:53,677 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:39:53,677 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-6
- 2025-07-20 17:39:54,302 - sglang - INFO - [2025-07-20 17:39:54 TP0] Prefill batch. #new-seq: 6, #new-token: 13502, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:39:54,303 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:39:56,891 - __main__ - INFO - Queue remaining: 3
- 2025-07-20 17:39:56,892 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 460.31 451.39
- finished_output_tokens 109.50 103.51
- sglang_input_tokens 467.30 446.84
- sglang_output_tokens 119.39 120.59
- 2025-07-20 17:39:56,892 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 7
- 2025-07-20 17:39:59,029 - sglang - INFO - [2025-07-20 17:39:59 TP0] Decode batch. #running-req: 7, #token: 15204, token usage: 0.40, gen throughput (token/s): 36.50, #queue-req: 0
- 2025-07-20 17:39:59,029 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:39:59,922 - sglang - INFO - [2025-07-20 17:39:59 TP0] Decode batch. #running-req: 7, #token: 15484, token usage: 0.41, gen throughput (token/s): 313.39, #queue-req: 0
- 2025-07-20 17:39:59,922 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:40:00,816 - sglang - INFO - [2025-07-20 17:40:00 TP0] Decode batch. #running-req: 7, #token: 15764, token usage: 0.41, gen throughput (token/s): 313.16, #queue-req: 0
- 2025-07-20 17:40:00,816 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:40:01,714 - sglang - INFO - [2025-07-20 17:40:01 TP0] Decode batch. #running-req: 7, #token: 16044, token usage: 0.42, gen throughput (token/s): 312.05, #queue-req: 0
- 2025-07-20 17:40:01,714 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:40:02,609 - sglang - INFO - [2025-07-20 17:40:02 TP0] Decode batch. #running-req: 6, #token: 14636, token usage: 0.39, gen throughput (token/s): 275.85, #queue-req: 0
- 2025-07-20 17:40:02,609 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:03,495 - sglang - INFO - [2025-07-20 17:40:03 TP0] Decode batch. #running-req: 6, #token: 14876, token usage: 0.39, gen throughput (token/s): 270.90, #queue-req: 0
- 2025-07-20 17:40:03,495 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:04,377 - sglang - INFO - [2025-07-20 17:40:04 TP0] Decode batch. #running-req: 6, #token: 15116, token usage: 0.40, gen throughput (token/s): 272.09, #queue-req: 0
- 2025-07-20 17:40:04,377 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:05,263 - sglang - INFO - [2025-07-20 17:40:05 TP0] Decode batch. #running-req: 6, #token: 15356, token usage: 0.40, gen throughput (token/s): 271.01, #queue-req: 0
- 2025-07-20 17:40:05,263 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:06,150 - sglang - INFO - [2025-07-20 17:40:06 TP0] Decode batch. #running-req: 6, #token: 15596, token usage: 0.41, gen throughput (token/s): 270.51, #queue-req: 0
- 2025-07-20 17:40:06,150 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:06,893 - __main__ - INFO - Queue remaining: 3
- 2025-07-20 17:40:06,893 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 455.31 406.89
- finished_output_tokens 108.31 93.30
- sglang_input_tokens 463.85 425.86
- sglang_output_tokens 118.26 113.83
- 2025-07-20 17:40:06,894 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 7
- 2025-07-20 17:40:07,042 - sglang - INFO - [2025-07-20 17:40:07 TP0] Decode batch. #running-req: 6, #token: 15836, token usage: 0.42, gen throughput (token/s): 269.08, #queue-req: 0
- 2025-07-20 17:40:07,042 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:07,934 - sglang - INFO - [2025-07-20 17:40:07 TP0] Decode batch. #running-req: 6, #token: 16076, token usage: 0.42, gen throughput (token/s): 268.95, #queue-req: 0
- 2025-07-20 17:40:07,934 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:08,823 - sglang - INFO - [2025-07-20 17:40:08 TP0] Decode batch. #running-req: 5, #token: 13621, token usage: 0.36, gen throughput (token/s): 238.41, #queue-req: 0
- 2025-07-20 17:40:08,824 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:09,703 - sglang - INFO - [2025-07-20 17:40:09 TP0] Decode batch. #running-req: 4, #token: 11505, token usage: 0.30, gen throughput (token/s): 182.96, #queue-req: 0
- 2025-07-20 17:40:09,703 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:40:10,571 - sglang - INFO - [2025-07-20 17:40:10 TP0] Decode batch. #running-req: 4, #token: 11665, token usage: 0.31, gen throughput (token/s): 184.47, #queue-req: 0
- 2025-07-20 17:40:10,571 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:40:11,435 - sglang - INFO - [2025-07-20 17:40:11 TP0] Decode batch. #running-req: 4, #token: 11825, token usage: 0.31, gen throughput (token/s): 185.05, #queue-req: 0
- 2025-07-20 17:40:11,435 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:40:12,288 - sglang - INFO - [2025-07-20 17:40:12 TP0] Decode batch. #running-req: 2, #token: 6278, token usage: 0.17, gen throughput (token/s): 117.26, #queue-req: 0
- 2025-07-20 17:40:12,288 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:40:13,137 - sglang - INFO - [2025-07-20 17:40:13 TP0] Decode batch. #running-req: 2, #token: 6358, token usage: 0.17, gen throughput (token/s): 94.18, #queue-req: 0
- 2025-07-20 17:40:13,138 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:40:13,986 - sglang - INFO - [2025-07-20 17:40:13 TP0] Decode batch. #running-req: 1, #token: 3157, token usage: 0.08, gen throughput (token/s): 76.63, #queue-req: 0
- 2025-07-20 17:40:13,986 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:14,828 - sglang - INFO - [2025-07-20 17:40:14 TP0] Decode batch. #running-req: 1, #token: 3197, token usage: 0.08, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-07-20 17:40:14,829 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:15,670 - sglang - INFO - [2025-07-20 17:40:15 TP0] Decode batch. #running-req: 1, #token: 3237, token usage: 0.09, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:40:15,670 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:16,514 - sglang - INFO - [2025-07-20 17:40:16 TP0] Decode batch. #running-req: 1, #token: 3277, token usage: 0.09, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:40:16,515 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:16,895 - __main__ - INFO - Queue remaining: 3
- 2025-07-20 17:40:16,895 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 450.42 406.89
- finished_output_tokens 107.15 93.30
- sglang_input_tokens 470.75 454.44
- sglang_output_tokens 120.01 122.52
- 2025-07-20 17:40:16,895 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 6 | 7
- 2025-07-20 17:40:17,348 - sglang - INFO - [2025-07-20 17:40:17 TP0] Decode batch. #running-req: 1, #token: 3317, token usage: 0.09, gen throughput (token/s): 47.97, #queue-req: 0
- 2025-07-20 17:40:17,349 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:18,184 - sglang - INFO - [2025-07-20 17:40:18 TP0] Decode batch. #running-req: 1, #token: 3357, token usage: 0.09, gen throughput (token/s): 47.86, #queue-req: 0
- 2025-07-20 17:40:18,185 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:18,419 - __main__ - INFO - Finished TaskGroup for worker on 7e7415b1a884dd4b422626d1f93cc9d5ff33301c
- 2025-07-20 17:40:18,419 - __main__ - INFO - Got 1 docs for 7e7415b1a884dd4b422626d1f93cc9d5ff33301c
- 2025-07-20 17:40:18,421 - __main__ - INFO - Worker 0 processing work item 24809642f1ed21aee754e7c58d350b261d121212
- 2025-07-20 17:40:18,421 - __main__ - INFO - Created all tasks for 24809642f1ed21aee754e7c58d350b261d121212
- 2025-07-20 17:40:18,427 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900602.pdf in worker 0
- 2025-07-20 17:40:18,520 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-6
- 2025-07-20 17:40:18,612 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-3
- 2025-07-20 17:40:18,614 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-1
- 2025-07-20 17:40:18,636 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-2
- 2025-07-20 17:40:18,667 - sglang - INFO - [2025-07-20 17:40:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1339, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:40:18,667 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:40:18,670 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-4
- 2025-07-20 17:40:18,678 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-5
- 2025-07-20 17:40:19,234 - sglang - INFO - [2025-07-20 17:40:19 TP0] Prefill batch. #new-seq: 5, #new-token: 11852, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
- 2025-07-20 17:40:19,235 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:23,378 - sglang - INFO - [2025-07-20 17:40:23 TP0] Decode batch. #running-req: 6, #token: 13365, token usage: 0.35, gen throughput (token/s): 35.62, #queue-req: 0
- 2025-07-20 17:40:23,379 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:24,258 - sglang - INFO - [2025-07-20 17:40:24 TP0] Decode batch. #running-req: 6, #token: 13605, token usage: 0.36, gen throughput (token/s): 272.69, #queue-req: 0
- 2025-07-20 17:40:24,259 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:25,138 - sglang - INFO - [2025-07-20 17:40:25 TP0] Decode batch. #running-req: 6, #token: 13845, token usage: 0.36, gen throughput (token/s): 272.99, #queue-req: 0
- 2025-07-20 17:40:25,138 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:26,018 - sglang - INFO - [2025-07-20 17:40:26 TP0] Decode batch. #running-req: 6, #token: 14085, token usage: 0.37, gen throughput (token/s): 272.53, #queue-req: 0
- 2025-07-20 17:40:26,019 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:26,897 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 17:40:26,897 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 461.57 456.89
- finished_output_tokens 110.14 106.26
- sglang_input_tokens 468.34 447.66
- sglang_output_tokens 119.71 121.47
- 2025-07-20 17:40:26,898 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 6
- 2025-07-20 17:40:26,901 - sglang - INFO - [2025-07-20 17:40:26 TP0] Decode batch. #running-req: 6, #token: 14325, token usage: 0.38, gen throughput (token/s): 271.74, #queue-req: 0
- 2025-07-20 17:40:26,901 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:40:27,789 - sglang - INFO - [2025-07-20 17:40:27 TP0] Decode batch. #running-req: 5, #token: 12997, token usage: 0.34, gen throughput (token/s): 262.47, #queue-req: 0
- 2025-07-20 17:40:27,789 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:28,668 - sglang - INFO - [2025-07-20 17:40:28 TP0] Decode batch. #running-req: 5, #token: 13197, token usage: 0.35, gen throughput (token/s): 227.66, #queue-req: 0
- 2025-07-20 17:40:28,668 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:29,546 - sglang - INFO - [2025-07-20 17:40:29 TP0] Decode batch. #running-req: 5, #token: 13397, token usage: 0.35, gen throughput (token/s): 227.56, #queue-req: 0
- 2025-07-20 17:40:29,547 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:30,433 - sglang - INFO - [2025-07-20 17:40:30 TP0] Decode batch. #running-req: 5, #token: 13597, token usage: 0.36, gen throughput (token/s): 225.54, #queue-req: 0
- 2025-07-20 17:40:30,433 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:31,312 - sglang - INFO - [2025-07-20 17:40:31 TP0] Decode batch. #running-req: 5, #token: 13797, token usage: 0.36, gen throughput (token/s): 227.52, #queue-req: 0
- 2025-07-20 17:40:31,313 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:32,192 - sglang - INFO - [2025-07-20 17:40:32 TP0] Decode batch. #running-req: 5, #token: 13997, token usage: 0.37, gen throughput (token/s): 227.20, #queue-req: 0
- 2025-07-20 17:40:32,193 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:33,079 - sglang - INFO - [2025-07-20 17:40:33 TP0] Decode batch. #running-req: 5, #token: 14197, token usage: 0.37, gen throughput (token/s): 225.67, #queue-req: 0
- 2025-07-20 17:40:33,079 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:33,970 - sglang - INFO - [2025-07-20 17:40:33 TP0] Decode batch. #running-req: 5, #token: 14397, token usage: 0.38, gen throughput (token/s): 224.35, #queue-req: 0
- 2025-07-20 17:40:33,971 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:34,855 - sglang - INFO - [2025-07-20 17:40:34 TP0] Decode batch. #running-req: 5, #token: 14597, token usage: 0.38, gen throughput (token/s): 226.02, #queue-req: 0
- 2025-07-20 17:40:34,855 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:35,738 - sglang - INFO - [2025-07-20 17:40:35 TP0] Decode batch. #running-req: 5, #token: 14797, token usage: 0.39, gen throughput (token/s): 226.61, #queue-req: 0
- 2025-07-20 17:40:35,738 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:40:36,622 - sglang - INFO - [2025-07-20 17:40:36 TP0] Decode batch. #running-req: 4, #token: 12034, token usage: 0.32, gen throughput (token/s): 206.95, #queue-req: 0
- 2025-07-20 17:40:36,622 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:40:36,899 - __main__ - INFO - Queue remaining: 2
- 2025-07-20 17:40:36,899 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 456.72 426.36
- finished_output_tokens 108.98 98.80
- sglang_input_tokens 467.28 439.72
- sglang_output_tokens 119.33 119.59
- 2025-07-20 17:40:36,900 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 6
- 2025-07-20 17:40:37,495 - sglang - INFO - [2025-07-20 17:40:37 TP0] Decode batch. #running-req: 3, #token: 8915, token usage: 0.23, gen throughput (token/s): 167.24, #queue-req: 0
- 2025-07-20 17:40:37,495 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:40:38,369 - sglang - INFO - [2025-07-20 17:40:38 TP0] Decode batch. #running-req: 2, #token: 6214, token usage: 0.16, gen throughput (token/s): 136.17, #queue-req: 0
- 2025-07-20 17:40:38,369 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:40:39,224 - sglang - INFO - [2025-07-20 17:40:39 TP0] Decode batch. #running-req: 2, #token: 6294, token usage: 0.17, gen throughput (token/s): 93.60, #queue-req: 0
- 2025-07-20 17:40:39,224 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:40:40,076 - sglang - INFO - [2025-07-20 17:40:40 TP0] Decode batch. #running-req: 1, #token: 3246, token usage: 0.09, gen throughput (token/s): 59.85, #queue-req: 0
- 2025-07-20 17:40:40,076 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:40,912 - sglang - INFO - [2025-07-20 17:40:40 TP0] Decode batch. #running-req: 1, #token: 3286, token usage: 0.09, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:40:40,912 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:41,746 - sglang - INFO - [2025-07-20 17:40:41 TP0] Decode batch. #running-req: 1, #token: 3326, token usage: 0.09, gen throughput (token/s): 47.98, #queue-req: 0
- 2025-07-20 17:40:41,746 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:42,578 - sglang - INFO - [2025-07-20 17:40:42 TP0] Decode batch. #running-req: 1, #token: 3366, token usage: 0.09, gen throughput (token/s): 48.03, #queue-req: 0
- 2025-07-20 17:40:42,579 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:43,413 - sglang - INFO - [2025-07-20 17:40:43 TP0] Decode batch. #running-req: 1, #token: 3406, token usage: 0.09, gen throughput (token/s): 47.92, #queue-req: 0
- 2025-07-20 17:40:43,414 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:40:43,817 - __main__ - INFO - Finished TaskGroup for worker on 24809642f1ed21aee754e7c58d350b261d121212
- 2025-07-20 17:40:43,818 - __main__ - INFO - Got 1 docs for 24809642f1ed21aee754e7c58d350b261d121212
- 2025-07-20 17:40:43,819 - __main__ - INFO - Worker 0 processing work item 9face5eb793573e747789b627bf1cc4b334b5b93
- 2025-07-20 17:40:43,819 - __main__ - INFO - Created all tasks for 9face5eb793573e747789b627bf1cc4b334b5b93
- 2025-07-20 17:40:43,827 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
- 2025-07-20 17:40:43,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
- 2025-07-20 17:40:43,968 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
- 2025-07-20 17:40:43,972 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
- 2025-07-20 17:40:43,974 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
- 2025-07-20 17:40:43,975 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
- 2025-07-20 17:40:43,977 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
- 2025-07-20 17:40:43,985 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
- 2025-07-20 17:40:43,989 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
- 2025-07-20 17:40:44,018 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
- 2025-07-20 17:40:44,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
- 2025-07-20 17:40:44,117 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
- 2025-07-20 17:40:44,142 - sglang - INFO - [2025-07-20 17:40:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1980, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:40:44,142 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:40:44,896 - sglang - INFO - [2025-07-20 17:40:44 TP0] Prefill batch. #new-seq: 6, #new-token: 12273, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 4
- 2025-07-20 17:40:44,896 - __main__ - INFO - sglang running req: 1 queue req: 4
- 2025-07-20 17:40:46,901 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 17:40:46,901 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 465.69 470.33
- finished_output_tokens 111.94 111.90
- sglang_input_tokens 472.32 456.05
- sglang_output_tokens 121.31 127.10
- 2025-07-20 17:40:46,901 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-07-20 17:40:49,174 - sglang - INFO - [2025-07-20 17:40:49 TP0] Decode batch. #running-req: 7, #token: 14400, token usage: 0.38, gen throughput (token/s): 28.81, #queue-req: 4
- 2025-07-20 17:40:49,175 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:40:50,062 - sglang - INFO - [2025-07-20 17:40:50 TP0] Decode batch. #running-req: 7, #token: 14680, token usage: 0.39, gen throughput (token/s): 315.36, #queue-req: 4
- 2025-07-20 17:40:50,063 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:40:50,956 - sglang - INFO - [2025-07-20 17:40:50 TP0] Decode batch. #running-req: 7, #token: 14960, token usage: 0.39, gen throughput (token/s): 313.27, #queue-req: 4
- 2025-07-20 17:40:50,957 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:40:51,840 - sglang - INFO - [2025-07-20 17:40:51 TP0] Decode batch. #running-req: 7, #token: 15240, token usage: 0.40, gen throughput (token/s): 316.72, #queue-req: 4
- 2025-07-20 17:40:51,841 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:40:52,722 - sglang - INFO - [2025-07-20 17:40:52 TP0] Decode batch. #running-req: 7, #token: 15520, token usage: 0.41, gen throughput (token/s): 317.53, #queue-req: 4
- 2025-07-20 17:40:52,722 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:40:53,614 - sglang - INFO - [2025-07-20 17:40:53 TP0] Decode batch. #running-req: 7, #token: 15800, token usage: 0.42, gen throughput (token/s): 313.91, #queue-req: 4
- 2025-07-20 17:40:53,614 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:40:54,507 - sglang - INFO - [2025-07-20 17:40:54 TP0] Decode batch. #running-req: 7, #token: 16080, token usage: 0.42, gen throughput (token/s): 313.44, #queue-req: 4
- 2025-07-20 17:40:54,508 - __main__ - INFO - sglang running req: 7 queue req: 4
- 2025-07-20 17:40:55,332 - sglang - INFO - [2025-07-20 17:40:55 TP0] Prefill batch. #new-seq: 3, #new-token: 6536, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.37, #running-req: 6, #queue-req: 1
- 2025-07-20 17:40:55,333 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-07-20 17:40:56,903 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 17:40:56,903 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 460.90 470.33
- finished_output_tokens 110.79 111.90
- sglang_input_tokens 469.50 407.44
- sglang_output_tokens 120.37 115.07
- 2025-07-20 17:40:56,903 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 11
- 2025-07-20 17:40:57,416 - sglang - INFO - [2025-07-20 17:40:57 TP0] Decode batch. #running-req: 9, #token: 20624, token usage: 0.54, gen throughput (token/s): 97.98, #queue-req: 1
- 2025-07-20 17:40:57,416 - __main__ - INFO - sglang running req: 9 queue req: 1
- 2025-07-20 17:40:58,377 - sglang - INFO - [2025-07-20 17:40:58 TP0] Decode batch. #running-req: 9, #token: 20984, token usage: 0.55, gen throughput (token/s): 374.65, #queue-req: 1
- 2025-07-20 17:40:58,377 - __main__ - INFO - sglang running req: 9 queue req: 1
- 2025-07-20 17:40:58,520 - sglang - INFO - [2025-07-20 17:40:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2579, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.49, #running-req: 8, #queue-req: 0
- 2025-07-20 17:40:58,520 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:41:00,068 - sglang - INFO - [2025-07-20 17:41:00 TP0] Decode batch. #running-req: 6, #token: 14139, token usage: 0.37, gen throughput (token/s): 175.60, #queue-req: 0
- 2025-07-20 17:41:00,069 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:41:00,953 - sglang - INFO - [2025-07-20 17:41:00 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 241.78, #queue-req: 0
- 2025-07-20 17:41:00,954 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:41:01,835 - sglang - INFO - [2025-07-20 17:41:01 TP0] Decode batch. #running-req: 5, #token: 12090, token usage: 0.32, gen throughput (token/s): 226.94, #queue-req: 0
- 2025-07-20 17:41:01,835 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:41:02,704 - sglang - INFO - [2025-07-20 17:41:02 TP0] Decode batch. #running-req: 4, #token: 9878, token usage: 0.26, gen throughput (token/s): 196.64, #queue-req: 0
- 2025-07-20 17:41:02,705 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:03,570 - sglang - INFO - [2025-07-20 17:41:03 TP0] Decode batch. #running-req: 4, #token: 10038, token usage: 0.26, gen throughput (token/s): 184.76, #queue-req: 0
- 2025-07-20 17:41:03,570 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:04,442 - sglang - INFO - [2025-07-20 17:41:04 TP0] Decode batch. #running-req: 4, #token: 10198, token usage: 0.27, gen throughput (token/s): 183.60, #queue-req: 0
- 2025-07-20 17:41:04,442 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:05,319 - sglang - INFO - [2025-07-20 17:41:05 TP0] Decode batch. #running-req: 4, #token: 10358, token usage: 0.27, gen throughput (token/s): 182.48, #queue-req: 0
- 2025-07-20 17:41:05,319 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:06,189 - sglang - INFO - [2025-07-20 17:41:06 TP0] Decode batch. #running-req: 4, #token: 10518, token usage: 0.28, gen throughput (token/s): 183.72, #queue-req: 0
- 2025-07-20 17:41:06,190 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:06,905 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 17:41:06,905 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 456.19 470.33
- finished_output_tokens 109.66 111.90
- sglang_input_tokens 477.22 432.04
- sglang_output_tokens 121.48 117.48
- 2025-07-20 17:41:06,905 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 7 | 11
- 2025-07-20 17:41:07,052 - sglang - INFO - [2025-07-20 17:41:07 TP0] Decode batch. #running-req: 4, #token: 10678, token usage: 0.28, gen throughput (token/s): 185.51, #queue-req: 0
- 2025-07-20 17:41:07,052 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:07,919 - sglang - INFO - [2025-07-20 17:41:07 TP0] Decode batch. #running-req: 4, #token: 10838, token usage: 0.29, gen throughput (token/s): 184.44, #queue-req: 0
- 2025-07-20 17:41:07,920 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:08,789 - sglang - INFO - [2025-07-20 17:41:08 TP0] Decode batch. #running-req: 3, #token: 8742, token usage: 0.23, gen throughput (token/s): 157.48, #queue-req: 0
- 2025-07-20 17:41:08,790 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:41:09,656 - sglang - INFO - [2025-07-20 17:41:09 TP0] Decode batch. #running-req: 3, #token: 8862, token usage: 0.23, gen throughput (token/s): 138.55, #queue-req: 0
- 2025-07-20 17:41:09,656 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:41:10,520 - sglang - INFO - [2025-07-20 17:41:10 TP0] Decode batch. #running-req: 3, #token: 8982, token usage: 0.24, gen throughput (token/s): 138.76, #queue-req: 0
- 2025-07-20 17:41:10,521 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:41:11,387 - sglang - INFO - [2025-07-20 17:41:11 TP0] Decode batch. #running-req: 3, #token: 9102, token usage: 0.24, gen throughput (token/s): 138.40, #queue-req: 0
- 2025-07-20 17:41:11,388 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:41:12,257 - sglang - INFO - [2025-07-20 17:41:12 TP0] Decode batch. #running-req: 3, #token: 9222, token usage: 0.24, gen throughput (token/s): 137.99, #queue-req: 0
- 2025-07-20 17:41:12,257 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:41:13,115 - sglang - INFO - [2025-07-20 17:41:13 TP0] Decode batch. #running-req: 2, #token: 6119, token usage: 0.16, gen throughput (token/s): 96.78, #queue-req: 0
- 2025-07-20 17:41:13,115 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:13,960 - sglang - INFO - [2025-07-20 17:41:13 TP0] Decode batch. #running-req: 2, #token: 6199, token usage: 0.16, gen throughput (token/s): 94.63, #queue-req: 0
- 2025-07-20 17:41:13,960 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:14,806 - sglang - INFO - [2025-07-20 17:41:14 TP0] Decode batch. #running-req: 2, #token: 6279, token usage: 0.17, gen throughput (token/s): 94.58, #queue-req: 0
- 2025-07-20 17:41:14,806 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:15,657 - sglang - INFO - [2025-07-20 17:41:15 TP0] Decode batch. #running-req: 2, #token: 6359, token usage: 0.17, gen throughput (token/s): 93.93, #queue-req: 0
- 2025-07-20 17:41:15,658 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:16,507 - sglang - INFO - [2025-07-20 17:41:16 TP0] Decode batch. #running-req: 2, #token: 6439, token usage: 0.17, gen throughput (token/s): 94.17, #queue-req: 0
- 2025-07-20 17:41:16,507 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:16,906 - __main__ - INFO - Queue remaining: 1
- 2025-07-20 17:41:16,907 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 451.59 470.33
- finished_output_tokens 108.55 111.90
- sglang_input_tokens 479.00 453.82
- sglang_output_tokens 122.24 124.01
- 2025-07-20 17:41:16,907 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-07-20 17:41:17,351 - sglang - INFO - [2025-07-20 17:41:17 TP0] Decode batch. #running-req: 1, #token: 3413, token usage: 0.09, gen throughput (token/s): 58.09, #queue-req: 0
- 2025-07-20 17:41:17,351 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:18,193 - sglang - INFO - [2025-07-20 17:41:18 TP0] Decode batch. #running-req: 1, #token: 3453, token usage: 0.09, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:41:18,193 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:19,041 - sglang - INFO - [2025-07-20 17:41:19 TP0] Decode batch. #running-req: 1, #token: 3493, token usage: 0.09, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-07-20 17:41:19,041 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:19,889 - sglang - INFO - [2025-07-20 17:41:19 TP0] Decode batch. #running-req: 1, #token: 3533, token usage: 0.09, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-07-20 17:41:19,889 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:19,937 - __main__ - INFO - Finished TaskGroup for worker on 9face5eb793573e747789b627bf1cc4b334b5b93
- 2025-07-20 17:41:19,938 - __main__ - INFO - Got 1 docs for 9face5eb793573e747789b627bf1cc4b334b5b93
- 2025-07-20 17:41:19,939 - __main__ - INFO - Worker 0 processing work item 21ee5d5d32535bcacd750ef2dace24b98fa42fdb
- 2025-07-20 17:41:19,940 - __main__ - INFO - Created all tasks for 21ee5d5d32535bcacd750ef2dace24b98fa42fdb
- 2025-07-20 17:41:19,947 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
- 2025-07-20 17:41:20,073 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
- 2025-07-20 17:41:20,080 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
- 2025-07-20 17:41:20,091 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
- 2025-07-20 17:41:20,098 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
- 2025-07-20 17:41:20,100 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
- 2025-07-20 17:41:20,133 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
- 2025-07-20 17:41:20,143 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 17:41:20,168 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
- 2025-07-20 17:41:20,186 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
- 2025-07-20 17:41:20,229 - sglang - INFO - [2025-07-20 17:41:20 TP0] Prefill batch. #new-seq: 1, #new-token: 1759, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:41:20,230 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:41:20,913 - sglang - INFO - [2025-07-20 17:41:20 TP0] Prefill batch. #new-seq: 6, #new-token: 13155, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 2
- 2025-07-20 17:41:20,913 - __main__ - INFO - sglang running req: 1 queue req: 2
- 2025-07-20 17:41:25,747 - sglang - INFO - [2025-07-20 17:41:25 TP0] Decode batch. #running-req: 7, #token: 15180, token usage: 0.40, gen throughput (token/s): 45.75, #queue-req: 2
- 2025-07-20 17:41:25,747 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:41:26,644 - sglang - INFO - [2025-07-20 17:41:26 TP0] Decode batch. #running-req: 7, #token: 15460, token usage: 0.41, gen throughput (token/s): 312.14, #queue-req: 2
- 2025-07-20 17:41:26,644 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:41:26,908 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:41:26,908 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 470.43 548.22
- finished_output_tokens 112.98 130.29
- sglang_input_tokens 476.79 462.42
- sglang_output_tokens 121.97 127.20
- 2025-07-20 17:41:26,908 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 9
- 2025-07-20 17:41:27,531 - sglang - INFO - [2025-07-20 17:41:27 TP0] Decode batch. #running-req: 7, #token: 15740, token usage: 0.41, gen throughput (token/s): 315.73, #queue-req: 2
- 2025-07-20 17:41:27,531 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:41:28,418 - sglang - INFO - [2025-07-20 17:41:28 TP0] Decode batch. #running-req: 7, #token: 16020, token usage: 0.42, gen throughput (token/s): 315.50, #queue-req: 2
- 2025-07-20 17:41:28,419 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:41:29,315 - sglang - INFO - [2025-07-20 17:41:29 TP0] Decode batch. #running-req: 7, #token: 16300, token usage: 0.43, gen throughput (token/s): 312.34, #queue-req: 2
- 2025-07-20 17:41:29,315 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:41:30,209 - sglang - INFO - [2025-07-20 17:41:30 TP0] Decode batch. #running-req: 7, #token: 16580, token usage: 0.44, gen throughput (token/s): 313.00, #queue-req: 2
- 2025-07-20 17:41:30,210 - __main__ - INFO - sglang running req: 7 queue req: 2
- 2025-07-20 17:41:30,791 - sglang - INFO - [2025-07-20 17:41:30 TP0] Prefill batch. #new-seq: 2, #new-token: 4685, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 0
- 2025-07-20 17:41:30,791 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:41:32,535 - sglang - INFO - [2025-07-20 17:41:32 TP0] Decode batch. #running-req: 8, #token: 19481, token usage: 0.51, gen throughput (token/s): 125.99, #queue-req: 0
- 2025-07-20 17:41:32,535 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:41:33,442 - sglang - INFO - [2025-07-20 17:41:33 TP0] Decode batch. #running-req: 8, #token: 19801, token usage: 0.52, gen throughput (token/s): 352.97, #queue-req: 0
- 2025-07-20 17:41:33,442 - __main__ - INFO - sglang running req: 8 queue req: 0
- 2025-07-20 17:41:34,336 - sglang - INFO - [2025-07-20 17:41:34 TP0] Decode batch. #running-req: 7, #token: 17928, token usage: 0.47, gen throughput (token/s): 327.59, #queue-req: 0
- 2025-07-20 17:41:34,336 - __main__ - INFO - sglang running req: 7 queue req: 0
- 2025-07-20 17:41:35,222 - sglang - INFO - [2025-07-20 17:41:35 TP0] Decode batch. #running-req: 6, #token: 15761, token usage: 0.41, gen throughput (token/s): 287.81, #queue-req: 0
- 2025-07-20 17:41:35,222 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-07-20 17:41:36,111 - sglang - INFO - [2025-07-20 17:41:36 TP0] Decode batch. #running-req: 5, #token: 13804, token usage: 0.36, gen throughput (token/s): 239.53, #queue-req: 0
- 2025-07-20 17:41:36,111 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:41:36,910 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:41:36,910 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 465.77 548.22
- finished_output_tokens 111.86 130.29
- sglang_input_tokens 479.45 487.28
- sglang_output_tokens 122.13 131.81
- 2025-07-20 17:41:36,910 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 9
- 2025-07-20 17:41:36,996 - sglang - INFO - [2025-07-20 17:41:36 TP0] Decode batch. #running-req: 5, #token: 14004, token usage: 0.37, gen throughput (token/s): 225.99, #queue-req: 0
- 2025-07-20 17:41:36,997 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-07-20 17:41:37,873 - sglang - INFO - [2025-07-20 17:41:37 TP0] Decode batch. #running-req: 4, #token: 11440, token usage: 0.30, gen throughput (token/s): 216.72, #queue-req: 0
- 2025-07-20 17:41:37,873 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:38,736 - sglang - INFO - [2025-07-20 17:41:38 TP0] Decode batch. #running-req: 4, #token: 11600, token usage: 0.31, gen throughput (token/s): 185.41, #queue-req: 0
- 2025-07-20 17:41:38,736 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:39,602 - sglang - INFO - [2025-07-20 17:41:39 TP0] Decode batch. #running-req: 4, #token: 11760, token usage: 0.31, gen throughput (token/s): 184.68, #queue-req: 0
- 2025-07-20 17:41:39,602 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-07-20 17:41:40,477 - sglang - INFO - [2025-07-20 17:41:40 TP0] Decode batch. #running-req: 3, #token: 9178, token usage: 0.24, gen throughput (token/s): 176.08, #queue-req: 0
- 2025-07-20 17:41:40,477 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-07-20 17:41:41,324 - sglang - INFO - [2025-07-20 17:41:41 TP0] Decode batch. #running-req: 2, #token: 6017, token usage: 0.16, gen throughput (token/s): 102.71, #queue-req: 0
- 2025-07-20 17:41:41,324 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:42,169 - sglang - INFO - [2025-07-20 17:41:42 TP0] Decode batch. #running-req: 2, #token: 6097, token usage: 0.16, gen throughput (token/s): 94.70, #queue-req: 0
- 2025-07-20 17:41:42,169 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:43,020 - sglang - INFO - [2025-07-20 17:41:43 TP0] Decode batch. #running-req: 2, #token: 6177, token usage: 0.16, gen throughput (token/s): 93.94, #queue-req: 0
- 2025-07-20 17:41:43,020 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:43,866 - sglang - INFO - [2025-07-20 17:41:43 TP0] Decode batch. #running-req: 2, #token: 6257, token usage: 0.16, gen throughput (token/s): 94.59, #queue-req: 0
- 2025-07-20 17:41:43,866 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:44,712 - sglang - INFO - [2025-07-20 17:41:44 TP0] Decode batch. #running-req: 2, #token: 6337, token usage: 0.17, gen throughput (token/s): 94.62, #queue-req: 0
- 2025-07-20 17:41:44,712 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:45,558 - sglang - INFO - [2025-07-20 17:41:45 TP0] Decode batch. #running-req: 2, #token: 6417, token usage: 0.17, gen throughput (token/s): 94.51, #queue-req: 0
- 2025-07-20 17:41:45,558 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:46,413 - sglang - INFO - [2025-07-20 17:41:46 TP0] Decode batch. #running-req: 2, #token: 6497, token usage: 0.17, gen throughput (token/s): 93.59, #queue-req: 0
- 2025-07-20 17:41:46,413 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:46,912 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:41:46,912 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 461.21 548.22
- finished_output_tokens 110.76 130.29
- sglang_input_tokens 481.82 504.34
- sglang_output_tokens 122.43 126.89
- 2025-07-20 17:41:46,912 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 7 | 9
- 2025-07-20 17:41:47,268 - sglang - INFO - [2025-07-20 17:41:47 TP0] Decode batch. #running-req: 2, #token: 6577, token usage: 0.17, gen throughput (token/s): 93.50, #queue-req: 0
- 2025-07-20 17:41:47,269 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:48,113 - sglang - INFO - [2025-07-20 17:41:48 TP0] Decode batch. #running-req: 2, #token: 6657, token usage: 0.18, gen throughput (token/s): 94.74, #queue-req: 0
- 2025-07-20 17:41:48,113 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:48,959 - sglang - INFO - [2025-07-20 17:41:48 TP0] Decode batch. #running-req: 2, #token: 6737, token usage: 0.18, gen throughput (token/s): 94.54, #queue-req: 0
- 2025-07-20 17:41:48,959 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:49,812 - sglang - INFO - [2025-07-20 17:41:49 TP0] Decode batch. #running-req: 2, #token: 6817, token usage: 0.18, gen throughput (token/s): 93.76, #queue-req: 0
- 2025-07-20 17:41:49,812 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:50,661 - sglang - INFO - [2025-07-20 17:41:50 TP0] Decode batch. #running-req: 2, #token: 6897, token usage: 0.18, gen throughput (token/s): 94.27, #queue-req: 0
- 2025-07-20 17:41:50,661 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-07-20 17:41:51,512 - sglang - INFO - [2025-07-20 17:41:51 TP0] Decode batch. #running-req: 1, #token: 3766, token usage: 0.10, gen throughput (token/s): 92.83, #queue-req: 0
- 2025-07-20 17:41:51,512 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:52,354 - sglang - INFO - [2025-07-20 17:41:52 TP0] Decode batch. #running-req: 1, #token: 3806, token usage: 0.10, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:41:52,354 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:53,201 - sglang - INFO - [2025-07-20 17:41:53 TP0] Decode batch. #running-req: 1, #token: 3846, token usage: 0.10, gen throughput (token/s): 47.22, #queue-req: 0
- 2025-07-20 17:41:53,201 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:54,048 - sglang - INFO - [2025-07-20 17:41:54 TP0] Decode batch. #running-req: 1, #token: 3886, token usage: 0.10, gen throughput (token/s): 47.23, #queue-req: 0
- 2025-07-20 17:41:54,048 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:54,883 - sglang - INFO - [2025-07-20 17:41:54 TP0] Decode batch. #running-req: 1, #token: 3926, token usage: 0.10, gen throughput (token/s): 47.93, #queue-req: 0
- 2025-07-20 17:41:54,883 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:55,718 - sglang - INFO - [2025-07-20 17:41:55 TP0] Decode batch. #running-req: 1, #token: 3966, token usage: 0.10, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-07-20 17:41:55,718 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:56,562 - sglang - INFO - [2025-07-20 17:41:56 TP0] Decode batch. #running-req: 1, #token: 4006, token usage: 0.11, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:41:56,562 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:56,914 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:41:56,915 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 456.73 548.22
- finished_output_tokens 109.69 130.29
- sglang_input_tokens 479.40 512.07
- sglang_output_tokens 122.11 129.87
- 2025-07-20 17:41:56,915 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:41:57,407 - sglang - INFO - [2025-07-20 17:41:57 TP0] Decode batch. #running-req: 1, #token: 4046, token usage: 0.11, gen throughput (token/s): 47.35, #queue-req: 0
- 2025-07-20 17:41:57,407 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:58,250 - sglang - INFO - [2025-07-20 17:41:58 TP0] Decode batch. #running-req: 1, #token: 4086, token usage: 0.11, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-07-20 17:41:58,250 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:59,093 - sglang - INFO - [2025-07-20 17:41:59 TP0] Decode batch. #running-req: 1, #token: 4126, token usage: 0.11, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:41:59,094 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:41:59,942 - sglang - INFO - [2025-07-20 17:41:59 TP0] Decode batch. #running-req: 1, #token: 4166, token usage: 0.11, gen throughput (token/s): 47.12, #queue-req: 0
- 2025-07-20 17:41:59,942 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:00,793 - sglang - INFO - [2025-07-20 17:42:00 TP0] Decode batch. #running-req: 1, #token: 4206, token usage: 0.11, gen throughput (token/s): 47.03, #queue-req: 0
- 2025-07-20 17:42:00,793 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:01,630 - sglang - INFO - [2025-07-20 17:42:01 TP0] Decode batch. #running-req: 1, #token: 4246, token usage: 0.11, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:42:01,630 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:02,467 - sglang - INFO - [2025-07-20 17:42:02 TP0] Decode batch. #running-req: 1, #token: 4286, token usage: 0.11, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:42:02,467 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:03,313 - sglang - INFO - [2025-07-20 17:42:03 TP0] Decode batch. #running-req: 1, #token: 4326, token usage: 0.11, gen throughput (token/s): 47.24, #queue-req: 0
- 2025-07-20 17:42:03,313 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:04,164 - sglang - INFO - [2025-07-20 17:42:04 TP0] Decode batch. #running-req: 1, #token: 4366, token usage: 0.11, gen throughput (token/s): 47.01, #queue-req: 0
- 2025-07-20 17:42:04,164 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:05,015 - sglang - INFO - [2025-07-20 17:42:05 TP0] Decode batch. #running-req: 1, #token: 4406, token usage: 0.12, gen throughput (token/s): 47.00, #queue-req: 0
- 2025-07-20 17:42:05,015 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:05,863 - sglang - INFO - [2025-07-20 17:42:05 TP0] Decode batch. #running-req: 1, #token: 4446, token usage: 0.12, gen throughput (token/s): 47.18, #queue-req: 0
- 2025-07-20 17:42:05,863 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:06,713 - sglang - INFO - [2025-07-20 17:42:06 TP0] Decode batch. #running-req: 1, #token: 4486, token usage: 0.12, gen throughput (token/s): 47.07, #queue-req: 0
- 2025-07-20 17:42:06,713 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:06,916 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:42:06,916 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 452.34 548.22
- finished_output_tokens 108.63 130.29
- sglang_input_tokens 474.79 512.07
- sglang_output_tokens 120.93 129.87
- 2025-07-20 17:42:06,916 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:42:07,568 - sglang - INFO - [2025-07-20 17:42:07 TP0] Decode batch. #running-req: 1, #token: 4526, token usage: 0.12, gen throughput (token/s): 46.80, #queue-req: 0
- 2025-07-20 17:42:07,568 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:08,415 - sglang - INFO - [2025-07-20 17:42:08 TP0] Decode batch. #running-req: 1, #token: 4566, token usage: 0.12, gen throughput (token/s): 47.21, #queue-req: 0
- 2025-07-20 17:42:08,415 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:09,255 - sglang - INFO - [2025-07-20 17:42:09 TP0] Decode batch. #running-req: 1, #token: 4606, token usage: 0.12, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:42:09,256 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:10,099 - sglang - INFO - [2025-07-20 17:42:10 TP0] Decode batch. #running-req: 1, #token: 4646, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:42:10,099 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:10,948 - sglang - INFO - [2025-07-20 17:42:10 TP0] Decode batch. #running-req: 1, #token: 4686, token usage: 0.12, gen throughput (token/s): 47.10, #queue-req: 0
- 2025-07-20 17:42:10,949 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:11,796 - sglang - INFO - [2025-07-20 17:42:11 TP0] Decode batch. #running-req: 1, #token: 4726, token usage: 0.12, gen throughput (token/s): 47.19, #queue-req: 0
- 2025-07-20 17:42:11,796 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:12,647 - sglang - INFO - [2025-07-20 17:42:12 TP0] Decode batch. #running-req: 1, #token: 4766, token usage: 0.13, gen throughput (token/s): 46.99, #queue-req: 0
- 2025-07-20 17:42:12,648 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:13,499 - sglang - INFO - [2025-07-20 17:42:13 TP0] Decode batch. #running-req: 1, #token: 4806, token usage: 0.13, gen throughput (token/s): 46.96, #queue-req: 0
- 2025-07-20 17:42:13,499 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:14,354 - sglang - INFO - [2025-07-20 17:42:14 TP0] Decode batch. #running-req: 1, #token: 4846, token usage: 0.13, gen throughput (token/s): 46.79, #queue-req: 0
- 2025-07-20 17:42:14,354 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:15,208 - sglang - INFO - [2025-07-20 17:42:15 TP0] Decode batch. #running-req: 1, #token: 4886, token usage: 0.13, gen throughput (token/s): 46.84, #queue-req: 0
- 2025-07-20 17:42:15,208 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:16,050 - sglang - INFO - [2025-07-20 17:42:16 TP0] Decode batch. #running-req: 1, #token: 4926, token usage: 0.13, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:42:16,050 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:16,891 - sglang - INFO - [2025-07-20 17:42:16 TP0] Decode batch. #running-req: 1, #token: 4966, token usage: 0.13, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:42:16,891 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:16,918 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:42:16,919 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 448.04 548.22
- finished_output_tokens 107.60 130.29
- sglang_input_tokens 470.27 512.07
- sglang_output_tokens 119.78 129.87
- 2025-07-20 17:42:16,919 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:42:17,737 - sglang - INFO - [2025-07-20 17:42:17 TP0] Decode batch. #running-req: 1, #token: 5006, token usage: 0.13, gen throughput (token/s): 47.24, #queue-req: 0
- 2025-07-20 17:42:17,738 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:18,589 - sglang - INFO - [2025-07-20 17:42:18 TP0] Decode batch. #running-req: 1, #token: 5046, token usage: 0.13, gen throughput (token/s): 46.96, #queue-req: 0
- 2025-07-20 17:42:18,589 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:19,441 - sglang - INFO - [2025-07-20 17:42:19 TP0] Decode batch. #running-req: 1, #token: 5086, token usage: 0.13, gen throughput (token/s): 46.97, #queue-req: 0
- 2025-07-20 17:42:19,441 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:20,294 - sglang - INFO - [2025-07-20 17:42:20 TP0] Decode batch. #running-req: 1, #token: 5126, token usage: 0.13, gen throughput (token/s): 46.89, #queue-req: 0
- 2025-07-20 17:42:20,294 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:21,148 - sglang - INFO - [2025-07-20 17:42:21 TP0] Decode batch. #running-req: 1, #token: 5166, token usage: 0.14, gen throughput (token/s): 46.84, #queue-req: 0
- 2025-07-20 17:42:21,148 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:22,000 - sglang - INFO - [2025-07-20 17:42:22 TP0] Decode batch. #running-req: 1, #token: 5206, token usage: 0.14, gen throughput (token/s): 46.93, #queue-req: 0
- 2025-07-20 17:42:22,000 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:22,842 - sglang - INFO - [2025-07-20 17:42:22 TP0] Decode batch. #running-req: 1, #token: 5246, token usage: 0.14, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:42:22,842 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:23,686 - sglang - INFO - [2025-07-20 17:42:23 TP0] Decode batch. #running-req: 1, #token: 5286, token usage: 0.14, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:42:23,686 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:24,538 - sglang - INFO - [2025-07-20 17:42:24 TP0] Decode batch. #running-req: 1, #token: 5326, token usage: 0.14, gen throughput (token/s): 46.96, #queue-req: 0
- 2025-07-20 17:42:24,538 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:25,390 - sglang - INFO - [2025-07-20 17:42:25 TP0] Decode batch. #running-req: 1, #token: 5366, token usage: 0.14, gen throughput (token/s): 46.95, #queue-req: 0
- 2025-07-20 17:42:25,390 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:26,238 - sglang - INFO - [2025-07-20 17:42:26 TP0] Decode batch. #running-req: 1, #token: 5406, token usage: 0.14, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-07-20 17:42:26,238 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:26,920 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:42:26,921 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 443.81 548.22
- finished_output_tokens 106.58 130.29
- sglang_input_tokens 465.83 512.07
- sglang_output_tokens 118.65 129.87
- 2025-07-20 17:42:26,921 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:42:27,080 - sglang - INFO - [2025-07-20 17:42:27 TP0] Decode batch. #running-req: 1, #token: 5446, token usage: 0.14, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-07-20 17:42:27,081 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:27,931 - sglang - INFO - [2025-07-20 17:42:27 TP0] Decode batch. #running-req: 1, #token: 5486, token usage: 0.14, gen throughput (token/s): 47.03, #queue-req: 0
- 2025-07-20 17:42:27,931 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:28,781 - sglang - INFO - [2025-07-20 17:42:28 TP0] Decode batch. #running-req: 1, #token: 5526, token usage: 0.15, gen throughput (token/s): 47.04, #queue-req: 0
- 2025-07-20 17:42:28,781 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:29,625 - sglang - INFO - [2025-07-20 17:42:29 TP0] Decode batch. #running-req: 1, #token: 5566, token usage: 0.15, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-07-20 17:42:29,625 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:30,468 - sglang - INFO - [2025-07-20 17:42:30 TP0] Decode batch. #running-req: 1, #token: 5606, token usage: 0.15, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:42:30,468 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:30,496 - __main__ - WARNING - JSON decode error on attempt 0 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:42:30,684 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 17:42:30,904 - sglang - INFO - [2025-07-20 17:42:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:42:30,905 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:42:32,503 - sglang - INFO - [2025-07-20 17:42:32 TP0] Decode batch. #running-req: 1, #token: 2647, token usage: 0.07, gen throughput (token/s): 19.66, #queue-req: 0
- 2025-07-20 17:42:32,503 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:33,341 - sglang - INFO - [2025-07-20 17:42:33 TP0] Decode batch. #running-req: 1, #token: 2687, token usage: 0.07, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:42:33,341 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:34,175 - sglang - INFO - [2025-07-20 17:42:34 TP0] Decode batch. #running-req: 1, #token: 2727, token usage: 0.07, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:42:34,175 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:35,009 - sglang - INFO - [2025-07-20 17:42:35 TP0] Decode batch. #running-req: 1, #token: 2767, token usage: 0.07, gen throughput (token/s): 47.96, #queue-req: 0
- 2025-07-20 17:42:35,009 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:35,849 - sglang - INFO - [2025-07-20 17:42:35 TP0] Decode batch. #running-req: 1, #token: 2807, token usage: 0.07, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:42:35,850 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:36,683 - sglang - INFO - [2025-07-20 17:42:36 TP0] Decode batch. #running-req: 1, #token: 2847, token usage: 0.07, gen throughput (token/s): 48.01, #queue-req: 0
- 2025-07-20 17:42:36,683 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:36,924 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:42:36,924 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 439.66 548.22
- finished_output_tokens 105.59 130.29
- sglang_input_tokens 463.92 520.76
- sglang_output_tokens 120.35 139.87
- 2025-07-20 17:42:36,924 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:42:37,514 - sglang - INFO - [2025-07-20 17:42:37 TP0] Decode batch. #running-req: 1, #token: 2887, token usage: 0.08, gen throughput (token/s): 48.09, #queue-req: 0
- 2025-07-20 17:42:37,515 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:38,351 - sglang - INFO - [2025-07-20 17:42:38 TP0] Decode batch. #running-req: 1, #token: 2927, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:42:38,351 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:39,195 - sglang - INFO - [2025-07-20 17:42:39 TP0] Decode batch. #running-req: 1, #token: 2967, token usage: 0.08, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:42:39,195 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:40,033 - sglang - INFO - [2025-07-20 17:42:40 TP0] Decode batch. #running-req: 1, #token: 3007, token usage: 0.08, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:42:40,033 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:40,869 - sglang - INFO - [2025-07-20 17:42:40 TP0] Decode batch. #running-req: 1, #token: 3047, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:42:40,870 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:41,707 - sglang - INFO - [2025-07-20 17:42:41 TP0] Decode batch. #running-req: 1, #token: 3087, token usage: 0.08, gen throughput (token/s): 47.77, #queue-req: 0
- 2025-07-20 17:42:41,707 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:42,550 - sglang - INFO - [2025-07-20 17:42:42 TP0] Decode batch. #running-req: 1, #token: 3127, token usage: 0.08, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:42:42,550 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:43,392 - sglang - INFO - [2025-07-20 17:42:43 TP0] Decode batch. #running-req: 1, #token: 3167, token usage: 0.08, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:42:43,392 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:44,225 - sglang - INFO - [2025-07-20 17:42:44 TP0] Decode batch. #running-req: 1, #token: 3207, token usage: 0.08, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:42:44,225 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:45,061 - sglang - INFO - [2025-07-20 17:42:45 TP0] Decode batch. #running-req: 1, #token: 3247, token usage: 0.09, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:42:45,061 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:45,902 - sglang - INFO - [2025-07-20 17:42:45 TP0] Decode batch. #running-req: 1, #token: 3287, token usage: 0.09, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:42:45,902 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:46,740 - sglang - INFO - [2025-07-20 17:42:46 TP0] Decode batch. #running-req: 1, #token: 3327, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:42:46,741 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:46,925 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:42:46,926 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 435.59 548.22
- finished_output_tokens 104.61 130.29
- sglang_input_tokens 459.62 520.76
- sglang_output_tokens 119.23 139.87
- 2025-07-20 17:42:46,926 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:42:47,578 - sglang - INFO - [2025-07-20 17:42:47 TP0] Decode batch. #running-req: 1, #token: 3367, token usage: 0.09, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:42:47,578 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:48,419 - sglang - INFO - [2025-07-20 17:42:48 TP0] Decode batch. #running-req: 1, #token: 3407, token usage: 0.09, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:42:48,419 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:49,264 - sglang - INFO - [2025-07-20 17:42:49 TP0] Decode batch. #running-req: 1, #token: 3447, token usage: 0.09, gen throughput (token/s): 47.30, #queue-req: 0
- 2025-07-20 17:42:49,264 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:50,108 - sglang - INFO - [2025-07-20 17:42:50 TP0] Decode batch. #running-req: 1, #token: 3487, token usage: 0.09, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:42:50,108 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:50,941 - sglang - INFO - [2025-07-20 17:42:50 TP0] Decode batch. #running-req: 1, #token: 3527, token usage: 0.09, gen throughput (token/s): 48.01, #queue-req: 0
- 2025-07-20 17:42:50,941 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:51,775 - sglang - INFO - [2025-07-20 17:42:51 TP0] Decode batch. #running-req: 1, #token: 3567, token usage: 0.09, gen throughput (token/s): 47.95, #queue-req: 0
- 2025-07-20 17:42:51,775 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:52,615 - sglang - INFO - [2025-07-20 17:42:52 TP0] Decode batch. #running-req: 1, #token: 3607, token usage: 0.09, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:42:52,615 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:53,453 - sglang - INFO - [2025-07-20 17:42:53 TP0] Decode batch. #running-req: 1, #token: 3647, token usage: 0.10, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:42:53,453 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:54,292 - sglang - INFO - [2025-07-20 17:42:54 TP0] Decode batch. #running-req: 1, #token: 3687, token usage: 0.10, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:42:54,292 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:55,132 - sglang - INFO - [2025-07-20 17:42:55 TP0] Decode batch. #running-req: 1, #token: 3727, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:42:55,132 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:55,976 - sglang - INFO - [2025-07-20 17:42:55 TP0] Decode batch. #running-req: 1, #token: 3767, token usage: 0.10, gen throughput (token/s): 47.39, #queue-req: 0
- 2025-07-20 17:42:55,976 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:56,822 - sglang - INFO - [2025-07-20 17:42:56 TP0] Decode batch. #running-req: 1, #token: 3807, token usage: 0.10, gen throughput (token/s): 47.24, #queue-req: 0
- 2025-07-20 17:42:56,822 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:56,928 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:42:56,928 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 431.60 548.22
- finished_output_tokens 103.65 130.29
- sglang_input_tokens 455.41 520.76
- sglang_output_tokens 118.14 139.87
- 2025-07-20 17:42:56,928 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:42:57,661 - sglang - INFO - [2025-07-20 17:42:57 TP0] Decode batch. #running-req: 1, #token: 3847, token usage: 0.10, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:42:57,662 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:58,497 - sglang - INFO - [2025-07-20 17:42:58 TP0] Decode batch. #running-req: 1, #token: 3887, token usage: 0.10, gen throughput (token/s): 47.88, #queue-req: 0
- 2025-07-20 17:42:58,497 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:42:59,336 - sglang - INFO - [2025-07-20 17:42:59 TP0] Decode batch. #running-req: 1, #token: 3927, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-07-20 17:42:59,336 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:00,178 - sglang - INFO - [2025-07-20 17:43:00 TP0] Decode batch. #running-req: 1, #token: 3967, token usage: 0.10, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:43:00,179 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:01,022 - sglang - INFO - [2025-07-20 17:43:01 TP0] Decode batch. #running-req: 1, #token: 4007, token usage: 0.11, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:43:01,022 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:01,865 - sglang - INFO - [2025-07-20 17:43:01 TP0] Decode batch. #running-req: 1, #token: 4047, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:43:01,865 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:02,709 - sglang - INFO - [2025-07-20 17:43:02 TP0] Decode batch. #running-req: 1, #token: 4087, token usage: 0.11, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:43:02,710 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:03,556 - sglang - INFO - [2025-07-20 17:43:03 TP0] Decode batch. #running-req: 1, #token: 4127, token usage: 0.11, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:43:03,556 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:04,394 - sglang - INFO - [2025-07-20 17:43:04 TP0] Decode batch. #running-req: 1, #token: 4167, token usage: 0.11, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:43:04,394 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:05,231 - sglang - INFO - [2025-07-20 17:43:05 TP0] Decode batch. #running-req: 1, #token: 4207, token usage: 0.11, gen throughput (token/s): 47.80, #queue-req: 0
- 2025-07-20 17:43:05,231 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:06,070 - sglang - INFO - [2025-07-20 17:43:06 TP0] Decode batch. #running-req: 1, #token: 4247, token usage: 0.11, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-07-20 17:43:06,071 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:06,910 - sglang - INFO - [2025-07-20 17:43:06 TP0] Decode batch. #running-req: 1, #token: 4287, token usage: 0.11, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:43:06,911 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:06,930 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:43:06,930 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 427.68 548.22
- finished_output_tokens 102.71 130.29
- sglang_input_tokens 451.27 520.76
- sglang_output_tokens 117.07 139.87
- 2025-07-20 17:43:06,930 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:43:07,755 - sglang - INFO - [2025-07-20 17:43:07 TP0] Decode batch. #running-req: 1, #token: 4327, token usage: 0.11, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:43:07,755 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:08,599 - sglang - INFO - [2025-07-20 17:43:08 TP0] Decode batch. #running-req: 1, #token: 4367, token usage: 0.11, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:43:08,599 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:09,445 - sglang - INFO - [2025-07-20 17:43:09 TP0] Decode batch. #running-req: 1, #token: 4407, token usage: 0.12, gen throughput (token/s): 47.25, #queue-req: 0
- 2025-07-20 17:43:09,446 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:10,293 - sglang - INFO - [2025-07-20 17:43:10 TP0] Decode batch. #running-req: 1, #token: 4447, token usage: 0.12, gen throughput (token/s): 47.22, #queue-req: 0
- 2025-07-20 17:43:10,293 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:11,134 - sglang - INFO - [2025-07-20 17:43:11 TP0] Decode batch. #running-req: 1, #token: 4487, token usage: 0.12, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:43:11,134 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:11,973 - sglang - INFO - [2025-07-20 17:43:11 TP0] Decode batch. #running-req: 1, #token: 4527, token usage: 0.12, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:43:11,973 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:12,813 - sglang - INFO - [2025-07-20 17:43:12 TP0] Decode batch. #running-req: 1, #token: 4567, token usage: 0.12, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:43:12,813 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:13,655 - sglang - INFO - [2025-07-20 17:43:13 TP0] Decode batch. #running-req: 1, #token: 4607, token usage: 0.12, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:43:13,655 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:14,499 - sglang - INFO - [2025-07-20 17:43:14 TP0] Decode batch. #running-req: 1, #token: 4647, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:43:14,499 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:15,340 - sglang - INFO - [2025-07-20 17:43:15 TP0] Decode batch. #running-req: 1, #token: 4687, token usage: 0.12, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:43:15,340 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:16,183 - sglang - INFO - [2025-07-20 17:43:16 TP0] Decode batch. #running-req: 1, #token: 4727, token usage: 0.12, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:43:16,183 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:16,932 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:43:16,932 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 423.83 548.22
- finished_output_tokens 101.79 130.29
- sglang_input_tokens 447.21 520.76
- sglang_output_tokens 116.01 139.87
- 2025-07-20 17:43:16,933 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:43:17,029 - sglang - INFO - [2025-07-20 17:43:17 TP0] Decode batch. #running-req: 1, #token: 4767, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:43:17,029 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:17,868 - sglang - INFO - [2025-07-20 17:43:17 TP0] Decode batch. #running-req: 1, #token: 4807, token usage: 0.13, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-07-20 17:43:17,869 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:18,706 - sglang - INFO - [2025-07-20 17:43:18 TP0] Decode batch. #running-req: 1, #token: 4847, token usage: 0.13, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:43:18,706 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:19,549 - sglang - INFO - [2025-07-20 17:43:19 TP0] Decode batch. #running-req: 1, #token: 4887, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:43:19,549 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:20,396 - sglang - INFO - [2025-07-20 17:43:20 TP0] Decode batch. #running-req: 1, #token: 4927, token usage: 0.13, gen throughput (token/s): 47.23, #queue-req: 0
- 2025-07-20 17:43:20,396 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:21,239 - sglang - INFO - [2025-07-20 17:43:21 TP0] Decode batch. #running-req: 1, #token: 4967, token usage: 0.13, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:43:21,239 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:22,077 - sglang - INFO - [2025-07-20 17:43:22 TP0] Decode batch. #running-req: 1, #token: 5007, token usage: 0.13, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:43:22,077 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:22,917 - sglang - INFO - [2025-07-20 17:43:22 TP0] Decode batch. #running-req: 1, #token: 5047, token usage: 0.13, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:43:22,917 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:23,764 - sglang - INFO - [2025-07-20 17:43:23 TP0] Decode batch. #running-req: 1, #token: 5087, token usage: 0.13, gen throughput (token/s): 47.21, #queue-req: 0
- 2025-07-20 17:43:23,764 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:24,610 - sglang - INFO - [2025-07-20 17:43:24 TP0] Decode batch. #running-req: 1, #token: 5127, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
- 2025-07-20 17:43:24,610 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:25,451 - sglang - INFO - [2025-07-20 17:43:25 TP0] Decode batch. #running-req: 1, #token: 5167, token usage: 0.14, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:43:25,451 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:26,292 - sglang - INFO - [2025-07-20 17:43:26 TP0] Decode batch. #running-req: 1, #token: 5207, token usage: 0.14, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:43:26,292 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:26,934 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:43:26,935 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 420.04 548.22
- finished_output_tokens 100.88 130.29
- sglang_input_tokens 443.21 520.76
- sglang_output_tokens 114.98 139.87
- 2025-07-20 17:43:26,935 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:43:27,140 - sglang - INFO - [2025-07-20 17:43:27 TP0] Decode batch. #running-req: 1, #token: 5247, token usage: 0.14, gen throughput (token/s): 47.20, #queue-req: 0
- 2025-07-20 17:43:27,140 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:27,986 - sglang - INFO - [2025-07-20 17:43:27 TP0] Decode batch. #running-req: 1, #token: 5287, token usage: 0.14, gen throughput (token/s): 47.28, #queue-req: 0
- 2025-07-20 17:43:27,986 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:28,827 - sglang - INFO - [2025-07-20 17:43:28 TP0] Decode batch. #running-req: 1, #token: 5327, token usage: 0.14, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:43:28,827 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:29,667 - sglang - INFO - [2025-07-20 17:43:29 TP0] Decode batch. #running-req: 1, #token: 5367, token usage: 0.14, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:43:29,668 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:30,507 - sglang - INFO - [2025-07-20 17:43:30 TP0] Decode batch. #running-req: 1, #token: 5407, token usage: 0.14, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:43:30,508 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:31,355 - sglang - INFO - [2025-07-20 17:43:31 TP0] Decode batch. #running-req: 1, #token: 5447, token usage: 0.14, gen throughput (token/s): 47.18, #queue-req: 0
- 2025-07-20 17:43:31,355 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:32,198 - sglang - INFO - [2025-07-20 17:43:32 TP0] Decode batch. #running-req: 1, #token: 5487, token usage: 0.14, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:43:32,198 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:33,040 - sglang - INFO - [2025-07-20 17:43:33 TP0] Decode batch. #running-req: 1, #token: 5527, token usage: 0.15, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:43:33,040 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:33,883 - sglang - INFO - [2025-07-20 17:43:33 TP0] Decode batch. #running-req: 1, #token: 5567, token usage: 0.15, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:43:33,884 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:34,730 - sglang - INFO - [2025-07-20 17:43:34 TP0] Decode batch. #running-req: 1, #token: 0, token usage: 0.00, gen throughput (token/s): 47.26, #queue-req: 0
- 2025-07-20 17:43:34,730 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:34,736 - __main__ - WARNING - JSON decode error on attempt 1 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:43:34,924 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 17:43:35,115 - sglang - INFO - [2025-07-20 17:43:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:43:35,116 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:43:36,727 - sglang - INFO - [2025-07-20 17:43:36 TP0] Decode batch. #running-req: 1, #token: 2648, token usage: 0.07, gen throughput (token/s): 20.03, #queue-req: 0
- 2025-07-20 17:43:36,727 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:36,937 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:43:36,937 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 416.33 358.35
- finished_output_tokens 99.98 86.68
- sglang_input_tokens 441.60 432.38
- sglang_output_tokens 116.61 119.35
- 2025-07-20 17:43:36,938 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:43:37,565 - sglang - INFO - [2025-07-20 17:43:37 TP0] Decode batch. #running-req: 1, #token: 2688, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:43:37,565 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:38,408 - sglang - INFO - [2025-07-20 17:43:38 TP0] Decode batch. #running-req: 1, #token: 2728, token usage: 0.07, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:43:38,408 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:39,244 - sglang - INFO - [2025-07-20 17:43:39 TP0] Decode batch. #running-req: 1, #token: 2768, token usage: 0.07, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-07-20 17:43:39,244 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:40,076 - sglang - INFO - [2025-07-20 17:43:40 TP0] Decode batch. #running-req: 1, #token: 2808, token usage: 0.07, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:43:40,076 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:40,913 - sglang - INFO - [2025-07-20 17:43:40 TP0] Decode batch. #running-req: 1, #token: 2848, token usage: 0.07, gen throughput (token/s): 47.80, #queue-req: 0
- 2025-07-20 17:43:40,913 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:41,750 - sglang - INFO - [2025-07-20 17:43:41 TP0] Decode batch. #running-req: 1, #token: 2888, token usage: 0.08, gen throughput (token/s): 47.78, #queue-req: 0
- 2025-07-20 17:43:41,750 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:42,587 - sglang - INFO - [2025-07-20 17:43:42 TP0] Decode batch. #running-req: 1, #token: 2928, token usage: 0.08, gen throughput (token/s): 47.78, #queue-req: 0
- 2025-07-20 17:43:42,588 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:43,427 - sglang - INFO - [2025-07-20 17:43:43 TP0] Decode batch. #running-req: 1, #token: 2968, token usage: 0.08, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-07-20 17:43:43,427 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:44,268 - sglang - INFO - [2025-07-20 17:43:44 TP0] Decode batch. #running-req: 1, #token: 3008, token usage: 0.08, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:43:44,268 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:45,111 - sglang - INFO - [2025-07-20 17:43:45 TP0] Decode batch. #running-req: 1, #token: 3048, token usage: 0.08, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:43:45,111 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:45,949 - sglang - INFO - [2025-07-20 17:43:45 TP0] Decode batch. #running-req: 1, #token: 3088, token usage: 0.08, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:43:45,950 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:46,781 - sglang - INFO - [2025-07-20 17:43:46 TP0] Decode batch. #running-req: 1, #token: 3128, token usage: 0.08, gen throughput (token/s): 48.10, #queue-req: 0
- 2025-07-20 17:43:46,781 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:46,939 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:43:46,939 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 412.68 358.35
- finished_output_tokens 99.11 86.68
- sglang_input_tokens 437.73 432.38
- sglang_output_tokens 115.59 119.35
- 2025-07-20 17:43:46,939 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:43:47,614 - sglang - INFO - [2025-07-20 17:43:47 TP0] Decode batch. #running-req: 1, #token: 3168, token usage: 0.08, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:43:47,614 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:48,455 - sglang - INFO - [2025-07-20 17:43:48 TP0] Decode batch. #running-req: 1, #token: 3208, token usage: 0.08, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:43:48,455 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:49,293 - sglang - INFO - [2025-07-20 17:43:49 TP0] Decode batch. #running-req: 1, #token: 3248, token usage: 0.09, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:43:49,293 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:50,131 - sglang - INFO - [2025-07-20 17:43:50 TP0] Decode batch. #running-req: 1, #token: 3288, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:43:50,132 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:50,970 - sglang - INFO - [2025-07-20 17:43:50 TP0] Decode batch. #running-req: 1, #token: 3328, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:43:50,970 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:51,814 - sglang - INFO - [2025-07-20 17:43:51 TP0] Decode batch. #running-req: 1, #token: 3368, token usage: 0.09, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:43:51,814 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:52,658 - sglang - INFO - [2025-07-20 17:43:52 TP0] Decode batch. #running-req: 1, #token: 3408, token usage: 0.09, gen throughput (token/s): 47.39, #queue-req: 0
- 2025-07-20 17:43:52,658 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:53,493 - sglang - INFO - [2025-07-20 17:43:53 TP0] Decode batch. #running-req: 1, #token: 3448, token usage: 0.09, gen throughput (token/s): 47.90, #queue-req: 0
- 2025-07-20 17:43:53,493 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:54,326 - sglang - INFO - [2025-07-20 17:43:54 TP0] Decode batch. #running-req: 1, #token: 3488, token usage: 0.09, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:43:54,326 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:55,165 - sglang - INFO - [2025-07-20 17:43:55 TP0] Decode batch. #running-req: 1, #token: 3528, token usage: 0.09, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-07-20 17:43:55,165 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:56,004 - sglang - INFO - [2025-07-20 17:43:56 TP0] Decode batch. #running-req: 1, #token: 3568, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:43:56,004 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:56,842 - sglang - INFO - [2025-07-20 17:43:56 TP0] Decode batch. #running-req: 1, #token: 3608, token usage: 0.09, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-07-20 17:43:56,843 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:56,940 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:43:56,941 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 409.09 358.35
- finished_output_tokens 98.25 86.68
- sglang_input_tokens 433.92 432.38
- sglang_output_tokens 114.58 119.35
- 2025-07-20 17:43:56,941 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:43:57,683 - sglang - INFO - [2025-07-20 17:43:57 TP0] Decode batch. #running-req: 1, #token: 3648, token usage: 0.10, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:43:57,684 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:58,528 - sglang - INFO - [2025-07-20 17:43:58 TP0] Decode batch. #running-req: 1, #token: 3688, token usage: 0.10, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:43:58,528 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:43:59,374 - sglang - INFO - [2025-07-20 17:43:59 TP0] Decode batch. #running-req: 1, #token: 3728, token usage: 0.10, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:43:59,374 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:00,212 - sglang - INFO - [2025-07-20 17:44:00 TP0] Decode batch. #running-req: 1, #token: 3768, token usage: 0.10, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:44:00,212 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:01,047 - sglang - INFO - [2025-07-20 17:44:01 TP0] Decode batch. #running-req: 1, #token: 3808, token usage: 0.10, gen throughput (token/s): 47.93, #queue-req: 0
- 2025-07-20 17:44:01,047 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:01,884 - sglang - INFO - [2025-07-20 17:44:01 TP0] Decode batch. #running-req: 1, #token: 3848, token usage: 0.10, gen throughput (token/s): 47.77, #queue-req: 0
- 2025-07-20 17:44:01,884 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:02,726 - sglang - INFO - [2025-07-20 17:44:02 TP0] Decode batch. #running-req: 1, #token: 3888, token usage: 0.10, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:44:02,726 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:03,569 - sglang - INFO - [2025-07-20 17:44:03 TP0] Decode batch. #running-req: 1, #token: 3928, token usage: 0.10, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:44:03,569 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:04,414 - sglang - INFO - [2025-07-20 17:44:04 TP0] Decode batch. #running-req: 1, #token: 3968, token usage: 0.10, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:44:04,414 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:05,259 - sglang - INFO - [2025-07-20 17:44:05 TP0] Decode batch. #running-req: 1, #token: 4008, token usage: 0.11, gen throughput (token/s): 47.33, #queue-req: 0
- 2025-07-20 17:44:05,259 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:06,105 - sglang - INFO - [2025-07-20 17:44:06 TP0] Decode batch. #running-req: 1, #token: 4048, token usage: 0.11, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:44:06,105 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:06,944 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:44:06,944 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 405.56 358.35
- finished_output_tokens 97.40 86.68
- sglang_input_tokens 430.18 432.38
- sglang_output_tokens 113.60 119.35
- 2025-07-20 17:44:06,944 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:44:06,944 - sglang - INFO - [2025-07-20 17:44:06 TP0] Decode batch. #running-req: 1, #token: 4088, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:44:06,944 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:07,781 - sglang - INFO - [2025-07-20 17:44:07 TP0] Decode batch. #running-req: 1, #token: 4128, token usage: 0.11, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:44:07,781 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:08,618 - sglang - INFO - [2025-07-20 17:44:08 TP0] Decode batch. #running-req: 1, #token: 4168, token usage: 0.11, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:44:08,618 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:09,462 - sglang - INFO - [2025-07-20 17:44:09 TP0] Decode batch. #running-req: 1, #token: 4208, token usage: 0.11, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:44:09,462 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:10,307 - sglang - INFO - [2025-07-20 17:44:10 TP0] Decode batch. #running-req: 1, #token: 4248, token usage: 0.11, gen throughput (token/s): 47.36, #queue-req: 0
- 2025-07-20 17:44:10,307 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:11,149 - sglang - INFO - [2025-07-20 17:44:11 TP0] Decode batch. #running-req: 1, #token: 4288, token usage: 0.11, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:44:11,150 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:11,992 - sglang - INFO - [2025-07-20 17:44:11 TP0] Decode batch. #running-req: 1, #token: 4328, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:44:11,992 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:12,839 - sglang - INFO - [2025-07-20 17:44:12 TP0] Decode batch. #running-req: 1, #token: 4368, token usage: 0.11, gen throughput (token/s): 47.20, #queue-req: 0
- 2025-07-20 17:44:12,840 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:13,682 - sglang - INFO - [2025-07-20 17:44:13 TP0] Decode batch. #running-req: 1, #token: 4408, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-07-20 17:44:13,682 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:14,521 - sglang - INFO - [2025-07-20 17:44:14 TP0] Decode batch. #running-req: 1, #token: 4448, token usage: 0.12, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:44:14,521 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:15,360 - sglang - INFO - [2025-07-20 17:44:15 TP0] Decode batch. #running-req: 1, #token: 4488, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:44:15,360 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:16,205 - sglang - INFO - [2025-07-20 17:44:16 TP0] Decode batch. #running-req: 1, #token: 4528, token usage: 0.12, gen throughput (token/s): 47.29, #queue-req: 0
- 2025-07-20 17:44:16,206 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:16,945 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:44:16,946 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 402.10 358.35
- finished_output_tokens 96.57 86.68
- sglang_input_tokens 426.51 432.38
- sglang_output_tokens 112.63 119.35
- 2025-07-20 17:44:16,946 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:44:17,047 - sglang - INFO - [2025-07-20 17:44:17 TP0] Decode batch. #running-req: 1, #token: 4568, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:44:17,047 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:17,886 - sglang - INFO - [2025-07-20 17:44:17 TP0] Decode batch. #running-req: 1, #token: 4608, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:44:17,886 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:18,724 - sglang - INFO - [2025-07-20 17:44:18 TP0] Decode batch. #running-req: 1, #token: 4648, token usage: 0.12, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:44:18,724 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:19,571 - sglang - INFO - [2025-07-20 17:44:19 TP0] Decode batch. #running-req: 1, #token: 4688, token usage: 0.12, gen throughput (token/s): 47.21, #queue-req: 0
- 2025-07-20 17:44:19,572 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:20,417 - sglang - INFO - [2025-07-20 17:44:20 TP0] Decode batch. #running-req: 1, #token: 4728, token usage: 0.12, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:44:20,417 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:21,255 - sglang - INFO - [2025-07-20 17:44:21 TP0] Decode batch. #running-req: 1, #token: 4768, token usage: 0.13, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:44:21,256 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:22,095 - sglang - INFO - [2025-07-20 17:44:22 TP0] Decode batch. #running-req: 1, #token: 4808, token usage: 0.13, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-07-20 17:44:22,095 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:22,940 - sglang - INFO - [2025-07-20 17:44:22 TP0] Decode batch. #running-req: 1, #token: 4848, token usage: 0.13, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:44:22,940 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:23,779 - sglang - INFO - [2025-07-20 17:44:23 TP0] Decode batch. #running-req: 1, #token: 4888, token usage: 0.13, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-07-20 17:44:23,780 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:24,617 - sglang - INFO - [2025-07-20 17:44:24 TP0] Decode batch. #running-req: 1, #token: 4928, token usage: 0.13, gen throughput (token/s): 47.77, #queue-req: 0
- 2025-07-20 17:44:24,617 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:25,454 - sglang - INFO - [2025-07-20 17:44:25 TP0] Decode batch. #running-req: 1, #token: 4968, token usage: 0.13, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:44:25,454 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:26,293 - sglang - INFO - [2025-07-20 17:44:26 TP0] Decode batch. #running-req: 1, #token: 5008, token usage: 0.13, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:44:26,294 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:26,947 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:44:26,947 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 398.69 358.35
- finished_output_tokens 95.75 86.68
- sglang_input_tokens 422.89 432.38
- sglang_output_tokens 111.67 119.35
- 2025-07-20 17:44:26,948 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:44:27,141 - sglang - INFO - [2025-07-20 17:44:27 TP0] Decode batch. #running-req: 1, #token: 5048, token usage: 0.13, gen throughput (token/s): 47.19, #queue-req: 0
- 2025-07-20 17:44:27,141 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:27,982 - sglang - INFO - [2025-07-20 17:44:27 TP0] Decode batch. #running-req: 1, #token: 5088, token usage: 0.13, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:44:27,982 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:28,822 - sglang - INFO - [2025-07-20 17:44:28 TP0] Decode batch. #running-req: 1, #token: 5128, token usage: 0.13, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:44:28,823 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:29,668 - sglang - INFO - [2025-07-20 17:44:29 TP0] Decode batch. #running-req: 1, #token: 5168, token usage: 0.14, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:44:29,668 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:30,510 - sglang - INFO - [2025-07-20 17:44:30 TP0] Decode batch. #running-req: 1, #token: 5208, token usage: 0.14, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:44:30,510 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:31,348 - sglang - INFO - [2025-07-20 17:44:31 TP0] Decode batch. #running-req: 1, #token: 5248, token usage: 0.14, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:44:31,349 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:32,188 - sglang - INFO - [2025-07-20 17:44:32 TP0] Decode batch. #running-req: 1, #token: 5288, token usage: 0.14, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:44:32,189 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:33,036 - sglang - INFO - [2025-07-20 17:44:33 TP0] Decode batch. #running-req: 1, #token: 5328, token usage: 0.14, gen throughput (token/s): 47.20, #queue-req: 0
- 2025-07-20 17:44:33,036 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:33,888 - sglang - INFO - [2025-07-20 17:44:33 TP0] Decode batch. #running-req: 1, #token: 5368, token usage: 0.14, gen throughput (token/s): 46.93, #queue-req: 0
- 2025-07-20 17:44:33,889 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:34,731 - sglang - INFO - [2025-07-20 17:44:34 TP0] Decode batch. #running-req: 1, #token: 5408, token usage: 0.14, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:44:34,732 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:35,573 - sglang - INFO - [2025-07-20 17:44:35 TP0] Decode batch. #running-req: 1, #token: 5448, token usage: 0.14, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:44:35,573 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:36,419 - sglang - INFO - [2025-07-20 17:44:36 TP0] Decode batch. #running-req: 1, #token: 5488, token usage: 0.14, gen throughput (token/s): 47.26, #queue-req: 0
- 2025-07-20 17:44:36,419 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:36,949 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:44:36,949 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 395.34 358.35
- finished_output_tokens 94.94 86.68
- sglang_input_tokens 419.34 432.38
- sglang_output_tokens 110.73 119.35
- 2025-07-20 17:44:36,949 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:44:37,266 - sglang - INFO - [2025-07-20 17:44:37 TP0] Decode batch. #running-req: 1, #token: 5528, token usage: 0.15, gen throughput (token/s): 47.22, #queue-req: 0
- 2025-07-20 17:44:37,266 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:38,113 - sglang - INFO - [2025-07-20 17:44:38 TP0] Decode batch. #running-req: 1, #token: 5568, token usage: 0.15, gen throughput (token/s): 47.22, #queue-req: 0
- 2025-07-20 17:44:38,113 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:38,948 - __main__ - WARNING - JSON decode error on attempt 2 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:44:39,136 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 17:44:39,280 - sglang - INFO - [2025-07-20 17:44:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:44:39,280 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:44:40,082 - sglang - INFO - [2025-07-20 17:44:40 TP0] Decode batch. #running-req: 1, #token: 2609, token usage: 0.07, gen throughput (token/s): 20.32, #queue-req: 0
- 2025-07-20 17:44:40,082 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:40,927 - sglang - INFO - [2025-07-20 17:44:40 TP0] Decode batch. #running-req: 1, #token: 2649, token usage: 0.07, gen throughput (token/s): 47.32, #queue-req: 0
- 2025-07-20 17:44:40,927 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:41,762 - sglang - INFO - [2025-07-20 17:44:41 TP0] Decode batch. #running-req: 1, #token: 2689, token usage: 0.07, gen throughput (token/s): 47.89, #queue-req: 0
- 2025-07-20 17:44:41,763 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:42,596 - sglang - INFO - [2025-07-20 17:44:42 TP0] Decode batch. #running-req: 1, #token: 2729, token usage: 0.07, gen throughput (token/s): 47.97, #queue-req: 0
- 2025-07-20 17:44:42,597 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:43,435 - sglang - INFO - [2025-07-20 17:44:43 TP0] Decode batch. #running-req: 1, #token: 2769, token usage: 0.07, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:44:43,436 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:44,273 - sglang - INFO - [2025-07-20 17:44:44 TP0] Decode batch. #running-req: 1, #token: 2809, token usage: 0.07, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:44:44,273 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:45,116 - sglang - INFO - [2025-07-20 17:44:45 TP0] Decode batch. #running-req: 1, #token: 2849, token usage: 0.07, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-07-20 17:44:45,116 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:45,956 - sglang - INFO - [2025-07-20 17:44:45 TP0] Decode batch. #running-req: 1, #token: 2889, token usage: 0.08, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:44:45,956 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:46,798 - sglang - INFO - [2025-07-20 17:44:46 TP0] Decode batch. #running-req: 1, #token: 2929, token usage: 0.08, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-07-20 17:44:46,799 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:46,950 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:44:46,951 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 392.05 214.66
- finished_output_tokens 94.15 55.51
- sglang_input_tokens 418.02 297.38
- sglang_output_tokens 112.31 98.17
- 2025-07-20 17:44:46,951 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:44:47,645 - sglang - INFO - [2025-07-20 17:44:47 TP0] Decode batch. #running-req: 1, #token: 2969, token usage: 0.08, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:44:47,645 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:48,483 - sglang - INFO - [2025-07-20 17:44:48 TP0] Decode batch. #running-req: 1, #token: 3009, token usage: 0.08, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:44:48,484 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:49,320 - sglang - INFO - [2025-07-20 17:44:49 TP0] Decode batch. #running-req: 1, #token: 3049, token usage: 0.08, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:44:49,320 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:50,158 - sglang - INFO - [2025-07-20 17:44:50 TP0] Decode batch. #running-req: 1, #token: 3089, token usage: 0.08, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:44:50,158 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:51,001 - sglang - INFO - [2025-07-20 17:44:51 TP0] Decode batch. #running-req: 1, #token: 3129, token usage: 0.08, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-07-20 17:44:51,001 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:51,843 - sglang - INFO - [2025-07-20 17:44:51 TP0] Decode batch. #running-req: 1, #token: 3169, token usage: 0.08, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:44:51,843 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:52,684 - sglang - INFO - [2025-07-20 17:44:52 TP0] Decode batch. #running-req: 1, #token: 3209, token usage: 0.08, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:44:52,685 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:53,529 - sglang - INFO - [2025-07-20 17:44:53 TP0] Decode batch. #running-req: 1, #token: 3249, token usage: 0.09, gen throughput (token/s): 47.36, #queue-req: 0
- 2025-07-20 17:44:53,529 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:54,376 - sglang - INFO - [2025-07-20 17:44:54 TP0] Decode batch. #running-req: 1, #token: 3289, token usage: 0.09, gen throughput (token/s): 47.23, #queue-req: 0
- 2025-07-20 17:44:54,376 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:55,214 - sglang - INFO - [2025-07-20 17:44:55 TP0] Decode batch. #running-req: 1, #token: 3329, token usage: 0.09, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:44:55,214 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:56,048 - sglang - INFO - [2025-07-20 17:44:56 TP0] Decode batch. #running-req: 1, #token: 3369, token usage: 0.09, gen throughput (token/s): 47.95, #queue-req: 0
- 2025-07-20 17:44:56,048 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:56,886 - sglang - INFO - [2025-07-20 17:44:56 TP0] Decode batch. #running-req: 1, #token: 3409, token usage: 0.09, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:44:56,886 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:56,952 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:44:56,952 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 388.81 214.66
- finished_output_tokens 93.38 55.51
- sglang_input_tokens 414.57 297.38
- sglang_output_tokens 111.38 98.17
- 2025-07-20 17:44:56,953 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:44:57,729 - sglang - INFO - [2025-07-20 17:44:57 TP0] Decode batch. #running-req: 1, #token: 3449, token usage: 0.09, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-07-20 17:44:57,729 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:58,570 - sglang - INFO - [2025-07-20 17:44:58 TP0] Decode batch. #running-req: 1, #token: 3489, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:44:58,570 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:44:59,416 - sglang - INFO - [2025-07-20 17:44:59 TP0] Decode batch. #running-req: 1, #token: 3529, token usage: 0.09, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:44:59,416 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:00,262 - sglang - INFO - [2025-07-20 17:45:00 TP0] Decode batch. #running-req: 1, #token: 3569, token usage: 0.09, gen throughput (token/s): 47.26, #queue-req: 0
- 2025-07-20 17:45:00,262 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:01,110 - sglang - INFO - [2025-07-20 17:45:01 TP0] Decode batch. #running-req: 1, #token: 3609, token usage: 0.10, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-07-20 17:45:01,110 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:01,952 - sglang - INFO - [2025-07-20 17:45:01 TP0] Decode batch. #running-req: 1, #token: 3649, token usage: 0.10, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:45:01,953 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:02,791 - sglang - INFO - [2025-07-20 17:45:02 TP0] Decode batch. #running-req: 1, #token: 3689, token usage: 0.10, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:45:02,791 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:03,628 - sglang - INFO - [2025-07-20 17:45:03 TP0] Decode batch. #running-req: 1, #token: 3729, token usage: 0.10, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:45:03,629 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:04,475 - sglang - INFO - [2025-07-20 17:45:04 TP0] Decode batch. #running-req: 1, #token: 3769, token usage: 0.10, gen throughput (token/s): 47.23, #queue-req: 0
- 2025-07-20 17:45:04,476 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:05,317 - sglang - INFO - [2025-07-20 17:45:05 TP0] Decode batch. #running-req: 1, #token: 3809, token usage: 0.10, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:45:05,317 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:06,157 - sglang - INFO - [2025-07-20 17:45:06 TP0] Decode batch. #running-req: 1, #token: 3849, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:45:06,157 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:06,955 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:45:06,955 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 385.63 214.66
- finished_output_tokens 92.61 55.51
- sglang_input_tokens 411.17 297.38
- sglang_output_tokens 110.47 98.17
- 2025-07-20 17:45:06,955 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:45:06,996 - sglang - INFO - [2025-07-20 17:45:06 TP0] Decode batch. #running-req: 1, #token: 3889, token usage: 0.10, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:45:06,996 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:07,840 - sglang - INFO - [2025-07-20 17:45:07 TP0] Decode batch. #running-req: 1, #token: 3929, token usage: 0.10, gen throughput (token/s): 47.36, #queue-req: 0
- 2025-07-20 17:45:07,841 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:08,685 - sglang - INFO - [2025-07-20 17:45:08 TP0] Decode batch. #running-req: 1, #token: 3969, token usage: 0.10, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:45:08,686 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:09,526 - sglang - INFO - [2025-07-20 17:45:09 TP0] Decode batch. #running-req: 1, #token: 4009, token usage: 0.11, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:45:09,526 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:10,362 - sglang - INFO - [2025-07-20 17:45:10 TP0] Decode batch. #running-req: 1, #token: 4049, token usage: 0.11, gen throughput (token/s): 47.79, #queue-req: 0
- 2025-07-20 17:45:10,363 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:11,204 - sglang - INFO - [2025-07-20 17:45:11 TP0] Decode batch. #running-req: 1, #token: 4089, token usage: 0.11, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:45:11,204 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:12,047 - sglang - INFO - [2025-07-20 17:45:12 TP0] Decode batch. #running-req: 1, #token: 4129, token usage: 0.11, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-07-20 17:45:12,047 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:12,886 - sglang - INFO - [2025-07-20 17:45:12 TP0] Decode batch. #running-req: 1, #token: 4169, token usage: 0.11, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:45:12,886 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:13,724 - sglang - INFO - [2025-07-20 17:45:13 TP0] Decode batch. #running-req: 1, #token: 4209, token usage: 0.11, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:45:13,725 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:14,567 - sglang - INFO - [2025-07-20 17:45:14 TP0] Decode batch. #running-req: 1, #token: 4249, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:45:14,567 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:15,416 - sglang - INFO - [2025-07-20 17:45:15 TP0] Decode batch. #running-req: 1, #token: 4289, token usage: 0.11, gen throughput (token/s): 47.12, #queue-req: 0
- 2025-07-20 17:45:15,416 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:16,258 - sglang - INFO - [2025-07-20 17:45:16 TP0] Decode batch. #running-req: 1, #token: 4329, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:45:16,258 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:16,957 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:45:16,957 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 382.49 214.66
- finished_output_tokens 91.86 55.51
- sglang_input_tokens 407.83 297.38
- sglang_output_tokens 109.57 98.17
- 2025-07-20 17:45:16,957 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:45:17,096 - sglang - INFO - [2025-07-20 17:45:17 TP0] Decode batch. #running-req: 1, #token: 4369, token usage: 0.12, gen throughput (token/s): 47.77, #queue-req: 0
- 2025-07-20 17:45:17,096 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:17,938 - sglang - INFO - [2025-07-20 17:45:17 TP0] Decode batch. #running-req: 1, #token: 4409, token usage: 0.12, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:45:17,938 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:18,785 - sglang - INFO - [2025-07-20 17:45:18 TP0] Decode batch. #running-req: 1, #token: 4449, token usage: 0.12, gen throughput (token/s): 47.26, #queue-req: 0
- 2025-07-20 17:45:18,785 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:19,624 - sglang - INFO - [2025-07-20 17:45:19 TP0] Decode batch. #running-req: 1, #token: 4489, token usage: 0.12, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:45:19,624 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:20,462 - sglang - INFO - [2025-07-20 17:45:20 TP0] Decode batch. #running-req: 1, #token: 4529, token usage: 0.12, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-07-20 17:45:20,463 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:21,301 - sglang - INFO - [2025-07-20 17:45:21 TP0] Decode batch. #running-req: 1, #token: 4569, token usage: 0.12, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:45:21,302 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:22,148 - sglang - INFO - [2025-07-20 17:45:22 TP0] Decode batch. #running-req: 1, #token: 4609, token usage: 0.12, gen throughput (token/s): 47.25, #queue-req: 0
- 2025-07-20 17:45:22,148 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:22,991 - sglang - INFO - [2025-07-20 17:45:22 TP0] Decode batch. #running-req: 1, #token: 4649, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:45:22,991 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:23,830 - sglang - INFO - [2025-07-20 17:45:23 TP0] Decode batch. #running-req: 1, #token: 4689, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:45:23,830 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:24,672 - sglang - INFO - [2025-07-20 17:45:24 TP0] Decode batch. #running-req: 1, #token: 4729, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:45:24,673 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:25,521 - sglang - INFO - [2025-07-20 17:45:25 TP0] Decode batch. #running-req: 1, #token: 4769, token usage: 0.13, gen throughput (token/s): 47.11, #queue-req: 0
- 2025-07-20 17:45:25,522 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:26,362 - sglang - INFO - [2025-07-20 17:45:26 TP0] Decode batch. #running-req: 1, #token: 4809, token usage: 0.13, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:45:26,362 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:26,959 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:45:26,959 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 379.41 214.66
- finished_output_tokens 91.12 55.51
- sglang_input_tokens 404.54 297.38
- sglang_output_tokens 108.69 98.17
- 2025-07-20 17:45:26,959 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:45:27,202 - sglang - INFO - [2025-07-20 17:45:27 TP0] Decode batch. #running-req: 1, #token: 4849, token usage: 0.13, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:45:27,203 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:28,044 - sglang - INFO - [2025-07-20 17:45:28 TP0] Decode batch. #running-req: 1, #token: 4889, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:45:28,044 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:28,894 - sglang - INFO - [2025-07-20 17:45:28 TP0] Decode batch. #running-req: 1, #token: 4929, token usage: 0.13, gen throughput (token/s): 47.05, #queue-req: 0
- 2025-07-20 17:45:28,894 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:29,745 - sglang - INFO - [2025-07-20 17:45:29 TP0] Decode batch. #running-req: 1, #token: 4969, token usage: 0.13, gen throughput (token/s): 47.02, #queue-req: 0
- 2025-07-20 17:45:29,745 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:30,587 - sglang - INFO - [2025-07-20 17:45:30 TP0] Decode batch. #running-req: 1, #token: 5009, token usage: 0.13, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:45:30,587 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:31,428 - sglang - INFO - [2025-07-20 17:45:31 TP0] Decode batch. #running-req: 1, #token: 5049, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:45:31,428 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:32,274 - sglang - INFO - [2025-07-20 17:45:32 TP0] Decode batch. #running-req: 1, #token: 5089, token usage: 0.13, gen throughput (token/s): 47.26, #queue-req: 0
- 2025-07-20 17:45:32,275 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:33,120 - sglang - INFO - [2025-07-20 17:45:33 TP0] Decode batch. #running-req: 1, #token: 5129, token usage: 0.14, gen throughput (token/s): 47.32, #queue-req: 0
- 2025-07-20 17:45:33,120 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:33,965 - sglang - INFO - [2025-07-20 17:45:33 TP0] Decode batch. #running-req: 1, #token: 5169, token usage: 0.14, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:45:33,965 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:34,812 - sglang - INFO - [2025-07-20 17:45:34 TP0] Decode batch. #running-req: 1, #token: 5209, token usage: 0.14, gen throughput (token/s): 47.22, #queue-req: 0
- 2025-07-20 17:45:34,813 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:35,664 - sglang - INFO - [2025-07-20 17:45:35 TP0] Decode batch. #running-req: 1, #token: 5249, token usage: 0.14, gen throughput (token/s): 46.99, #queue-req: 0
- 2025-07-20 17:45:35,664 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:36,519 - sglang - INFO - [2025-07-20 17:45:36 TP0] Decode batch. #running-req: 1, #token: 5289, token usage: 0.14, gen throughput (token/s): 46.74, #queue-req: 0
- 2025-07-20 17:45:36,520 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:36,960 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:45:36,961 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 376.37 214.66
- finished_output_tokens 90.39 55.51
- sglang_input_tokens 401.31 297.38
- sglang_output_tokens 107.82 98.17
- 2025-07-20 17:45:36,961 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:45:37,365 - sglang - INFO - [2025-07-20 17:45:37 TP0] Decode batch. #running-req: 1, #token: 5329, token usage: 0.14, gen throughput (token/s): 47.32, #queue-req: 0
- 2025-07-20 17:45:37,365 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:38,208 - sglang - INFO - [2025-07-20 17:45:38 TP0] Decode batch. #running-req: 1, #token: 5369, token usage: 0.14, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:45:38,208 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:39,058 - sglang - INFO - [2025-07-20 17:45:39 TP0] Decode batch. #running-req: 1, #token: 5409, token usage: 0.14, gen throughput (token/s): 47.09, #queue-req: 0
- 2025-07-20 17:45:39,058 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:39,906 - sglang - INFO - [2025-07-20 17:45:39 TP0] Decode batch. #running-req: 1, #token: 5449, token usage: 0.14, gen throughput (token/s): 47.15, #queue-req: 0
- 2025-07-20 17:45:39,906 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:40,754 - sglang - INFO - [2025-07-20 17:45:40 TP0] Decode batch. #running-req: 1, #token: 5489, token usage: 0.14, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-07-20 17:45:40,754 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:41,604 - sglang - INFO - [2025-07-20 17:45:41 TP0] Decode batch. #running-req: 1, #token: 5529, token usage: 0.15, gen throughput (token/s): 47.07, #queue-req: 0
- 2025-07-20 17:45:41,604 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:42,456 - sglang - INFO - [2025-07-20 17:45:42 TP0] Decode batch. #running-req: 1, #token: 5569, token usage: 0.15, gen throughput (token/s): 46.95, #queue-req: 0
- 2025-07-20 17:45:42,456 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:43,285 - __main__ - WARNING - JSON decode error on attempt 3 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:45:43,473 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 17:45:43,635 - sglang - INFO - [2025-07-20 17:45:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:45:43,636 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:45:44,458 - sglang - INFO - [2025-07-20 17:45:44 TP0] Decode batch. #running-req: 1, #token: 2610, token usage: 0.07, gen throughput (token/s): 19.97, #queue-req: 0
- 2025-07-20 17:45:44,459 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:45,291 - sglang - INFO - [2025-07-20 17:45:45 TP0] Decode batch. #running-req: 1, #token: 2650, token usage: 0.07, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:45:45,291 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:46,129 - sglang - INFO - [2025-07-20 17:45:46 TP0] Decode batch. #running-req: 1, #token: 2690, token usage: 0.07, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:45:46,129 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:46,962 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:45:46,962 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 373.39 121.86
- finished_output_tokens 89.67 31.49
- sglang_input_tokens 400.19 177.49
- sglang_output_tokens 109.34 74.28
- 2025-07-20 17:45:46,962 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:45:46,966 - sglang - INFO - [2025-07-20 17:45:46 TP0] Decode batch. #running-req: 1, #token: 2730, token usage: 0.07, gen throughput (token/s): 47.78, #queue-req: 0
- 2025-07-20 17:45:46,966 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:47,804 - sglang - INFO - [2025-07-20 17:45:47 TP0] Decode batch. #running-req: 1, #token: 2770, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:45:47,805 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:48,644 - sglang - INFO - [2025-07-20 17:45:48 TP0] Decode batch. #running-req: 1, #token: 2810, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:45:48,644 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:49,486 - sglang - INFO - [2025-07-20 17:45:49 TP0] Decode batch. #running-req: 1, #token: 2850, token usage: 0.08, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:45:49,487 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:50,329 - sglang - INFO - [2025-07-20 17:45:50 TP0] Decode batch. #running-req: 1, #token: 2890, token usage: 0.08, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-07-20 17:45:50,330 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:51,168 - sglang - INFO - [2025-07-20 17:45:51 TP0] Decode batch. #running-req: 1, #token: 2930, token usage: 0.08, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:45:51,169 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:52,001 - sglang - INFO - [2025-07-20 17:45:52 TP0] Decode batch. #running-req: 1, #token: 2970, token usage: 0.08, gen throughput (token/s): 48.05, #queue-req: 0
- 2025-07-20 17:45:52,001 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:52,837 - sglang - INFO - [2025-07-20 17:45:52 TP0] Decode batch. #running-req: 1, #token: 3010, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:45:52,838 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:53,679 - sglang - INFO - [2025-07-20 17:45:53 TP0] Decode batch. #running-req: 1, #token: 3050, token usage: 0.08, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-07-20 17:45:53,679 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:54,517 - sglang - INFO - [2025-07-20 17:45:54 TP0] Decode batch. #running-req: 1, #token: 3090, token usage: 0.08, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:45:54,517 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:55,359 - sglang - INFO - [2025-07-20 17:45:55 TP0] Decode batch. #running-req: 1, #token: 3130, token usage: 0.08, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:45:55,359 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:56,203 - sglang - INFO - [2025-07-20 17:45:56 TP0] Decode batch. #running-req: 1, #token: 3170, token usage: 0.08, gen throughput (token/s): 47.39, #queue-req: 0
- 2025-07-20 17:45:56,203 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:56,964 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:45:56,964 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 370.45 121.86
- finished_output_tokens 88.97 31.49
- sglang_input_tokens 397.04 177.49
- sglang_output_tokens 108.48 74.28
- 2025-07-20 17:45:56,964 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:45:57,048 - sglang - INFO - [2025-07-20 17:45:57 TP0] Decode batch. #running-req: 1, #token: 3210, token usage: 0.08, gen throughput (token/s): 47.32, #queue-req: 0
- 2025-07-20 17:45:57,049 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:57,884 - sglang - INFO - [2025-07-20 17:45:57 TP0] Decode batch. #running-req: 1, #token: 3250, token usage: 0.09, gen throughput (token/s): 47.85, #queue-req: 0
- 2025-07-20 17:45:57,884 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:58,717 - sglang - INFO - [2025-07-20 17:45:58 TP0] Decode batch. #running-req: 1, #token: 3290, token usage: 0.09, gen throughput (token/s): 48.01, #queue-req: 0
- 2025-07-20 17:45:58,718 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:45:59,555 - sglang - INFO - [2025-07-20 17:45:59 TP0] Decode batch. #running-req: 1, #token: 3330, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:45:59,555 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:00,396 - sglang - INFO - [2025-07-20 17:46:00 TP0] Decode batch. #running-req: 1, #token: 3370, token usage: 0.09, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:46:00,396 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:01,230 - sglang - INFO - [2025-07-20 17:46:01 TP0] Decode batch. #running-req: 1, #token: 3410, token usage: 0.09, gen throughput (token/s): 47.94, #queue-req: 0
- 2025-07-20 17:46:01,231 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:02,065 - sglang - INFO - [2025-07-20 17:46:02 TP0] Decode batch. #running-req: 1, #token: 3450, token usage: 0.09, gen throughput (token/s): 47.95, #queue-req: 0
- 2025-07-20 17:46:02,065 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:02,901 - sglang - INFO - [2025-07-20 17:46:02 TP0] Decode batch. #running-req: 1, #token: 3490, token usage: 0.09, gen throughput (token/s): 47.85, #queue-req: 0
- 2025-07-20 17:46:02,901 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:03,744 - sglang - INFO - [2025-07-20 17:46:03 TP0] Decode batch. #running-req: 1, #token: 3530, token usage: 0.09, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:46:03,744 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:04,585 - sglang - INFO - [2025-07-20 17:46:04 TP0] Decode batch. #running-req: 1, #token: 3570, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:46:04,585 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:05,419 - sglang - INFO - [2025-07-20 17:46:05 TP0] Decode batch. #running-req: 1, #token: 3610, token usage: 0.10, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:46:05,419 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:06,254 - sglang - INFO - [2025-07-20 17:46:06 TP0] Decode batch. #running-req: 1, #token: 3650, token usage: 0.10, gen throughput (token/s): 47.90, #queue-req: 0
- 2025-07-20 17:46:06,254 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:06,965 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:46:06,966 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 367.56 121.86
- finished_output_tokens 88.27 31.49
- sglang_input_tokens 393.94 177.49
- sglang_output_tokens 107.64 74.28
- 2025-07-20 17:46:06,966 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:46:07,096 - sglang - INFO - [2025-07-20 17:46:07 TP0] Decode batch. #running-req: 1, #token: 3690, token usage: 0.10, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:46:07,096 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:07,932 - sglang - INFO - [2025-07-20 17:46:07 TP0] Decode batch. #running-req: 1, #token: 3730, token usage: 0.10, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:46:07,933 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:08,768 - sglang - INFO - [2025-07-20 17:46:08 TP0] Decode batch. #running-req: 1, #token: 3770, token usage: 0.10, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:46:08,769 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:09,605 - sglang - INFO - [2025-07-20 17:46:09 TP0] Decode batch. #running-req: 1, #token: 3810, token usage: 0.10, gen throughput (token/s): 47.78, #queue-req: 0
- 2025-07-20 17:46:09,606 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:10,450 - sglang - INFO - [2025-07-20 17:46:10 TP0] Decode batch. #running-req: 1, #token: 3850, token usage: 0.10, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:46:10,450 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:11,290 - sglang - INFO - [2025-07-20 17:46:11 TP0] Decode batch. #running-req: 1, #token: 3890, token usage: 0.10, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:46:11,290 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:12,125 - sglang - INFO - [2025-07-20 17:46:12 TP0] Decode batch. #running-req: 1, #token: 3930, token usage: 0.10, gen throughput (token/s): 47.87, #queue-req: 0
- 2025-07-20 17:46:12,125 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:12,962 - sglang - INFO - [2025-07-20 17:46:12 TP0] Decode batch. #running-req: 1, #token: 3970, token usage: 0.10, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:46:12,962 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:13,803 - sglang - INFO - [2025-07-20 17:46:13 TP0] Decode batch. #running-req: 1, #token: 4010, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:46:13,803 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:14,640 - sglang - INFO - [2025-07-20 17:46:14 TP0] Decode batch. #running-req: 1, #token: 4050, token usage: 0.11, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:46:14,641 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:15,475 - sglang - INFO - [2025-07-20 17:46:15 TP0] Decode batch. #running-req: 1, #token: 4090, token usage: 0.11, gen throughput (token/s): 47.93, #queue-req: 0
- 2025-07-20 17:46:15,475 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:16,310 - sglang - INFO - [2025-07-20 17:46:16 TP0] Decode batch. #running-req: 1, #token: 4130, token usage: 0.11, gen throughput (token/s): 47.90, #queue-req: 0
- 2025-07-20 17:46:16,310 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:16,967 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:46:16,968 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 364.71 121.86
- finished_output_tokens 87.59 31.49
- sglang_input_tokens 390.89 177.49
- sglang_output_tokens 106.80 74.28
- 2025-07-20 17:46:16,968 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:46:17,147 - sglang - INFO - [2025-07-20 17:46:17 TP0] Decode batch. #running-req: 1, #token: 4170, token usage: 0.11, gen throughput (token/s): 47.81, #queue-req: 0
- 2025-07-20 17:46:17,147 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:17,989 - sglang - INFO - [2025-07-20 17:46:17 TP0] Decode batch. #running-req: 1, #token: 4210, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:46:17,989 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:18,829 - sglang - INFO - [2025-07-20 17:46:18 TP0] Decode batch. #running-req: 1, #token: 4250, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:46:18,829 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:19,669 - sglang - INFO - [2025-07-20 17:46:19 TP0] Decode batch. #running-req: 1, #token: 4290, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:46:19,669 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:20,510 - sglang - INFO - [2025-07-20 17:46:20 TP0] Decode batch. #running-req: 1, #token: 4330, token usage: 0.11, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:46:20,511 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:21,346 - sglang - INFO - [2025-07-20 17:46:21 TP0] Decode batch. #running-req: 1, #token: 4370, token usage: 0.12, gen throughput (token/s): 47.88, #queue-req: 0
- 2025-07-20 17:46:21,346 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:22,184 - sglang - INFO - [2025-07-20 17:46:22 TP0] Decode batch. #running-req: 1, #token: 4410, token usage: 0.12, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:46:22,185 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:23,021 - sglang - INFO - [2025-07-20 17:46:23 TP0] Decode batch. #running-req: 1, #token: 4450, token usage: 0.12, gen throughput (token/s): 47.80, #queue-req: 0
- 2025-07-20 17:46:23,021 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:23,865 - sglang - INFO - [2025-07-20 17:46:23 TP0] Decode batch. #running-req: 1, #token: 4490, token usage: 0.12, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:46:23,865 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:24,712 - sglang - INFO - [2025-07-20 17:46:24 TP0] Decode batch. #running-req: 1, #token: 4530, token usage: 0.12, gen throughput (token/s): 47.24, #queue-req: 0
- 2025-07-20 17:46:24,712 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:25,549 - sglang - INFO - [2025-07-20 17:46:25 TP0] Decode batch. #running-req: 1, #token: 4570, token usage: 0.12, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:46:25,550 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:26,387 - sglang - INFO - [2025-07-20 17:46:26 TP0] Decode batch. #running-req: 1, #token: 4610, token usage: 0.12, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:46:26,388 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:26,969 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:46:26,969 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 361.90 121.86
- finished_output_tokens 86.91 31.49
- sglang_input_tokens 387.88 177.49
- sglang_output_tokens 105.98 74.28
- 2025-07-20 17:46:26,969 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:46:27,230 - sglang - INFO - [2025-07-20 17:46:27 TP0] Decode batch. #running-req: 1, #token: 4650, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:46:27,231 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:28,072 - sglang - INFO - [2025-07-20 17:46:28 TP0] Decode batch. #running-req: 1, #token: 4690, token usage: 0.12, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:46:28,072 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:28,915 - sglang - INFO - [2025-07-20 17:46:28 TP0] Decode batch. #running-req: 1, #token: 4730, token usage: 0.12, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-07-20 17:46:28,915 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:29,758 - sglang - INFO - [2025-07-20 17:46:29 TP0] Decode batch. #running-req: 1, #token: 4770, token usage: 0.13, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-07-20 17:46:29,758 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:30,606 - sglang - INFO - [2025-07-20 17:46:30 TP0] Decode batch. #running-req: 1, #token: 4810, token usage: 0.13, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-07-20 17:46:30,606 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:31,457 - sglang - INFO - [2025-07-20 17:46:31 TP0] Decode batch. #running-req: 1, #token: 4850, token usage: 0.13, gen throughput (token/s): 47.01, #queue-req: 0
- 2025-07-20 17:46:31,457 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:32,301 - sglang - INFO - [2025-07-20 17:46:32 TP0] Decode batch. #running-req: 1, #token: 4890, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:46:32,302 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:33,139 - sglang - INFO - [2025-07-20 17:46:33 TP0] Decode batch. #running-req: 1, #token: 4930, token usage: 0.13, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:46:33,139 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:33,983 - sglang - INFO - [2025-07-20 17:46:33 TP0] Decode batch. #running-req: 1, #token: 4970, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:46:33,984 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:34,828 - sglang - INFO - [2025-07-20 17:46:34 TP0] Decode batch. #running-req: 1, #token: 5010, token usage: 0.13, gen throughput (token/s): 47.36, #queue-req: 0
- 2025-07-20 17:46:34,828 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:35,671 - sglang - INFO - [2025-07-20 17:46:35 TP0] Decode batch. #running-req: 1, #token: 5050, token usage: 0.13, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-07-20 17:46:35,671 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:36,514 - sglang - INFO - [2025-07-20 17:46:36 TP0] Decode batch. #running-req: 1, #token: 5090, token usage: 0.13, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:46:36,515 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:36,971 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:46:36,971 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 359.14 121.86
- finished_output_tokens 86.25 31.49
- sglang_input_tokens 384.92 177.49
- sglang_output_tokens 105.17 74.28
- 2025-07-20 17:46:36,971 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:46:37,362 - sglang - INFO - [2025-07-20 17:46:37 TP0] Decode batch. #running-req: 1, #token: 5130, token usage: 0.14, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-07-20 17:46:37,362 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:38,213 - sglang - INFO - [2025-07-20 17:46:38 TP0] Decode batch. #running-req: 1, #token: 5170, token usage: 0.14, gen throughput (token/s): 46.99, #queue-req: 0
- 2025-07-20 17:46:38,214 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:39,059 - sglang - INFO - [2025-07-20 17:46:39 TP0] Decode batch. #running-req: 1, #token: 5210, token usage: 0.14, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:46:39,059 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:39,898 - sglang - INFO - [2025-07-20 17:46:39 TP0] Decode batch. #running-req: 1, #token: 5250, token usage: 0.14, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:46:39,898 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:40,742 - sglang - INFO - [2025-07-20 17:46:40 TP0] Decode batch. #running-req: 1, #token: 5290, token usage: 0.14, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:46:40,742 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:41,591 - sglang - INFO - [2025-07-20 17:46:41 TP0] Decode batch. #running-req: 1, #token: 5330, token usage: 0.14, gen throughput (token/s): 47.10, #queue-req: 0
- 2025-07-20 17:46:41,591 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:42,437 - sglang - INFO - [2025-07-20 17:46:42 TP0] Decode batch. #running-req: 1, #token: 5370, token usage: 0.14, gen throughput (token/s): 47.28, #queue-req: 0
- 2025-07-20 17:46:42,437 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:43,287 - sglang - INFO - [2025-07-20 17:46:43 TP0] Decode batch. #running-req: 1, #token: 5410, token usage: 0.14, gen throughput (token/s): 47.07, #queue-req: 0
- 2025-07-20 17:46:43,287 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:44,136 - sglang - INFO - [2025-07-20 17:46:44 TP0] Decode batch. #running-req: 1, #token: 5450, token usage: 0.14, gen throughput (token/s): 47.11, #queue-req: 0
- 2025-07-20 17:46:44,136 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:44,990 - sglang - INFO - [2025-07-20 17:46:44 TP0] Decode batch. #running-req: 1, #token: 5490, token usage: 0.14, gen throughput (token/s): 46.84, #queue-req: 0
- 2025-07-20 17:46:44,990 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:45,843 - sglang - INFO - [2025-07-20 17:46:45 TP0] Decode batch. #running-req: 1, #token: 5530, token usage: 0.15, gen throughput (token/s): 46.91, #queue-req: 0
- 2025-07-20 17:46:45,843 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:46,685 - sglang - INFO - [2025-07-20 17:46:46 TP0] Decode batch. #running-req: 1, #token: 5570, token usage: 0.15, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:46:46,686 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:46,973 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:46:46,974 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 356.42 121.86
- finished_output_tokens 85.60 31.49
- sglang_input_tokens 382.01 177.49
- sglang_output_tokens 104.38 74.28
- 2025-07-20 17:46:46,974 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:46:47,471 - __main__ - WARNING - JSON decode error on attempt 4 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:46:47,661 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 17:46:47,823 - sglang - INFO - [2025-07-20 17:46:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:46:47,823 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:46:48,666 - sglang - INFO - [2025-07-20 17:46:48 TP0] Decode batch. #running-req: 1, #token: 2611, token usage: 0.07, gen throughput (token/s): 20.19, #queue-req: 0
- 2025-07-20 17:46:48,666 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:49,504 - sglang - INFO - [2025-07-20 17:46:49 TP0] Decode batch. #running-req: 1, #token: 2651, token usage: 0.07, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:46:49,504 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:50,343 - sglang - INFO - [2025-07-20 17:46:50 TP0] Decode batch. #running-req: 1, #token: 2691, token usage: 0.07, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:46:50,343 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:51,182 - sglang - INFO - [2025-07-20 17:46:51 TP0] Decode batch. #running-req: 1, #token: 2731, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:46:51,183 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:52,024 - sglang - INFO - [2025-07-20 17:46:52 TP0] Decode batch. #running-req: 1, #token: 2771, token usage: 0.07, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:46:52,025 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:52,864 - sglang - INFO - [2025-07-20 17:46:52 TP0] Decode batch. #running-req: 1, #token: 2811, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:46:52,864 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:53,695 - sglang - INFO - [2025-07-20 17:46:53 TP0] Decode batch. #running-req: 1, #token: 2851, token usage: 0.08, gen throughput (token/s): 48.13, #queue-req: 0
- 2025-07-20 17:46:53,695 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:54,527 - sglang - INFO - [2025-07-20 17:46:54 TP0] Decode batch. #running-req: 1, #token: 2891, token usage: 0.08, gen throughput (token/s): 48.09, #queue-req: 0
- 2025-07-20 17:46:54,527 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:55,365 - sglang - INFO - [2025-07-20 17:46:55 TP0] Decode batch. #running-req: 1, #token: 2931, token usage: 0.08, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-07-20 17:46:55,365 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:56,201 - sglang - INFO - [2025-07-20 17:46:56 TP0] Decode batch. #running-req: 1, #token: 2971, token usage: 0.08, gen throughput (token/s): 47.85, #queue-req: 0
- 2025-07-20 17:46:56,201 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:56,976 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:46:56,976 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 353.74 0.00
- finished_output_tokens 84.95 0.00
- sglang_input_tokens 381.09 51.19
- sglang_output_tokens 105.85 52.98
- 2025-07-20 17:46:56,976 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:46:57,036 - sglang - INFO - [2025-07-20 17:46:57 TP0] Decode batch. #running-req: 1, #token: 3011, token usage: 0.08, gen throughput (token/s): 47.89, #queue-req: 0
- 2025-07-20 17:46:57,037 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:57,871 - sglang - INFO - [2025-07-20 17:46:57 TP0] Decode batch. #running-req: 1, #token: 3051, token usage: 0.08, gen throughput (token/s): 47.93, #queue-req: 0
- 2025-07-20 17:46:57,871 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:58,714 - sglang - INFO - [2025-07-20 17:46:58 TP0] Decode batch. #running-req: 1, #token: 3091, token usage: 0.08, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:46:58,715 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:46:59,552 - sglang - INFO - [2025-07-20 17:46:59 TP0] Decode batch. #running-req: 1, #token: 3131, token usage: 0.08, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:46:59,552 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:00,388 - sglang - INFO - [2025-07-20 17:47:00 TP0] Decode batch. #running-req: 1, #token: 3171, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:47:00,389 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:01,220 - sglang - INFO - [2025-07-20 17:47:01 TP0] Decode batch. #running-req: 1, #token: 3211, token usage: 0.08, gen throughput (token/s): 48.08, #queue-req: 0
- 2025-07-20 17:47:01,221 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:02,061 - sglang - INFO - [2025-07-20 17:47:02 TP0] Decode batch. #running-req: 1, #token: 3251, token usage: 0.09, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:47:02,061 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:02,902 - sglang - INFO - [2025-07-20 17:47:02 TP0] Decode batch. #running-req: 1, #token: 3291, token usage: 0.09, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:47:02,902 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:03,737 - sglang - INFO - [2025-07-20 17:47:03 TP0] Decode batch. #running-req: 1, #token: 3331, token usage: 0.09, gen throughput (token/s): 47.88, #queue-req: 0
- 2025-07-20 17:47:03,737 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:04,571 - sglang - INFO - [2025-07-20 17:47:04 TP0] Decode batch. #running-req: 1, #token: 3371, token usage: 0.09, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:47:04,571 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:05,412 - sglang - INFO - [2025-07-20 17:47:05 TP0] Decode batch. #running-req: 1, #token: 3411, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:47:05,412 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:06,253 - sglang - INFO - [2025-07-20 17:47:06 TP0] Decode batch. #running-req: 1, #token: 3451, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:47:06,253 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:06,978 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:47:06,978 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 351.10 0.00
- finished_output_tokens 84.32 0.00
- sglang_input_tokens 378.25 51.19
- sglang_output_tokens 105.06 52.98
- 2025-07-20 17:47:06,979 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:47:07,086 - sglang - INFO - [2025-07-20 17:47:07 TP0] Decode batch. #running-req: 1, #token: 3491, token usage: 0.09, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:47:07,086 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:07,918 - sglang - INFO - [2025-07-20 17:47:07 TP0] Decode batch. #running-req: 1, #token: 3531, token usage: 0.09, gen throughput (token/s): 48.07, #queue-req: 0
- 2025-07-20 17:47:07,918 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:08,757 - sglang - INFO - [2025-07-20 17:47:08 TP0] Decode batch. #running-req: 1, #token: 3571, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:47:08,757 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:09,595 - sglang - INFO - [2025-07-20 17:47:09 TP0] Decode batch. #running-req: 1, #token: 3611, token usage: 0.10, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:47:09,596 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:10,428 - sglang - INFO - [2025-07-20 17:47:10 TP0] Decode batch. #running-req: 1, #token: 3651, token usage: 0.10, gen throughput (token/s): 48.03, #queue-req: 0
- 2025-07-20 17:47:10,428 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:11,259 - sglang - INFO - [2025-07-20 17:47:11 TP0] Decode batch. #running-req: 1, #token: 3691, token usage: 0.10, gen throughput (token/s): 48.12, #queue-req: 0
- 2025-07-20 17:47:11,260 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:12,092 - sglang - INFO - [2025-07-20 17:47:12 TP0] Decode batch. #running-req: 1, #token: 3731, token usage: 0.10, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:47:12,092 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:12,932 - sglang - INFO - [2025-07-20 17:47:12 TP0] Decode batch. #running-req: 1, #token: 3771, token usage: 0.10, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:47:12,932 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:13,770 - sglang - INFO - [2025-07-20 17:47:13 TP0] Decode batch. #running-req: 1, #token: 3811, token usage: 0.10, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:47:13,770 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:14,603 - sglang - INFO - [2025-07-20 17:47:14 TP0] Decode batch. #running-req: 1, #token: 3851, token usage: 0.10, gen throughput (token/s): 48.02, #queue-req: 0
- 2025-07-20 17:47:14,603 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:15,440 - sglang - INFO - [2025-07-20 17:47:15 TP0] Decode batch. #running-req: 1, #token: 3891, token usage: 0.10, gen throughput (token/s): 47.78, #queue-req: 0
- 2025-07-20 17:47:15,440 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:16,278 - sglang - INFO - [2025-07-20 17:47:16 TP0] Decode batch. #running-req: 1, #token: 3931, token usage: 0.10, gen throughput (token/s): 47.73, #queue-req: 0
- 2025-07-20 17:47:16,278 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:16,980 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:47:16,980 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 348.50 0.00
- finished_output_tokens 83.70 0.00
- sglang_input_tokens 375.45 51.19
- sglang_output_tokens 104.28 52.98
- 2025-07-20 17:47:16,980 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:47:17,113 - sglang - INFO - [2025-07-20 17:47:17 TP0] Decode batch. #running-req: 1, #token: 3971, token usage: 0.10, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:47:17,114 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:17,947 - sglang - INFO - [2025-07-20 17:47:17 TP0] Decode batch. #running-req: 1, #token: 4011, token usage: 0.11, gen throughput (token/s): 47.95, #queue-req: 0
- 2025-07-20 17:47:17,948 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:18,785 - sglang - INFO - [2025-07-20 17:47:18 TP0] Decode batch. #running-req: 1, #token: 4051, token usage: 0.11, gen throughput (token/s): 47.79, #queue-req: 0
- 2025-07-20 17:47:18,785 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:19,630 - sglang - INFO - [2025-07-20 17:47:19 TP0] Decode batch. #running-req: 1, #token: 4091, token usage: 0.11, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:47:19,630 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:20,471 - sglang - INFO - [2025-07-20 17:47:20 TP0] Decode batch. #running-req: 1, #token: 4131, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:47:20,471 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:21,304 - sglang - INFO - [2025-07-20 17:47:21 TP0] Decode batch. #running-req: 1, #token: 4171, token usage: 0.11, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:47:21,305 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:22,142 - sglang - INFO - [2025-07-20 17:47:22 TP0] Decode batch. #running-req: 1, #token: 4211, token usage: 0.11, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:47:22,142 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:22,985 - sglang - INFO - [2025-07-20 17:47:22 TP0] Decode batch. #running-req: 1, #token: 4251, token usage: 0.11, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-07-20 17:47:22,985 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:23,825 - sglang - INFO - [2025-07-20 17:47:23 TP0] Decode batch. #running-req: 1, #token: 4291, token usage: 0.11, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:47:23,826 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:24,666 - sglang - INFO - [2025-07-20 17:47:24 TP0] Decode batch. #running-req: 1, #token: 4331, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:47:24,666 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:25,510 - sglang - INFO - [2025-07-20 17:47:25 TP0] Decode batch. #running-req: 1, #token: 4371, token usage: 0.12, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:47:25,510 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:26,357 - sglang - INFO - [2025-07-20 17:47:26 TP0] Decode batch. #running-req: 1, #token: 4411, token usage: 0.12, gen throughput (token/s): 47.19, #queue-req: 0
- 2025-07-20 17:47:26,358 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:26,981 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:47:26,981 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 345.94 0.00
- finished_output_tokens 83.08 0.00
- sglang_input_tokens 372.69 51.19
- sglang_output_tokens 103.51 52.98
- 2025-07-20 17:47:26,982 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:47:27,201 - sglang - INFO - [2025-07-20 17:47:27 TP0] Decode batch. #running-req: 1, #token: 4451, token usage: 0.12, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-07-20 17:47:27,201 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:28,036 - sglang - INFO - [2025-07-20 17:47:28 TP0] Decode batch. #running-req: 1, #token: 4491, token usage: 0.12, gen throughput (token/s): 47.89, #queue-req: 0
- 2025-07-20 17:47:28,036 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:28,872 - sglang - INFO - [2025-07-20 17:47:28 TP0] Decode batch. #running-req: 1, #token: 4531, token usage: 0.12, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:47:28,873 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:29,716 - sglang - INFO - [2025-07-20 17:47:29 TP0] Decode batch. #running-req: 1, #token: 4571, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:47:29,716 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:30,557 - sglang - INFO - [2025-07-20 17:47:30 TP0] Decode batch. #running-req: 1, #token: 4611, token usage: 0.12, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:47:30,557 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:31,399 - sglang - INFO - [2025-07-20 17:47:31 TP0] Decode batch. #running-req: 1, #token: 4651, token usage: 0.12, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-07-20 17:47:31,399 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:32,245 - sglang - INFO - [2025-07-20 17:47:32 TP0] Decode batch. #running-req: 1, #token: 4691, token usage: 0.12, gen throughput (token/s): 47.29, #queue-req: 0
- 2025-07-20 17:47:32,245 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:33,093 - sglang - INFO - [2025-07-20 17:47:33 TP0] Decode batch. #running-req: 1, #token: 4731, token usage: 0.12, gen throughput (token/s): 47.14, #queue-req: 0
- 2025-07-20 17:47:33,094 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:33,942 - sglang - INFO - [2025-07-20 17:47:33 TP0] Decode batch. #running-req: 1, #token: 4771, token usage: 0.13, gen throughput (token/s): 47.11, #queue-req: 0
- 2025-07-20 17:47:33,943 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:34,783 - sglang - INFO - [2025-07-20 17:47:34 TP0] Decode batch. #running-req: 1, #token: 4811, token usage: 0.13, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:47:34,784 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:35,620 - sglang - INFO - [2025-07-20 17:47:35 TP0] Decode batch. #running-req: 1, #token: 4851, token usage: 0.13, gen throughput (token/s): 47.81, #queue-req: 0
- 2025-07-20 17:47:35,620 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:36,463 - sglang - INFO - [2025-07-20 17:47:36 TP0] Decode batch. #running-req: 1, #token: 4891, token usage: 0.13, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-07-20 17:47:36,464 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:36,983 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:47:36,983 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 343.42 0.00
- finished_output_tokens 82.47 0.00
- sglang_input_tokens 369.97 51.19
- sglang_output_tokens 102.76 52.98
- 2025-07-20 17:47:36,983 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:47:37,305 - sglang - INFO - [2025-07-20 17:47:37 TP0] Decode batch. #running-req: 1, #token: 4931, token usage: 0.13, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:47:37,305 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:38,151 - sglang - INFO - [2025-07-20 17:47:38 TP0] Decode batch. #running-req: 1, #token: 4971, token usage: 0.13, gen throughput (token/s): 47.28, #queue-req: 0
- 2025-07-20 17:47:38,151 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:38,996 - sglang - INFO - [2025-07-20 17:47:38 TP0] Decode batch. #running-req: 1, #token: 5011, token usage: 0.13, gen throughput (token/s): 47.32, #queue-req: 0
- 2025-07-20 17:47:38,997 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:39,845 - sglang - INFO - [2025-07-20 17:47:39 TP0] Decode batch. #running-req: 1, #token: 5051, token usage: 0.13, gen throughput (token/s): 47.14, #queue-req: 0
- 2025-07-20 17:47:39,845 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:40,695 - sglang - INFO - [2025-07-20 17:47:40 TP0] Decode batch. #running-req: 1, #token: 5091, token usage: 0.13, gen throughput (token/s): 47.06, #queue-req: 0
- 2025-07-20 17:47:40,695 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:41,536 - sglang - INFO - [2025-07-20 17:47:41 TP0] Decode batch. #running-req: 1, #token: 5131, token usage: 0.14, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:47:41,536 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:42,373 - sglang - INFO - [2025-07-20 17:47:42 TP0] Decode batch. #running-req: 1, #token: 5171, token usage: 0.14, gen throughput (token/s): 47.77, #queue-req: 0
- 2025-07-20 17:47:42,373 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:43,217 - sglang - INFO - [2025-07-20 17:47:43 TP0] Decode batch. #running-req: 1, #token: 5211, token usage: 0.14, gen throughput (token/s): 47.39, #queue-req: 0
- 2025-07-20 17:47:43,217 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:44,060 - sglang - INFO - [2025-07-20 17:47:44 TP0] Decode batch. #running-req: 1, #token: 5251, token usage: 0.14, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:47:44,060 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:44,908 - sglang - INFO - [2025-07-20 17:47:44 TP0] Decode batch. #running-req: 1, #token: 5291, token usage: 0.14, gen throughput (token/s): 47.19, #queue-req: 0
- 2025-07-20 17:47:44,908 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:45,756 - sglang - INFO - [2025-07-20 17:47:45 TP0] Decode batch. #running-req: 1, #token: 5331, token usage: 0.14, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-07-20 17:47:45,756 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:46,607 - sglang - INFO - [2025-07-20 17:47:46 TP0] Decode batch. #running-req: 1, #token: 5371, token usage: 0.14, gen throughput (token/s): 47.00, #queue-req: 0
- 2025-07-20 17:47:46,608 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:46,985 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:47:46,985 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 340.93 0.00
- finished_output_tokens 81.88 0.00
- sglang_input_tokens 367.29 51.19
- sglang_output_tokens 102.01 52.98
- 2025-07-20 17:47:46,985 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:47:47,460 - sglang - INFO - [2025-07-20 17:47:47 TP0] Decode batch. #running-req: 1, #token: 5411, token usage: 0.14, gen throughput (token/s): 46.91, #queue-req: 0
- 2025-07-20 17:47:47,460 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:48,304 - sglang - INFO - [2025-07-20 17:47:48 TP0] Decode batch. #running-req: 1, #token: 5451, token usage: 0.14, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:47:48,305 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:49,145 - sglang - INFO - [2025-07-20 17:47:49 TP0] Decode batch. #running-req: 1, #token: 5491, token usage: 0.14, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:47:49,145 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:49,991 - sglang - INFO - [2025-07-20 17:47:49 TP0] Decode batch. #running-req: 1, #token: 5531, token usage: 0.15, gen throughput (token/s): 47.28, #queue-req: 0
- 2025-07-20 17:47:49,991 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:50,837 - sglang - INFO - [2025-07-20 17:47:50 TP0] Decode batch. #running-req: 1, #token: 5571, token usage: 0.15, gen throughput (token/s): 47.28, #queue-req: 0
- 2025-07-20 17:47:50,837 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:51,604 - __main__ - WARNING - JSON decode error on attempt 5 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:47:51,790 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 17:47:51,951 - sglang - INFO - [2025-07-20 17:47:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:47:51,951 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:47:52,814 - sglang - INFO - [2025-07-20 17:47:52 TP0] Decode batch. #running-req: 1, #token: 2612, token usage: 0.07, gen throughput (token/s): 20.23, #queue-req: 0
- 2025-07-20 17:47:52,815 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:53,654 - sglang - INFO - [2025-07-20 17:47:53 TP0] Decode batch. #running-req: 1, #token: 2652, token usage: 0.07, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:47:53,654 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:54,493 - sglang - INFO - [2025-07-20 17:47:54 TP0] Decode batch. #running-req: 1, #token: 2692, token usage: 0.07, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-07-20 17:47:54,494 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:55,324 - sglang - INFO - [2025-07-20 17:47:55 TP0] Decode batch. #running-req: 1, #token: 2732, token usage: 0.07, gen throughput (token/s): 48.13, #queue-req: 0
- 2025-07-20 17:47:55,325 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:56,156 - sglang - INFO - [2025-07-20 17:47:56 TP0] Decode batch. #running-req: 1, #token: 2772, token usage: 0.07, gen throughput (token/s): 48.09, #queue-req: 0
- 2025-07-20 17:47:56,157 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:56,987 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:47:56,987 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 338.48 0.00
- finished_output_tokens 81.29 0.00
- sglang_input_tokens 366.53 43.47
- sglang_output_tokens 103.44 50.00
- 2025-07-20 17:47:56,988 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:47:56,992 - sglang - INFO - [2025-07-20 17:47:56 TP0] Decode batch. #running-req: 1, #token: 2812, token usage: 0.07, gen throughput (token/s): 47.85, #queue-req: 0
- 2025-07-20 17:47:56,992 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:57,831 - sglang - INFO - [2025-07-20 17:47:57 TP0] Decode batch. #running-req: 1, #token: 2852, token usage: 0.08, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:47:57,831 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:58,670 - sglang - INFO - [2025-07-20 17:47:58 TP0] Decode batch. #running-req: 1, #token: 2892, token usage: 0.08, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:47:58,670 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:47:59,503 - sglang - INFO - [2025-07-20 17:47:59 TP0] Decode batch. #running-req: 1, #token: 2932, token usage: 0.08, gen throughput (token/s): 48.01, #queue-req: 0
- 2025-07-20 17:47:59,503 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:00,339 - sglang - INFO - [2025-07-20 17:48:00 TP0] Decode batch. #running-req: 1, #token: 2972, token usage: 0.08, gen throughput (token/s): 47.83, #queue-req: 0
- 2025-07-20 17:48:00,340 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:01,179 - sglang - INFO - [2025-07-20 17:48:01 TP0] Decode batch. #running-req: 1, #token: 3012, token usage: 0.08, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:48:01,179 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:02,012 - sglang - INFO - [2025-07-20 17:48:02 TP0] Decode batch. #running-req: 1, #token: 3052, token usage: 0.08, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:48:02,013 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:02,843 - sglang - INFO - [2025-07-20 17:48:02 TP0] Decode batch. #running-req: 1, #token: 3092, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
- 2025-07-20 17:48:02,843 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:03,678 - sglang - INFO - [2025-07-20 17:48:03 TP0] Decode batch. #running-req: 1, #token: 3132, token usage: 0.08, gen throughput (token/s): 47.91, #queue-req: 0
- 2025-07-20 17:48:03,678 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:04,516 - sglang - INFO - [2025-07-20 17:48:04 TP0] Decode batch. #running-req: 1, #token: 3172, token usage: 0.08, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:48:04,516 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:05,346 - sglang - INFO - [2025-07-20 17:48:05 TP0] Decode batch. #running-req: 1, #token: 3212, token usage: 0.08, gen throughput (token/s): 48.14, #queue-req: 0
- 2025-07-20 17:48:05,347 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:06,177 - sglang - INFO - [2025-07-20 17:48:06 TP0] Decode batch. #running-req: 1, #token: 3252, token usage: 0.09, gen throughput (token/s): 48.17, #queue-req: 0
- 2025-07-20 17:48:06,177 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:06,989 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:48:06,989 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 336.06 0.00
- finished_output_tokens 80.71 0.00
- sglang_input_tokens 363.91 43.47
- sglang_output_tokens 102.70 50.00
- 2025-07-20 17:48:06,989 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:48:07,007 - sglang - INFO - [2025-07-20 17:48:07 TP0] Decode batch. #running-req: 1, #token: 3292, token usage: 0.09, gen throughput (token/s): 48.19, #queue-req: 0
- 2025-07-20 17:48:07,007 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:07,843 - sglang - INFO - [2025-07-20 17:48:07 TP0] Decode batch. #running-req: 1, #token: 3332, token usage: 0.09, gen throughput (token/s): 47.86, #queue-req: 0
- 2025-07-20 17:48:07,843 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:08,680 - sglang - INFO - [2025-07-20 17:48:08 TP0] Decode batch. #running-req: 1, #token: 3372, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:48:08,681 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:09,512 - sglang - INFO - [2025-07-20 17:48:09 TP0] Decode batch. #running-req: 1, #token: 3412, token usage: 0.09, gen throughput (token/s): 48.08, #queue-req: 0
- 2025-07-20 17:48:09,513 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:10,347 - sglang - INFO - [2025-07-20 17:48:10 TP0] Decode batch. #running-req: 1, #token: 3452, token usage: 0.09, gen throughput (token/s): 47.96, #queue-req: 0
- 2025-07-20 17:48:10,347 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:11,188 - sglang - INFO - [2025-07-20 17:48:11 TP0] Decode batch. #running-req: 1, #token: 3492, token usage: 0.09, gen throughput (token/s): 47.54, #queue-req: 0
- 2025-07-20 17:48:11,188 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:12,022 - sglang - INFO - [2025-07-20 17:48:12 TP0] Decode batch. #running-req: 1, #token: 3532, token usage: 0.09, gen throughput (token/s): 47.98, #queue-req: 0
- 2025-07-20 17:48:12,022 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:12,853 - sglang - INFO - [2025-07-20 17:48:12 TP0] Decode batch. #running-req: 1, #token: 3572, token usage: 0.09, gen throughput (token/s): 48.09, #queue-req: 0
- 2025-07-20 17:48:12,854 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:13,686 - sglang - INFO - [2025-07-20 17:48:13 TP0] Decode batch. #running-req: 1, #token: 3612, token usage: 0.10, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:48:13,686 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:14,526 - sglang - INFO - [2025-07-20 17:48:14 TP0] Decode batch. #running-req: 1, #token: 3652, token usage: 0.10, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-07-20 17:48:14,527 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:15,370 - sglang - INFO - [2025-07-20 17:48:15 TP0] Decode batch. #running-req: 1, #token: 3692, token usage: 0.10, gen throughput (token/s): 47.39, #queue-req: 0
- 2025-07-20 17:48:15,371 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:16,204 - sglang - INFO - [2025-07-20 17:48:16 TP0] Decode batch. #running-req: 1, #token: 3732, token usage: 0.10, gen throughput (token/s): 48.00, #queue-req: 0
- 2025-07-20 17:48:16,204 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:16,990 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:48:16,991 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 333.68 0.00
- finished_output_tokens 80.14 0.00
- sglang_input_tokens 361.33 43.47
- sglang_output_tokens 101.97 50.00
- 2025-07-20 17:48:16,991 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:48:17,036 - sglang - INFO - [2025-07-20 17:48:17 TP0] Decode batch. #running-req: 1, #token: 3772, token usage: 0.10, gen throughput (token/s): 48.04, #queue-req: 0
- 2025-07-20 17:48:17,037 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:17,875 - sglang - INFO - [2025-07-20 17:48:17 TP0] Decode batch. #running-req: 1, #token: 3812, token usage: 0.10, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:48:17,875 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:18,713 - sglang - INFO - [2025-07-20 17:48:18 TP0] Decode batch. #running-req: 1, #token: 3852, token usage: 0.10, gen throughput (token/s): 47.72, #queue-req: 0
- 2025-07-20 17:48:18,714 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:19,555 - sglang - INFO - [2025-07-20 17:48:19 TP0] Decode batch. #running-req: 1, #token: 3892, token usage: 0.10, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:48:19,556 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:20,395 - sglang - INFO - [2025-07-20 17:48:20 TP0] Decode batch. #running-req: 1, #token: 3932, token usage: 0.10, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:48:20,395 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:21,238 - sglang - INFO - [2025-07-20 17:48:21 TP0] Decode batch. #running-req: 1, #token: 3972, token usage: 0.10, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:48:21,239 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:22,085 - sglang - INFO - [2025-07-20 17:48:22 TP0] Decode batch. #running-req: 1, #token: 4012, token usage: 0.11, gen throughput (token/s): 47.26, #queue-req: 0
- 2025-07-20 17:48:22,085 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:22,924 - sglang - INFO - [2025-07-20 17:48:22 TP0] Decode batch. #running-req: 1, #token: 4052, token usage: 0.11, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:48:22,924 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:23,757 - sglang - INFO - [2025-07-20 17:48:23 TP0] Decode batch. #running-req: 1, #token: 4092, token usage: 0.11, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:48:23,758 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:24,596 - sglang - INFO - [2025-07-20 17:48:24 TP0] Decode batch. #running-req: 1, #token: 4132, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:48:24,596 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:25,436 - sglang - INFO - [2025-07-20 17:48:25 TP0] Decode batch. #running-req: 1, #token: 4172, token usage: 0.11, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:48:25,436 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:26,277 - sglang - INFO - [2025-07-20 17:48:26 TP0] Decode batch. #running-req: 1, #token: 4212, token usage: 0.11, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:48:26,277 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:26,992 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:48:26,993 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 331.33 0.00
- finished_output_tokens 79.57 0.00
- sglang_input_tokens 358.78 43.47
- sglang_output_tokens 101.25 50.00
- 2025-07-20 17:48:26,993 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:48:27,119 - sglang - INFO - [2025-07-20 17:48:27 TP0] Decode batch. #running-req: 1, #token: 4252, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:48:27,120 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:27,964 - sglang - INFO - [2025-07-20 17:48:27 TP0] Decode batch. #running-req: 1, #token: 4292, token usage: 0.11, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:48:27,964 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:28,811 - sglang - INFO - [2025-07-20 17:48:28 TP0] Decode batch. #running-req: 1, #token: 4332, token usage: 0.11, gen throughput (token/s): 47.21, #queue-req: 0
- 2025-07-20 17:48:28,812 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:29,650 - sglang - INFO - [2025-07-20 17:48:29 TP0] Decode batch. #running-req: 1, #token: 4372, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:48:29,650 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:30,485 - sglang - INFO - [2025-07-20 17:48:30 TP0] Decode batch. #running-req: 1, #token: 4412, token usage: 0.12, gen throughput (token/s): 47.94, #queue-req: 0
- 2025-07-20 17:48:30,485 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:31,320 - sglang - INFO - [2025-07-20 17:48:31 TP0] Decode batch. #running-req: 1, #token: 4452, token usage: 0.12, gen throughput (token/s): 47.86, #queue-req: 0
- 2025-07-20 17:48:31,321 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:32,161 - sglang - INFO - [2025-07-20 17:48:32 TP0] Decode batch. #running-req: 1, #token: 4492, token usage: 0.12, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:48:32,162 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:33,001 - sglang - INFO - [2025-07-20 17:48:33 TP0] Decode batch. #running-req: 1, #token: 4532, token usage: 0.12, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-07-20 17:48:33,001 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:33,846 - sglang - INFO - [2025-07-20 17:48:33 TP0] Decode batch. #running-req: 1, #token: 4572, token usage: 0.12, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:48:33,846 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:34,692 - sglang - INFO - [2025-07-20 17:48:34 TP0] Decode batch. #running-req: 1, #token: 4612, token usage: 0.12, gen throughput (token/s): 47.24, #queue-req: 0
- 2025-07-20 17:48:34,693 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:35,541 - sglang - INFO - [2025-07-20 17:48:35 TP0] Decode batch. #running-req: 1, #token: 4652, token usage: 0.12, gen throughput (token/s): 47.11, #queue-req: 0
- 2025-07-20 17:48:35,542 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:36,386 - sglang - INFO - [2025-07-20 17:48:36 TP0] Decode batch. #running-req: 1, #token: 4692, token usage: 0.12, gen throughput (token/s): 47.39, #queue-req: 0
- 2025-07-20 17:48:36,386 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:36,994 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:48:36,994 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 329.01 0.00
- finished_output_tokens 79.01 0.00
- sglang_input_tokens 356.28 43.47
- sglang_output_tokens 100.54 50.00
- 2025-07-20 17:48:36,995 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:48:37,223 - sglang - INFO - [2025-07-20 17:48:37 TP0] Decode batch. #running-req: 1, #token: 4732, token usage: 0.12, gen throughput (token/s): 47.74, #queue-req: 0
- 2025-07-20 17:48:37,224 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:38,060 - sglang - INFO - [2025-07-20 17:48:38 TP0] Decode batch. #running-req: 1, #token: 4772, token usage: 0.13, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:48:38,060 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:38,904 - sglang - INFO - [2025-07-20 17:48:38 TP0] Decode batch. #running-req: 1, #token: 4812, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:48:38,904 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:39,745 - sglang - INFO - [2025-07-20 17:48:39 TP0] Decode batch. #running-req: 1, #token: 4852, token usage: 0.13, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-07-20 17:48:39,745 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:40,589 - sglang - INFO - [2025-07-20 17:48:40 TP0] Decode batch. #running-req: 1, #token: 4892, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:48:40,589 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:41,435 - sglang - INFO - [2025-07-20 17:48:41 TP0] Decode batch. #running-req: 1, #token: 4932, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-07-20 17:48:41,435 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:42,284 - sglang - INFO - [2025-07-20 17:48:42 TP0] Decode batch. #running-req: 1, #token: 4972, token usage: 0.13, gen throughput (token/s): 47.11, #queue-req: 0
- 2025-07-20 17:48:42,284 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:43,133 - sglang - INFO - [2025-07-20 17:48:43 TP0] Decode batch. #running-req: 1, #token: 5012, token usage: 0.13, gen throughput (token/s): 47.13, #queue-req: 0
- 2025-07-20 17:48:43,133 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:43,973 - sglang - INFO - [2025-07-20 17:48:43 TP0] Decode batch. #running-req: 1, #token: 5052, token usage: 0.13, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:48:43,973 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:44,809 - sglang - INFO - [2025-07-20 17:48:44 TP0] Decode batch. #running-req: 1, #token: 5092, token usage: 0.13, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:48:44,809 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:45,653 - sglang - INFO - [2025-07-20 17:48:45 TP0] Decode batch. #running-req: 1, #token: 5132, token usage: 0.14, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:48:45,653 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:46,496 - sglang - INFO - [2025-07-20 17:48:46 TP0] Decode batch. #running-req: 1, #token: 5172, token usage: 0.14, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:48:46,497 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:46,997 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:48:46,997 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 326.73 0.00
- finished_output_tokens 78.47 0.00
- sglang_input_tokens 353.80 43.47
- sglang_output_tokens 99.84 50.00
- 2025-07-20 17:48:46,997 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:48:47,339 - sglang - INFO - [2025-07-20 17:48:47 TP0] Decode batch. #running-req: 1, #token: 5212, token usage: 0.14, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:48:47,339 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:48,178 - sglang - INFO - [2025-07-20 17:48:48 TP0] Decode batch. #running-req: 1, #token: 5252, token usage: 0.14, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:48:48,178 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:49,025 - sglang - INFO - [2025-07-20 17:48:49 TP0] Decode batch. #running-req: 1, #token: 5292, token usage: 0.14, gen throughput (token/s): 47.23, #queue-req: 0
- 2025-07-20 17:48:49,025 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:49,875 - sglang - INFO - [2025-07-20 17:48:49 TP0] Decode batch. #running-req: 1, #token: 5332, token usage: 0.14, gen throughput (token/s): 47.04, #queue-req: 0
- 2025-07-20 17:48:49,876 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:50,719 - sglang - INFO - [2025-07-20 17:48:50 TP0] Decode batch. #running-req: 1, #token: 5372, token usage: 0.14, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:48:50,719 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:51,558 - sglang - INFO - [2025-07-20 17:48:51 TP0] Decode batch. #running-req: 1, #token: 5412, token usage: 0.14, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-07-20 17:48:51,558 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:52,403 - sglang - INFO - [2025-07-20 17:48:52 TP0] Decode batch. #running-req: 1, #token: 5452, token usage: 0.14, gen throughput (token/s): 47.34, #queue-req: 0
- 2025-07-20 17:48:52,403 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:53,250 - sglang - INFO - [2025-07-20 17:48:53 TP0] Decode batch. #running-req: 1, #token: 5492, token usage: 0.14, gen throughput (token/s): 47.24, #queue-req: 0
- 2025-07-20 17:48:53,250 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:54,093 - sglang - INFO - [2025-07-20 17:48:54 TP0] Decode batch. #running-req: 1, #token: 5532, token usage: 0.15, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:48:54,093 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:54,933 - sglang - INFO - [2025-07-20 17:48:54 TP0] Decode batch. #running-req: 1, #token: 5572, token usage: 0.15, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:48:54,933 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:55,682 - __main__ - WARNING - JSON decode error on attempt 6 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:48:55,868 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
- 2025-07-20 17:48:56,071 - sglang - INFO - [2025-07-20 17:48:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-07-20 17:48:56,071 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-07-20 17:48:56,956 - sglang - INFO - [2025-07-20 17:48:56 TP0] Decode batch. #running-req: 1, #token: 2613, token usage: 0.07, gen throughput (token/s): 19.77, #queue-req: 0
- 2025-07-20 17:48:56,956 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:56,999 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:48:56,999 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 324.48 0.00
- finished_output_tokens 77.93 0.00
- sglang_input_tokens 353.16 43.47
- sglang_output_tokens 101.22 50.00
- 2025-07-20 17:48:57,000 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:48:57,788 - sglang - INFO - [2025-07-20 17:48:57 TP0] Decode batch. #running-req: 1, #token: 2653, token usage: 0.07, gen throughput (token/s): 48.06, #queue-req: 0
- 2025-07-20 17:48:57,789 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:58,621 - sglang - INFO - [2025-07-20 17:48:58 TP0] Decode batch. #running-req: 1, #token: 2693, token usage: 0.07, gen throughput (token/s): 48.03, #queue-req: 0
- 2025-07-20 17:48:58,621 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:48:59,459 - sglang - INFO - [2025-07-20 17:48:59 TP0] Decode batch. #running-req: 1, #token: 2733, token usage: 0.07, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-07-20 17:48:59,459 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:00,293 - sglang - INFO - [2025-07-20 17:49:00 TP0] Decode batch. #running-req: 1, #token: 2773, token usage: 0.07, gen throughput (token/s): 47.95, #queue-req: 0
- 2025-07-20 17:49:00,293 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:01,122 - sglang - INFO - [2025-07-20 17:49:01 TP0] Decode batch. #running-req: 1, #token: 2813, token usage: 0.07, gen throughput (token/s): 48.23, #queue-req: 0
- 2025-07-20 17:49:01,122 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:01,952 - sglang - INFO - [2025-07-20 17:49:01 TP0] Decode batch. #running-req: 1, #token: 2853, token usage: 0.08, gen throughput (token/s): 48.19, #queue-req: 0
- 2025-07-20 17:49:01,952 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:02,782 - sglang - INFO - [2025-07-20 17:49:02 TP0] Decode batch. #running-req: 1, #token: 2893, token usage: 0.08, gen throughput (token/s): 48.18, #queue-req: 0
- 2025-07-20 17:49:02,783 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:03,619 - sglang - INFO - [2025-07-20 17:49:03 TP0] Decode batch. #running-req: 1, #token: 2933, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:49:03,619 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:04,453 - sglang - INFO - [2025-07-20 17:49:04 TP0] Decode batch. #running-req: 1, #token: 2973, token usage: 0.08, gen throughput (token/s): 47.94, #queue-req: 0
- 2025-07-20 17:49:04,453 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:05,285 - sglang - INFO - [2025-07-20 17:49:05 TP0] Decode batch. #running-req: 1, #token: 3013, token usage: 0.08, gen throughput (token/s): 48.10, #queue-req: 0
- 2025-07-20 17:49:05,285 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:06,121 - sglang - INFO - [2025-07-20 17:49:06 TP0] Decode batch. #running-req: 1, #token: 3053, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
- 2025-07-20 17:49:06,122 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:06,957 - sglang - INFO - [2025-07-20 17:49:06 TP0] Decode batch. #running-req: 1, #token: 3093, token usage: 0.08, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:49:06,958 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:07,000 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:49:07,001 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 322.25 0.00
- finished_output_tokens 77.39 0.00
- sglang_input_tokens 350.74 43.47
- sglang_output_tokens 100.53 50.00
- 2025-07-20 17:49:07,001 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:49:07,789 - sglang - INFO - [2025-07-20 17:49:07 TP0] Decode batch. #running-req: 1, #token: 3133, token usage: 0.08, gen throughput (token/s): 48.11, #queue-req: 0
- 2025-07-20 17:49:07,789 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:08,621 - sglang - INFO - [2025-07-20 17:49:08 TP0] Decode batch. #running-req: 1, #token: 3173, token usage: 0.08, gen throughput (token/s): 48.09, #queue-req: 0
- 2025-07-20 17:49:08,621 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:09,457 - sglang - INFO - [2025-07-20 17:49:09 TP0] Decode batch. #running-req: 1, #token: 3213, token usage: 0.08, gen throughput (token/s): 47.85, #queue-req: 0
- 2025-07-20 17:49:09,457 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:10,298 - sglang - INFO - [2025-07-20 17:49:10 TP0] Decode batch. #running-req: 1, #token: 3253, token usage: 0.09, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-07-20 17:49:10,299 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:11,142 - sglang - INFO - [2025-07-20 17:49:11 TP0] Decode batch. #running-req: 1, #token: 3293, token usage: 0.09, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:49:11,142 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:11,975 - sglang - INFO - [2025-07-20 17:49:11 TP0] Decode batch. #running-req: 1, #token: 3333, token usage: 0.09, gen throughput (token/s): 48.05, #queue-req: 0
- 2025-07-20 17:49:11,975 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:12,809 - sglang - INFO - [2025-07-20 17:49:12 TP0] Decode batch. #running-req: 1, #token: 3373, token usage: 0.09, gen throughput (token/s): 47.95, #queue-req: 0
- 2025-07-20 17:49:12,809 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:13,648 - sglang - INFO - [2025-07-20 17:49:13 TP0] Decode batch. #running-req: 1, #token: 3413, token usage: 0.09, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:49:13,648 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:14,488 - sglang - INFO - [2025-07-20 17:49:14 TP0] Decode batch. #running-req: 1, #token: 3453, token usage: 0.09, gen throughput (token/s): 47.58, #queue-req: 0
- 2025-07-20 17:49:14,489 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:15,332 - sglang - INFO - [2025-07-20 17:49:15 TP0] Decode batch. #running-req: 1, #token: 3493, token usage: 0.09, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-07-20 17:49:15,333 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:16,173 - sglang - INFO - [2025-07-20 17:49:16 TP0] Decode batch. #running-req: 1, #token: 3533, token usage: 0.09, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-07-20 17:49:16,173 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:17,003 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:49:17,004 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 320.06 0.00
- finished_output_tokens 76.87 0.00
- sglang_input_tokens 348.36 43.47
- sglang_output_tokens 99.85 50.00
- 2025-07-20 17:49:17,004 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:49:17,016 - sglang - INFO - [2025-07-20 17:49:17 TP0] Decode batch. #running-req: 1, #token: 3573, token usage: 0.09, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:49:17,016 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:17,860 - sglang - INFO - [2025-07-20 17:49:17 TP0] Decode batch. #running-req: 1, #token: 3613, token usage: 0.10, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:49:17,860 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:18,694 - sglang - INFO - [2025-07-20 17:49:18 TP0] Decode batch. #running-req: 1, #token: 3653, token usage: 0.10, gen throughput (token/s): 47.99, #queue-req: 0
- 2025-07-20 17:49:18,694 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:19,530 - sglang - INFO - [2025-07-20 17:49:19 TP0] Decode batch. #running-req: 1, #token: 3693, token usage: 0.10, gen throughput (token/s): 47.81, #queue-req: 0
- 2025-07-20 17:49:19,530 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:20,371 - sglang - INFO - [2025-07-20 17:49:20 TP0] Decode batch. #running-req: 1, #token: 3733, token usage: 0.10, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:49:20,371 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:21,207 - sglang - INFO - [2025-07-20 17:49:21 TP0] Decode batch. #running-req: 1, #token: 3773, token usage: 0.10, gen throughput (token/s): 47.84, #queue-req: 0
- 2025-07-20 17:49:21,207 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:22,046 - sglang - INFO - [2025-07-20 17:49:22 TP0] Decode batch. #running-req: 1, #token: 3813, token usage: 0.10, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-07-20 17:49:22,046 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:22,886 - sglang - INFO - [2025-07-20 17:49:22 TP0] Decode batch. #running-req: 1, #token: 3853, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-07-20 17:49:22,886 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:23,730 - sglang - INFO - [2025-07-20 17:49:23 TP0] Decode batch. #running-req: 1, #token: 3893, token usage: 0.10, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:49:23,730 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:24,576 - sglang - INFO - [2025-07-20 17:49:24 TP0] Decode batch. #running-req: 1, #token: 3933, token usage: 0.10, gen throughput (token/s): 47.32, #queue-req: 0
- 2025-07-20 17:49:24,576 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:25,410 - sglang - INFO - [2025-07-20 17:49:25 TP0] Decode batch. #running-req: 1, #token: 3973, token usage: 0.10, gen throughput (token/s): 47.95, #queue-req: 0
- 2025-07-20 17:49:25,410 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:26,244 - sglang - INFO - [2025-07-20 17:49:26 TP0] Decode batch. #running-req: 1, #token: 4013, token usage: 0.11, gen throughput (token/s): 47.97, #queue-req: 0
- 2025-07-20 17:49:26,244 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:27,005 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:49:27,006 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 317.90 0.00
- finished_output_tokens 76.35 0.00
- sglang_input_tokens 346.00 43.47
- sglang_output_tokens 99.17 50.00
- 2025-07-20 17:49:27,006 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:49:27,085 - sglang - INFO - [2025-07-20 17:49:27 TP0] Decode batch. #running-req: 1, #token: 4053, token usage: 0.11, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:49:27,085 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:27,926 - sglang - INFO - [2025-07-20 17:49:27 TP0] Decode batch. #running-req: 1, #token: 4093, token usage: 0.11, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:49:27,926 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:28,769 - sglang - INFO - [2025-07-20 17:49:28 TP0] Decode batch. #running-req: 1, #token: 4133, token usage: 0.11, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-07-20 17:49:28,769 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:29,611 - sglang - INFO - [2025-07-20 17:49:29 TP0] Decode batch. #running-req: 1, #token: 4173, token usage: 0.11, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-07-20 17:49:29,611 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:30,456 - sglang - INFO - [2025-07-20 17:49:30 TP0] Decode batch. #running-req: 1, #token: 4213, token usage: 0.11, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-07-20 17:49:30,457 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:31,305 - sglang - INFO - [2025-07-20 17:49:31 TP0] Decode batch. #running-req: 1, #token: 4253, token usage: 0.11, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-07-20 17:49:31,305 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:32,143 - sglang - INFO - [2025-07-20 17:49:32 TP0] Decode batch. #running-req: 1, #token: 4293, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-07-20 17:49:32,143 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:32,979 - sglang - INFO - [2025-07-20 17:49:32 TP0] Decode batch. #running-req: 1, #token: 4333, token usage: 0.11, gen throughput (token/s): 47.86, #queue-req: 0
- 2025-07-20 17:49:32,979 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:33,821 - sglang - INFO - [2025-07-20 17:49:33 TP0] Decode batch. #running-req: 1, #token: 4373, token usage: 0.12, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:49:33,821 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:34,665 - sglang - INFO - [2025-07-20 17:49:34 TP0] Decode batch. #running-req: 1, #token: 4413, token usage: 0.12, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:49:34,665 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:35,510 - sglang - INFO - [2025-07-20 17:49:35 TP0] Decode batch. #running-req: 1, #token: 4453, token usage: 0.12, gen throughput (token/s): 47.30, #queue-req: 0
- 2025-07-20 17:49:35,510 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:36,353 - sglang - INFO - [2025-07-20 17:49:36 TP0] Decode batch. #running-req: 1, #token: 4493, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:49:36,353 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:37,007 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:49:37,007 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 315.77 0.00
- finished_output_tokens 75.83 0.00
- sglang_input_tokens 343.68 43.47
- sglang_output_tokens 98.51 50.00
- 2025-07-20 17:49:37,007 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:49:37,200 - sglang - INFO - [2025-07-20 17:49:37 TP0] Decode batch. #running-req: 1, #token: 4533, token usage: 0.12, gen throughput (token/s): 47.21, #queue-req: 0
- 2025-07-20 17:49:37,201 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:38,048 - sglang - INFO - [2025-07-20 17:49:38 TP0] Decode batch. #running-req: 1, #token: 4573, token usage: 0.12, gen throughput (token/s): 47.18, #queue-req: 0
- 2025-07-20 17:49:38,049 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:38,891 - sglang - INFO - [2025-07-20 17:49:38 TP0] Decode batch. #running-req: 1, #token: 4613, token usage: 0.12, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-07-20 17:49:38,891 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:39,730 - sglang - INFO - [2025-07-20 17:49:39 TP0] Decode batch. #running-req: 1, #token: 4653, token usage: 0.12, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-07-20 17:49:39,730 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:40,574 - sglang - INFO - [2025-07-20 17:49:40 TP0] Decode batch. #running-req: 1, #token: 4693, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:49:40,574 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:41,418 - sglang - INFO - [2025-07-20 17:49:41 TP0] Decode batch. #running-req: 1, #token: 4733, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-07-20 17:49:41,418 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:42,261 - sglang - INFO - [2025-07-20 17:49:42 TP0] Decode batch. #running-req: 1, #token: 4773, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:49:42,262 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:43,105 - sglang - INFO - [2025-07-20 17:49:43 TP0] Decode batch. #running-req: 1, #token: 4813, token usage: 0.13, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-07-20 17:49:43,105 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:43,948 - sglang - INFO - [2025-07-20 17:49:43 TP0] Decode batch. #running-req: 1, #token: 4853, token usage: 0.13, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-07-20 17:49:43,948 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:44,797 - sglang - INFO - [2025-07-20 17:49:44 TP0] Decode batch. #running-req: 1, #token: 4893, token usage: 0.13, gen throughput (token/s): 47.14, #queue-req: 0
- 2025-07-20 17:49:44,797 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:45,637 - sglang - INFO - [2025-07-20 17:49:45 TP0] Decode batch. #running-req: 1, #token: 4933, token usage: 0.13, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-07-20 17:49:45,637 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:46,475 - sglang - INFO - [2025-07-20 17:49:46 TP0] Decode batch. #running-req: 1, #token: 4973, token usage: 0.13, gen throughput (token/s): 47.76, #queue-req: 0
- 2025-07-20 17:49:46,475 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:47,008 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:49:47,008 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 313.66 0.00
- finished_output_tokens 75.33 0.00
- sglang_input_tokens 341.39 43.47
- sglang_output_tokens 97.85 50.00
- 2025-07-20 17:49:47,009 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:49:47,317 - sglang - INFO - [2025-07-20 17:49:47 TP0] Decode batch. #running-req: 1, #token: 5013, token usage: 0.13, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-07-20 17:49:47,318 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:48,161 - sglang - INFO - [2025-07-20 17:49:48 TP0] Decode batch. #running-req: 1, #token: 5053, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-07-20 17:49:48,162 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:49,005 - sglang - INFO - [2025-07-20 17:49:49 TP0] Decode batch. #running-req: 1, #token: 5093, token usage: 0.13, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-07-20 17:49:49,005 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:49,844 - sglang - INFO - [2025-07-20 17:49:49 TP0] Decode batch. #running-req: 1, #token: 5133, token usage: 0.14, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-07-20 17:49:49,844 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:50,686 - sglang - INFO - [2025-07-20 17:49:50 TP0] Decode batch. #running-req: 1, #token: 5173, token usage: 0.14, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-07-20 17:49:50,686 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:51,535 - sglang - INFO - [2025-07-20 17:49:51 TP0] Decode batch. #running-req: 1, #token: 5213, token usage: 0.14, gen throughput (token/s): 47.14, #queue-req: 0
- 2025-07-20 17:49:51,535 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:52,379 - sglang - INFO - [2025-07-20 17:49:52 TP0] Decode batch. #running-req: 1, #token: 5253, token usage: 0.14, gen throughput (token/s): 47.39, #queue-req: 0
- 2025-07-20 17:49:52,379 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:53,220 - sglang - INFO - [2025-07-20 17:49:53 TP0] Decode batch. #running-req: 1, #token: 5293, token usage: 0.14, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-07-20 17:49:53,220 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:54,063 - sglang - INFO - [2025-07-20 17:49:54 TP0] Decode batch. #running-req: 1, #token: 5333, token usage: 0.14, gen throughput (token/s): 47.41, #queue-req: 0
- 2025-07-20 17:49:54,064 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:54,911 - sglang - INFO - [2025-07-20 17:49:54 TP0] Decode batch. #running-req: 1, #token: 5373, token usage: 0.14, gen throughput (token/s): 47.20, #queue-req: 0
- 2025-07-20 17:49:54,911 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:55,752 - sglang - INFO - [2025-07-20 17:49:55 TP0] Decode batch. #running-req: 1, #token: 5413, token usage: 0.14, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-07-20 17:49:55,752 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:56,592 - sglang - INFO - [2025-07-20 17:49:56 TP0] Decode batch. #running-req: 1, #token: 5453, token usage: 0.14, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-07-20 17:49:56,592 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:57,010 - __main__ - INFO - Queue remaining: 0
- 2025-07-20 17:49:57,010 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- finished_input_tokens 311.59 0.00
- finished_output_tokens 74.83 0.00
- sglang_input_tokens 339.13 43.47
- sglang_output_tokens 97.20 50.00
- 2025-07-20 17:49:57,010 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 9
- 2025-07-20 17:49:57,432 - sglang - INFO - [2025-07-20 17:49:57 TP0] Decode batch. #running-req: 1, #token: 5493, token usage: 0.14, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-07-20 17:49:57,432 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:58,278 - sglang - INFO - [2025-07-20 17:49:58 TP0] Decode batch. #running-req: 1, #token: 5533, token usage: 0.15, gen throughput (token/s): 47.26, #queue-req: 0
- 2025-07-20 17:49:58,278 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:59,121 - sglang - INFO - [2025-07-20 17:49:59 TP0] Decode batch. #running-req: 1, #token: 5573, token usage: 0.15, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-07-20 17:49:59,122 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-07-20 17:49:59,844 - __main__ - WARNING - JSON decode error on attempt 7 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
- 2025-07-20 17:49:59,844 - __main__ - ERROR - Failed to process test_pdf/1144520000702630XG344010604301601.pdf-5 after 8 attempts.
- 2025-07-20 17:49:59,857 - __main__ - ERROR - Document test_pdf/1144520000702630XG344010604301601.pdf has 1 fallback pages out of 9 exceeding max_page_error_rate of 0.004, discarding document.
- 2025-07-20 17:49:59,858 - __main__ - INFO - Finished TaskGroup for worker on 21ee5d5d32535bcacd750ef2dace24b98fa42fdb
- 2025-07-20 17:49:59,858 - __main__ - INFO - Got 0 docs for 21ee5d5d32535bcacd750ef2dace24b98fa42fdb
- 2025-07-20 17:49:59,859 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-07-20 17:49:59,860 - __main__ - INFO - Work done
- 2025-07-20 17:49:59,860 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-08-24 23:25:02,460 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:25:02,460 - __main__ - INFO - Loading file at ./workspace/delivery.pdf as PDF document
- 2025-08-24 23:25:02,461 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:25:02,465 - __main__ - INFO - Calculated items_per_group: 2 based on average pages per PDF: 5.00
- 2025-08-24 23:25:02,621 - __main__ - INFO - Starting pipeline with PID 476723
- 2025-08-24 23:25:02,621 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:25:02,710 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-08-24 23:25:03,741 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-08-24 23:25:04,787 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-08-24 23:25:05,852 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-08-24 23:25:06,918 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-08-24 23:25:07,984 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-08-24 23:25:09,043 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-08-24 23:25:10,087 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-08-24 23:25:11,136 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-08-24 23:25:11,702 - sglang - INFO - [2025-08-24 23:25:11] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=704242960, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:25:11,702 - __main__ - INFO - [2025-08-24 23:25:11] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=704242960, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:25:12,199 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-08-24 23:25:12,632 - sglang - INFO - [2025-08-24 23:25:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:25:12,633 - __main__ - INFO - [2025-08-24 23:25:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:25:13,252 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-08-24 23:25:14,323 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-08-24 23:25:15,395 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-08-24 23:25:16,466 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-08-24 23:25:17,528 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-08-24 23:25:18,595 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-08-24 23:25:18,874 - sglang - INFO - [2025-08-24 23:25:18 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:25:18,874 - __main__ - INFO - [2025-08-24 23:25:18 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:25:18,876 - sglang - INFO - [2025-08-24 23:25:18 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:25:18,876 - __main__ - INFO - [2025-08-24 23:25:18 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:25:18,876 - sglang - INFO - [2025-08-24 23:25:18 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:25:18,877 - __main__ - INFO - [2025-08-24 23:25:18 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:25:18,877 - sglang - INFO - [2025-08-24 23:25:18 TP0] Init torch distributed begin.
- 2025-08-24 23:25:18,877 - __main__ - INFO - [2025-08-24 23:25:18 TP0] Init torch distributed begin.
- 2025-08-24 23:25:19,675 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-08-24 23:25:20,742 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-08-24 23:25:21,796 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-08-24 23:25:22,863 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-08-24 23:25:23,928 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-08-24 23:25:24,213 - sglang - INFO - [2025-08-24 23:25:24 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:25:24,213 - __main__ - INFO - [2025-08-24 23:25:24 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:25:24,892 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:25:24,892 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:25:25,007 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-08-24 23:25:25,705 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.23it/s]
- 2025-08-24 23:25:25,705 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.23it/s]
- 2025-08-24 23:25:26,086 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-08-24 23:25:26,663 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.11it/s]
- 2025-08-24 23:25:26,663 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.11it/s]
- 2025-08-24 23:25:27,166 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-08-24 23:25:27,593 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.10it/s]
- 2025-08-24 23:25:27,593 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.10it/s]
- 2025-08-24 23:25:28,036 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.37it/s]
- 2025-08-24 23:25:28,036 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.37it/s]
- 2025-08-24 23:25:28,036 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
- 2025-08-24 23:25:28,036 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
- 2025-08-24 23:25:28,037 - sglang - INFO -
- 2025-08-24 23:25:28,037 - __main__ - INFO -
- 2025-08-24 23:25:28,100 - sglang - INFO - [2025-08-24 23:25:28 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:25:28,100 - __main__ - INFO - [2025-08-24 23:25:28 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:25:28,105 - sglang - INFO - [2025-08-24 23:25:28 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:25:28,105 - __main__ - INFO - [2025-08-24 23:25:28 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:25:28,106 - sglang - INFO - [2025-08-24 23:25:28 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:25:28,106 - __main__ - INFO - [2025-08-24 23:25:28 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:25:28,247 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-08-24 23:25:28,284 - sglang - INFO - [2025-08-24 23:25:28 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:25:28,284 - __main__ - INFO - [2025-08-24 23:25:28 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:25:29,330 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-08-24 23:25:30,156 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.07s/it]
50%|█████ | 2/4 [00:01<00:01, 1.68it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s]
100%|██████████| 4/4 [00:01<00:00, 2.66it/s]
100%|██████████| 4/4 [00:01<00:00, 2.14it/s]
- 2025-08-24 23:25:30,156 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.07s/it]
50%|█████ | 2/4 [00:01<00:01, 1.68it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s]
100%|██████████| 4/4 [00:01<00:00, 2.66it/s]
100%|██████████| 4/4 [00:01<00:00, 2.14it/s]
- 2025-08-24 23:25:30,156 - sglang - INFO - [2025-08-24 23:25:30 TP0] Capture cuda graph end. Time elapsed: 1.87 s
- 2025-08-24 23:25:30,156 - __main__ - INFO - [2025-08-24 23:25:30 TP0] Capture cuda graph end. Time elapsed: 1.87 s
- 2025-08-24 23:25:30,416 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
- 2025-08-24 23:25:30,848 - sglang - INFO - [2025-08-24 23:25:30 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:25:30,849 - __main__ - INFO - [2025-08-24 23:25:30 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:25:31,509 - __main__ - INFO - sglang server is ready.
- 2025-08-24 23:25:31,509 - __main__ - INFO - Queue remaining: 1
- 2025-08-24 23:25:31,509 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:25:31,509 - __main__ - INFO -
- Worker ID
- ---------
- 2025-08-24 23:25:31,510 - __main__ - INFO - Worker 0 processing work item c8e80875b3bd75cd2f1ae72e45733f15ee7f5b3e
- 2025-08-24 23:25:31,510 - __main__ - INFO - Created all tasks for c8e80875b3bd75cd2f1ae72e45733f15ee7f5b3e
- 2025-08-24 23:25:31,516 - __main__ - INFO - Got 5 pages to do for ./workspace/delivery.pdf in worker 0
- 2025-08-24 23:25:31,925 - sglang - INFO - [2025-08-24 23:25:31 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:25:31,926 - __main__ - INFO - [2025-08-24 23:25:31 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:25:31,926 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-24 23:25:33,074 - sglang - INFO - [2025-08-24 23:25:33] The server is fired up and ready to roll!
- 2025-08-24 23:25:33,075 - __main__ - INFO - [2025-08-24 23:25:33] The server is fired up and ready to roll!
- 2025-08-24 23:25:37,800 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-1
- 2025-08-24 23:25:37,839 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-2
- 2025-08-24 23:25:37,875 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-3
- 2025-08-24 23:25:37,886 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-4
- 2025-08-24 23:25:37,907 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-5
- 2025-08-24 23:25:41,532 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:25:41,533 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:25:41,533 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-08-24 23:25:51,534 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:25:51,534 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:25:51,535 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-08-24 23:25:54,865 - sglang - INFO - [2025-08-24 23:25:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2017, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:25:54,866 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-24 23:25:55,873 - sglang - INFO - [2025-08-24 23:25:55 TP0] Prefill batch. #new-seq: 4, #new-token: 8308, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
- 2025-08-24 23:25:55,874 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-24 23:25:59,272 - sglang - INFO - [2025-08-24 23:25:59 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.05, #queue-req: 0
- 2025-08-24 23:25:59,272 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-24 23:26:00,139 - sglang - INFO - [2025-08-24 23:26:00 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 230.55, #queue-req: 0
- 2025-08-24 23:26:00,140 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-24 23:26:01,008 - sglang - INFO - [2025-08-24 23:26:01 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 230.35, #queue-req: 0
- 2025-08-24 23:26:01,008 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-24 23:26:01,536 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:26:01,536 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:26:01,537 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 5
- 2025-08-24 23:26:01,875 - sglang - INFO - [2025-08-24 23:26:01 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 230.48, #queue-req: 0
- 2025-08-24 23:26:01,876 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-24 23:26:02,745 - sglang - INFO - [2025-08-24 23:26:02 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 229.99, #queue-req: 0
- 2025-08-24 23:26:02,745 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-24 23:26:03,617 - sglang - INFO - [2025-08-24 23:26:03 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 229.50, #queue-req: 0
- 2025-08-24 23:26:03,617 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-24 23:26:03,857 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-08-24 23:26:04,491 - sglang - INFO - [2025-08-24 23:26:04 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 228.69, #queue-req: 0
- 2025-08-24 23:26:04,491 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-24 23:26:05,366 - sglang - INFO - [2025-08-24 23:26:05 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 228.53, #queue-req: 0
- 2025-08-24 23:26:05,367 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-24 23:26:06,239 - sglang - INFO - [2025-08-24 23:26:06 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 213.17, #queue-req: 0
- 2025-08-24 23:26:06,239 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-24 23:26:07,093 - sglang - INFO - [2025-08-24 23:26:07 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 140.51, #queue-req: 0
- 2025-08-24 23:26:07,093 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-24 23:26:07,935 - sglang - INFO - [2025-08-24 23:26:07 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 73.62, #queue-req: 0
- 2025-08-24 23:26:07,935 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-24 23:26:08,770 - sglang - INFO - [2025-08-24 23:26:08 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 47.92, #queue-req: 0
- 2025-08-24 23:26:08,770 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-24 23:26:09,175 - __main__ - INFO - Finished TaskGroup for worker on c8e80875b3bd75cd2f1ae72e45733f15ee7f5b3e
- 2025-08-24 23:26:09,175 - __main__ - INFO - Got 1 docs for c8e80875b3bd75cd2f1ae72e45733f15ee7f5b3e
- 2025-08-24 23:26:09,176 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-08-24 23:26:09,177 - __main__ - INFO - Work done
- 2025-08-24 23:26:09,177 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-08-24 23:39:48,280 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:39:48,280 - __main__ - INFO - Loading file at ./workspace/delivery.pdf as PDF document
- 2025-08-24 23:39:48,280 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:39:48,284 - __main__ - INFO - Calculated items_per_group: 2 based on average pages per PDF: 5.00
- 2025-08-24 23:39:48,490 - __main__ - INFO - Starting pipeline with PID 478445
- 2025-08-24 23:39:48,490 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:39:48,491 - __main__ - INFO - No work to do, exiting
- 2025-08-24 23:44:29,672 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:44:29,672 - __main__ - INFO - Loading file at ./workspace/delivery.pdf as PDF document
- 2025-08-24 23:44:29,672 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:44:29,674 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
- 2025-08-24 23:44:29,847 - __main__ - INFO - Starting pipeline with PID 478740
- 2025-08-24 23:44:29,848 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:44:29,849 - __main__ - INFO - No work to do, exiting
- 2025-08-24 23:45:28,141 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:45:28,142 - __main__ - INFO - Loading file at ./workspace/ambiguous.pdf as PDF document
- 2025-08-24 23:45:28,142 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:45:28,144 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
- 2025-08-24 23:45:28,267 - __main__ - INFO - Starting pipeline with PID 478956
- 2025-08-24 23:45:28,267 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:45:28,343 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-08-24 23:45:29,372 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-08-24 23:45:30,404 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-08-24 23:45:31,455 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-08-24 23:45:32,522 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-08-24 23:45:33,591 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-08-24 23:45:34,642 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-08-24 23:45:34,880 - sglang - INFO - [2025-08-24 23:45:34] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=725225729, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:45:34,881 - __main__ - INFO - [2025-08-24 23:45:34] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=725225729, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:45:35,694 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-08-24 23:45:35,839 - sglang - INFO - [2025-08-24 23:45:35] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:45:35,839 - __main__ - INFO - [2025-08-24 23:45:35] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:45:36,745 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-08-24 23:45:37,815 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-08-24 23:45:38,888 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-08-24 23:45:39,962 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-08-24 23:45:41,012 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-08-24 23:45:41,949 - sglang - INFO - [2025-08-24 23:45:41 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:45:41,949 - __main__ - INFO - [2025-08-24 23:45:41 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:45:41,951 - sglang - INFO - [2025-08-24 23:45:41 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:45:41,951 - __main__ - INFO - [2025-08-24 23:45:41 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:45:41,952 - sglang - INFO - [2025-08-24 23:45:41 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:45:41,952 - __main__ - INFO - [2025-08-24 23:45:41 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:45:41,952 - sglang - INFO - [2025-08-24 23:45:41 TP0] Init torch distributed begin.
- 2025-08-24 23:45:41,952 - __main__ - INFO - [2025-08-24 23:45:41 TP0] Init torch distributed begin.
- 2025-08-24 23:45:42,096 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-08-24 23:45:43,131 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-08-24 23:45:44,196 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-08-24 23:45:45,274 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-08-24 23:45:46,343 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-08-24 23:45:47,280 - sglang - INFO - [2025-08-24 23:45:47 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:45:47,281 - __main__ - INFO - [2025-08-24 23:45:47 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:45:47,386 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-08-24 23:45:47,790 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:45:47,790 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:45:48,442 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-08-24 23:45:48,788 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.00it/s]
- 2025-08-24 23:45:48,788 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.00it/s]
- 2025-08-24 23:45:49,522 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-08-24 23:45:49,887 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.06s/it]
- 2025-08-24 23:45:49,887 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.06s/it]
- 2025-08-24 23:45:50,601 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-08-24 23:45:50,977 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.07s/it]
- 2025-08-24 23:45:50,977 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.07s/it]
- 2025-08-24 23:45:51,448 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-08-24 23:45:51,448 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
- 2025-08-24 23:45:51,448 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.09it/s]
- 2025-08-24 23:45:51,448 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.09it/s]
- 2025-08-24 23:45:51,448 - sglang - INFO -
- 2025-08-24 23:45:51,448 - __main__ - INFO -
- 2025-08-24 23:45:51,511 - sglang - INFO - [2025-08-24 23:45:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:45:51,511 - __main__ - INFO - [2025-08-24 23:45:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:45:51,517 - sglang - INFO - [2025-08-24 23:45:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:45:51,517 - __main__ - INFO - [2025-08-24 23:45:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:45:51,517 - sglang - INFO - [2025-08-24 23:45:51 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:45:51,518 - __main__ - INFO - [2025-08-24 23:45:51 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:45:51,663 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-08-24 23:45:51,666 - sglang - INFO - [2025-08-24 23:45:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:45:51,666 - __main__ - INFO - [2025-08-24 23:45:51 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:45:52,737 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-08-24 23:45:53,508 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.01it/s]
50%|█████ | 2/4 [00:01<00:01, 1.74it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.27it/s]
100%|██████████| 4/4 [00:01<00:00, 2.64it/s]
100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
- 2025-08-24 23:45:53,509 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:00<00:02, 1.01it/s]
50%|█████ | 2/4 [00:01<00:01, 1.74it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.27it/s]
100%|██████████| 4/4 [00:01<00:00, 2.64it/s]
100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
- 2025-08-24 23:45:53,509 - sglang - INFO - [2025-08-24 23:45:53 TP0] Capture cuda graph end. Time elapsed: 1.84 s
- 2025-08-24 23:45:53,509 - __main__ - INFO - [2025-08-24 23:45:53 TP0] Capture cuda graph end. Time elapsed: 1.84 s
- 2025-08-24 23:45:53,815 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-08-24 23:45:54,222 - sglang - INFO - [2025-08-24 23:45:54 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:45:54,223 - __main__ - INFO - [2025-08-24 23:45:54 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:45:54,914 - __main__ - INFO - sglang server is ready.
- 2025-08-24 23:45:54,914 - __main__ - INFO - Queue remaining: 1
- 2025-08-24 23:45:54,914 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:45:54,914 - __main__ - INFO -
- Worker ID
- ---------
- 2025-08-24 23:45:54,915 - __main__ - INFO - Worker 0 processing work item 0dd0e3c651bff849c06afd78369d2b942edcf042
- 2025-08-24 23:45:54,915 - __main__ - INFO - Created all tasks for 0dd0e3c651bff849c06afd78369d2b942edcf042
- 2025-08-24 23:45:54,916 - __main__ - INFO - Got 1 pages to do for ./workspace/ambiguous.pdf in worker 0
- 2025-08-24 23:45:55,309 - sglang - INFO - [2025-08-24 23:45:55 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:45:55,309 - __main__ - INFO - [2025-08-24 23:45:55 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:45:55,310 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-24 23:45:55,965 - sglang - INFO - [2025-08-24 23:45:55] The server is fired up and ready to roll!
- 2025-08-24 23:45:55,965 - __main__ - INFO - [2025-08-24 23:45:55] The server is fired up and ready to roll!
- 2025-08-24 23:46:01,367 - __main__ - INFO - Built page query for ./workspace/ambiguous.pdf-1
- 2025-08-24 23:46:04,916 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:46:04,916 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:46:04,916 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:46:14,932 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:46:14,933 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:46:14,933 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:46:18,474 - sglang - INFO - [2025-08-24 23:46:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1156, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:46:18,474 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-24 23:46:19,964 - sglang - INFO - [2025-08-24 23:46:19 TP0] Decode batch. #running-req: 1, #token: 1189, token usage: 0.03, gen throughput (token/s): 1.55, #queue-req: 0
- 2025-08-24 23:46:19,964 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-24 23:46:20,322 - __main__ - INFO - Finished TaskGroup for worker on 0dd0e3c651bff849c06afd78369d2b942edcf042
- 2025-08-24 23:46:20,323 - __main__ - INFO - Got 1 docs for 0dd0e3c651bff849c06afd78369d2b942edcf042
- 2025-08-24 23:46:20,324 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-08-24 23:46:20,324 - __main__ - INFO - Work done
- 2025-08-24 23:46:20,325 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-08-24 23:47:59,474 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:47:59,475 - __main__ - INFO - Loading file at ./workspace/ambiguous.pdf as PDF document
- 2025-08-24 23:47:59,475 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:47:59,476 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
- 2025-08-24 23:47:59,645 - __main__ - INFO - Starting pipeline with PID 480164
- 2025-08-24 23:47:59,645 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:47:59,646 - __main__ - INFO - No work to do, exiting
- 2025-08-24 23:53:01,828 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:53:01,829 - __main__ - INFO - Loading file at ./workspace/ambiguous.pdf as PDF document
- 2025-08-24 23:53:01,829 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:53:01,830 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
- 2025-08-24 23:53:02,019 - __main__ - INFO - Starting pipeline with PID 480504
- 2025-08-24 23:53:02,019 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:53:02,114 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-08-24 23:53:03,152 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-08-24 23:53:04,196 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-08-24 23:53:05,257 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-08-24 23:53:06,322 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-08-24 23:53:07,392 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-08-24 23:53:08,457 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-08-24 23:53:09,199 - sglang - INFO - [2025-08-24 23:53:09] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=901603346, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:53:09,199 - __main__ - INFO - [2025-08-24 23:53:09] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=901603346, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:53:09,537 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-08-24 23:53:10,229 - sglang - INFO - [2025-08-24 23:53:10] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:53:10,229 - __main__ - INFO - [2025-08-24 23:53:10] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:53:10,573 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-08-24 23:53:11,635 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-08-24 23:53:12,701 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-08-24 23:53:13,772 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-08-24 23:53:14,843 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-08-24 23:53:15,913 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-08-24 23:53:16,982 - sglang - INFO - [2025-08-24 23:53:16 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:53:16,982 - __main__ - INFO - [2025-08-24 23:53:16 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:53:16,982 - sglang - INFO - [2025-08-24 23:53:16 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:53:16,982 - __main__ - INFO - [2025-08-24 23:53:16 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:53:16,982 - sglang - INFO - [2025-08-24 23:53:16 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:53:16,982 - __main__ - INFO - [2025-08-24 23:53:16 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:53:16,982 - sglang - INFO - [2025-08-24 23:53:16 TP0] Init torch distributed begin.
- 2025-08-24 23:53:16,982 - __main__ - INFO - [2025-08-24 23:53:16 TP0] Init torch distributed begin.
- 2025-08-24 23:53:16,983 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-08-24 23:53:18,059 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-08-24 23:53:19,130 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-08-24 23:53:20,202 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-08-24 23:53:21,262 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-08-24 23:53:22,249 - sglang - INFO - [2025-08-24 23:53:22 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:53:22,249 - __main__ - INFO - [2025-08-24 23:53:22 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:53:22,337 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-08-24 23:53:22,777 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:53:22,777 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:53:23,418 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-08-24 23:53:23,722 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.06it/s]
- 2025-08-24 23:53:23,722 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.06it/s]
- 2025-08-24 23:53:24,492 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-08-24 23:53:24,711 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.03it/s]
- 2025-08-24 23:53:24,711 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.03it/s]
- 2025-08-24 23:53:25,578 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-08-24 23:53:25,679 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.03it/s]
- 2025-08-24 23:53:25,679 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.03it/s]
- 2025-08-24 23:53:26,073 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.35it/s]
- 2025-08-24 23:53:26,073 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.35it/s]
- 2025-08-24 23:53:26,073 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
- 2025-08-24 23:53:26,073 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
- 2025-08-24 23:53:26,074 - sglang - INFO -
- 2025-08-24 23:53:26,074 - __main__ - INFO -
- 2025-08-24 23:53:26,119 - sglang - INFO - [2025-08-24 23:53:26 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:53:26,119 - __main__ - INFO - [2025-08-24 23:53:26 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:53:26,125 - sglang - INFO - [2025-08-24 23:53:26 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:53:26,125 - __main__ - INFO - [2025-08-24 23:53:26 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:53:26,125 - sglang - INFO - [2025-08-24 23:53:26 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:53:26,125 - __main__ - INFO - [2025-08-24 23:53:26 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:53:26,277 - sglang - INFO - [2025-08-24 23:53:26 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:53:26,278 - __main__ - INFO - [2025-08-24 23:53:26 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:53:26,662 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-08-24 23:53:27,742 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-08-24 23:53:28,128 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.00s/it]
50%|█████ | 2/4 [00:01<00:01, 1.74it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.26it/s]
100%|██████████| 4/4 [00:01<00:00, 2.63it/s]
100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
- 2025-08-24 23:53:28,128 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.00s/it]
50%|█████ | 2/4 [00:01<00:01, 1.74it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.26it/s]
100%|██████████| 4/4 [00:01<00:00, 2.63it/s]
100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
- 2025-08-24 23:53:28,128 - sglang - INFO - [2025-08-24 23:53:28 TP0] Capture cuda graph end. Time elapsed: 1.85 s
- 2025-08-24 23:53:28,128 - __main__ - INFO - [2025-08-24 23:53:28 TP0] Capture cuda graph end. Time elapsed: 1.85 s
- 2025-08-24 23:53:28,822 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-08-24 23:53:28,850 - sglang - INFO - [2025-08-24 23:53:28 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:53:28,850 - __main__ - INFO - [2025-08-24 23:53:28 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:53:29,924 - __main__ - INFO - sglang server is ready.
- 2025-08-24 23:53:29,925 - __main__ - INFO - Queue remaining: 1
- 2025-08-24 23:53:29,925 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:53:29,925 - __main__ - INFO -
- Worker ID
- ---------
- 2025-08-24 23:53:29,925 - __main__ - INFO - Worker 0 processing work item 0dd0e3c651bff849c06afd78369d2b942edcf042
- 2025-08-24 23:53:29,925 - __main__ - INFO - Created all tasks for 0dd0e3c651bff849c06afd78369d2b942edcf042
- 2025-08-24 23:53:29,927 - __main__ - INFO - Got 1 pages to do for ./workspace/ambiguous.pdf in worker 0
- 2025-08-24 23:53:29,938 - sglang - INFO - [2025-08-24 23:53:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:53:29,939 - __main__ - INFO - [2025-08-24 23:53:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:53:29,939 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-24 23:53:30,519 - sglang - INFO - [2025-08-24 23:53:30] The server is fired up and ready to roll!
- 2025-08-24 23:53:30,519 - __main__ - INFO - [2025-08-24 23:53:30] The server is fired up and ready to roll!
- 2025-08-24 23:53:36,342 - __main__ - INFO - Built page query for ./workspace/ambiguous.pdf-1
- 2025-08-24 23:53:39,932 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:53:39,933 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:53:39,933 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:53:49,934 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:53:49,935 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:53:49,935 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:53:53,350 - sglang - INFO - [2025-08-24 23:53:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1156, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:53:53,351 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-24 23:53:54,801 - sglang - INFO - [2025-08-24 23:53:54 TP0] Decode batch. #running-req: 1, #token: 0, token usage: 0.00, gen throughput (token/s): 1.54, #queue-req: 0
- 2025-08-24 23:53:54,802 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-24 23:53:54,811 - __main__ - INFO - Finished TaskGroup for worker on 0dd0e3c651bff849c06afd78369d2b942edcf042
- 2025-08-24 23:53:54,811 - __main__ - INFO - Got 1 docs for 0dd0e3c651bff849c06afd78369d2b942edcf042
- 2025-08-24 23:53:54,812 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-08-24 23:53:54,813 - __main__ - INFO - Work done
- 2025-08-24 23:53:54,813 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-08-24 23:55:05,116 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:55:05,116 - __main__ - INFO - Loading file at ./workspace/map1.pdf as PDF document
- 2025-08-24 23:55:05,116 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:55:05,120 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
- 2025-08-24 23:55:05,301 - __main__ - INFO - Starting pipeline with PID 481640
- 2025-08-24 23:55:05,301 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:55:05,382 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-08-24 23:55:06,416 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-08-24 23:55:07,473 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-08-24 23:55:08,516 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-08-24 23:55:09,556 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-08-24 23:55:10,609 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-08-24 23:55:11,668 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-08-24 23:55:12,409 - sglang - INFO - [2025-08-24 23:55:12] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=677962409, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:55:12,410 - __main__ - INFO - [2025-08-24 23:55:12] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=677962409, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:55:12,736 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-08-24 23:55:13,443 - sglang - INFO - [2025-08-24 23:55:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:55:13,443 - __main__ - INFO - [2025-08-24 23:55:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:55:13,816 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-08-24 23:55:14,848 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-08-24 23:55:15,910 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-08-24 23:55:16,976 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-08-24 23:55:18,042 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-08-24 23:55:19,107 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-08-24 23:55:19,664 - sglang - INFO - [2025-08-24 23:55:19 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:55:19,664 - __main__ - INFO - [2025-08-24 23:55:19 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:55:19,666 - sglang - INFO - [2025-08-24 23:55:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:55:19,666 - __main__ - INFO - [2025-08-24 23:55:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:55:19,666 - sglang - INFO - [2025-08-24 23:55:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:55:19,666 - __main__ - INFO - [2025-08-24 23:55:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:55:19,666 - sglang - INFO - [2025-08-24 23:55:19 TP0] Init torch distributed begin.
- 2025-08-24 23:55:19,666 - __main__ - INFO - [2025-08-24 23:55:19 TP0] Init torch distributed begin.
- 2025-08-24 23:55:20,191 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-08-24 23:55:21,264 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-08-24 23:55:22,326 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-08-24 23:55:23,394 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-08-24 23:55:24,439 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-08-24 23:55:24,996 - sglang - INFO - [2025-08-24 23:55:24 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:55:24,996 - __main__ - INFO - [2025-08-24 23:55:24 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:55:25,484 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-08-24 23:55:25,535 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:55:25,535 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:55:26,374 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.19it/s]
- 2025-08-24 23:55:26,374 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.19it/s]
- 2025-08-24 23:55:26,530 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-08-24 23:55:27,269 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.15it/s]
- 2025-08-24 23:55:27,269 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.15it/s]
- 2025-08-24 23:55:27,577 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-08-24 23:55:28,240 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.09it/s]
- 2025-08-24 23:55:28,240 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.09it/s]
- 2025-08-24 23:55:28,622 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-08-24 23:55:28,666 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.38it/s]
- 2025-08-24 23:55:28,666 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.38it/s]
- 2025-08-24 23:55:28,666 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.28it/s]
- 2025-08-24 23:55:28,666 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.28it/s]
- 2025-08-24 23:55:28,666 - sglang - INFO -
- 2025-08-24 23:55:28,666 - __main__ - INFO -
- 2025-08-24 23:55:28,721 - sglang - INFO - [2025-08-24 23:55:28 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:55:28,722 - __main__ - INFO - [2025-08-24 23:55:28 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:55:28,730 - sglang - INFO - [2025-08-24 23:55:28 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:55:28,730 - __main__ - INFO - [2025-08-24 23:55:28 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:55:28,730 - sglang - INFO - [2025-08-24 23:55:28 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:55:28,730 - __main__ - INFO - [2025-08-24 23:55:28 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:55:28,916 - sglang - INFO - [2025-08-24 23:55:28 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:55:28,916 - __main__ - INFO - [2025-08-24 23:55:28 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:55:29,667 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-08-24 23:55:30,715 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-08-24 23:55:30,964 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.19s/it]
50%|█████ | 2/4 [00:01<00:01, 1.50it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.04it/s]
100%|██████████| 4/4 [00:02<00:00, 2.47it/s]
100%|██████████| 4/4 [00:02<00:00, 1.96it/s]
- 2025-08-24 23:55:30,964 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.19s/it]
50%|█████ | 2/4 [00:01<00:01, 1.50it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.04it/s]
100%|██████████| 4/4 [00:02<00:00, 2.47it/s]
100%|██████████| 4/4 [00:02<00:00, 1.96it/s]
- 2025-08-24 23:55:30,964 - sglang - INFO - [2025-08-24 23:55:30 TP0] Capture cuda graph end. Time elapsed: 2.05 s
- 2025-08-24 23:55:30,964 - __main__ - INFO - [2025-08-24 23:55:30 TP0] Capture cuda graph end. Time elapsed: 2.05 s
- 2025-08-24 23:55:31,675 - sglang - INFO - [2025-08-24 23:55:31 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:55:31,676 - __main__ - INFO - [2025-08-24 23:55:31 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:55:31,760 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
- 2025-08-24 23:55:32,804 - sglang - INFO - [2025-08-24 23:55:32 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:55:32,804 - __main__ - INFO - [2025-08-24 23:55:32 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:55:32,804 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-24 23:55:32,854 - __main__ - INFO - sglang server is ready.
- 2025-08-24 23:55:32,854 - __main__ - INFO - Queue remaining: 1
- 2025-08-24 23:55:32,854 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:55:32,854 - __main__ - INFO -
- Worker ID
- ---------
- 2025-08-24 23:55:32,855 - __main__ - INFO - Worker 0 processing work item 064eedd4edcd817030605d106353694b3e3ec8b1
- 2025-08-24 23:55:32,855 - __main__ - INFO - Created all tasks for 064eedd4edcd817030605d106353694b3e3ec8b1
- 2025-08-24 23:55:32,863 - __main__ - INFO - Got 1 pages to do for ./workspace/map1.pdf in worker 0
- 2025-08-24 23:55:33,327 - sglang - INFO - [2025-08-24 23:55:33] The server is fired up and ready to roll!
- 2025-08-24 23:55:33,327 - __main__ - INFO - [2025-08-24 23:55:33] The server is fired up and ready to roll!
- 2025-08-24 23:55:42,855 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:55:42,855 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:55:42,855 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:55:44,265 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:55:44,435 - sglang - INFO - Token indices sequence length is longer than the specified maximum sequence length for this model (78749 > 32768). Running this sequence through the model will result in indexing errors
- 2025-08-24 23:55:52,933 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:55:52,933 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:55:52,933 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:55:58,157 - __main__ - WARNING - ValueError on attempt 0 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:56:02,935 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:56:02,935 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:56:02,935 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:56:03,558 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-08-24 23:56:05,826 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:56:06,133 - __main__ - WARNING - ValueError on attempt 1 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:56:10,964 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:56:11,286 - __main__ - WARNING - ValueError on attempt 2 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:56:12,936 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:56:12,937 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:56:12,937 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:56:15,671 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:56:16,009 - __main__ - WARNING - ValueError on attempt 3 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:56:20,410 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:56:20,768 - __main__ - WARNING - ValueError on attempt 4 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:56:22,938 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:56:22,938 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:56:22,939 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:56:25,382 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:56:25,713 - __main__ - WARNING - ValueError on attempt 5 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:56:29,904 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:56:30,241 - __main__ - WARNING - ValueError on attempt 6 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:56:32,940 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:56:32,940 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:56:32,940 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:56:34,598 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:56:34,937 - __main__ - WARNING - ValueError on attempt 7 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:56:34,937 - __main__ - ERROR - Failed to process ./workspace/map1.pdf-1 after 8 attempts.
- 2025-08-24 23:56:35,309 - __main__ - ERROR - Document ./workspace/map1.pdf has 1 fallback pages out of 1 exceeding max_page_error_rate of 0.004, discarding document.
- 2025-08-24 23:56:35,310 - __main__ - INFO - Finished TaskGroup for worker on 064eedd4edcd817030605d106353694b3e3ec8b1
- 2025-08-24 23:56:35,310 - __main__ - INFO - Got 0 docs for 064eedd4edcd817030605d106353694b3e3ec8b1
- 2025-08-24 23:56:35,311 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-08-24 23:56:35,312 - __main__ - INFO - Work done
- 2025-08-24 23:56:35,312 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-08-24 23:57:04,821 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:57:04,821 - __main__ - INFO - Loading file at ./workspace/map1.pdf as PDF document
- 2025-08-24 23:57:04,821 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:57:04,825 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
- 2025-08-24 23:57:05,000 - __main__ - INFO - Starting pipeline with PID 482844
- 2025-08-24 23:57:05,000 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:57:05,073 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-08-24 23:57:06,104 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-08-24 23:57:07,161 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-08-24 23:57:08,218 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-08-24 23:57:09,267 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-08-24 23:57:10,371 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-08-24 23:57:11,124 - sglang - INFO - [2025-08-24 23:57:11] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=608298291, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:57:11,124 - __main__ - INFO - [2025-08-24 23:57:11] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=608298291, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:57:11,431 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-08-24 23:57:12,069 - sglang - INFO - [2025-08-24 23:57:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:57:12,069 - __main__ - INFO - [2025-08-24 23:57:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:57:12,504 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-08-24 23:57:13,550 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-08-24 23:57:14,596 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-08-24 23:57:15,643 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-08-24 23:57:16,689 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-08-24 23:57:17,735 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-08-24 23:57:18,780 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-08-24 23:57:19,094 - sglang - INFO - [2025-08-24 23:57:19 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:57:19,094 - __main__ - INFO - [2025-08-24 23:57:19 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:57:19,097 - sglang - INFO - [2025-08-24 23:57:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:57:19,097 - __main__ - INFO - [2025-08-24 23:57:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:57:19,097 - sglang - INFO - [2025-08-24 23:57:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:57:19,097 - __main__ - INFO - [2025-08-24 23:57:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:57:19,098 - sglang - INFO - [2025-08-24 23:57:19 TP0] Init torch distributed begin.
- 2025-08-24 23:57:19,098 - __main__ - INFO - [2025-08-24 23:57:19 TP0] Init torch distributed begin.
- 2025-08-24 23:57:19,859 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-08-24 23:57:20,488 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-08-24 23:57:42,899 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:57:42,900 - __main__ - INFO - Loading file at ./workspace/map1.pdf as PDF document
- 2025-08-24 23:57:42,900 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:57:42,904 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
- 2025-08-24 23:57:43,097 - __main__ - INFO - Starting pipeline with PID 483718
- 2025-08-24 23:57:43,097 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:57:43,196 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-08-24 23:57:44,232 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-08-24 23:57:45,288 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-08-24 23:57:46,333 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-08-24 23:57:47,364 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-08-24 23:57:48,478 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-08-24 23:57:49,539 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-08-24 23:57:49,692 - sglang - INFO - [2025-08-24 23:57:49] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1010487791, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:57:49,692 - __main__ - INFO - [2025-08-24 23:57:49] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1010487791, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:57:50,619 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-08-24 23:57:50,815 - sglang - INFO - [2025-08-24 23:57:50] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:57:50,816 - __main__ - INFO - [2025-08-24 23:57:50] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:57:51,700 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-08-24 23:57:52,767 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-08-24 23:57:53,831 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-08-24 23:57:54,956 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-08-24 23:57:56,012 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-08-24 23:57:57,079 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-08-24 23:57:57,540 - sglang - INFO - [2025-08-24 23:57:57 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:57:57,540 - __main__ - INFO - [2025-08-24 23:57:57 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:57:57,544 - sglang - INFO - [2025-08-24 23:57:57 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:57:57,544 - __main__ - INFO - [2025-08-24 23:57:57 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:57:57,544 - sglang - INFO - [2025-08-24 23:57:57 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:57:57,544 - __main__ - INFO - [2025-08-24 23:57:57 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:57:57,544 - sglang - INFO - [2025-08-24 23:57:57 TP0] Init torch distributed begin.
- 2025-08-24 23:57:57,544 - __main__ - INFO - [2025-08-24 23:57:57 TP0] Init torch distributed begin.
- 2025-08-24 23:57:58,147 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-08-24 23:57:59,210 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-08-24 23:58:00,268 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-08-24 23:58:01,321 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-08-24 23:58:02,367 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-08-24 23:58:02,851 - sglang - INFO - [2025-08-24 23:58:02 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:58:02,851 - __main__ - INFO - [2025-08-24 23:58:02 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:58:03,420 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:58:03,420 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:58:03,421 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-08-24 23:58:04,199 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.24it/s]
- 2025-08-24 23:58:04,199 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.24it/s]
- 2025-08-24 23:58:04,476 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-08-24 23:58:05,056 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.19it/s]
- 2025-08-24 23:58:05,056 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.19it/s]
- 2025-08-24 23:58:05,519 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-08-24 23:58:05,872 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.21it/s]
- 2025-08-24 23:58:05,872 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.21it/s]
- 2025-08-24 23:58:06,216 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.57it/s]
- 2025-08-24 23:58:06,216 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.57it/s]
- 2025-08-24 23:58:06,216 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.42it/s]
- 2025-08-24 23:58:06,216 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.42it/s]
- 2025-08-24 23:58:06,216 - sglang - INFO -
- 2025-08-24 23:58:06,216 - __main__ - INFO -
- 2025-08-24 23:58:06,262 - sglang - INFO - [2025-08-24 23:58:06 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:58:06,262 - __main__ - INFO - [2025-08-24 23:58:06 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-24 23:58:06,268 - sglang - INFO - [2025-08-24 23:58:06 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:58:06,268 - __main__ - INFO - [2025-08-24 23:58:06 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-24 23:58:06,268 - sglang - INFO - [2025-08-24 23:58:06 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:58:06,268 - __main__ - INFO - [2025-08-24 23:58:06 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-24 23:58:06,416 - sglang - INFO - [2025-08-24 23:58:06 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:58:06,416 - __main__ - INFO - [2025-08-24 23:58:06 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-24 23:58:06,567 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-08-24 23:58:07,607 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-08-24 23:58:08,260 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.02s/it]
50%|█████ | 2/4 [00:01<00:01, 1.71it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.26it/s]
100%|██████████| 4/4 [00:01<00:00, 2.68it/s]
100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
- 2025-08-24 23:58:08,260 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.02s/it]
50%|█████ | 2/4 [00:01<00:01, 1.71it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.26it/s]
100%|██████████| 4/4 [00:01<00:00, 2.68it/s]
100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
- 2025-08-24 23:58:08,260 - sglang - INFO - [2025-08-24 23:58:08 TP0] Capture cuda graph end. Time elapsed: 1.84 s
- 2025-08-24 23:58:08,260 - __main__ - INFO - [2025-08-24 23:58:08 TP0] Capture cuda graph end. Time elapsed: 1.84 s
- 2025-08-24 23:58:08,651 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-08-24 23:58:08,965 - sglang - INFO - [2025-08-24 23:58:08 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:58:08,965 - __main__ - INFO - [2025-08-24 23:58:08 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-24 23:58:09,708 - __main__ - INFO - sglang server is ready.
- 2025-08-24 23:58:09,709 - __main__ - INFO - Queue remaining: 1
- 2025-08-24 23:58:09,709 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:58:09,709 - __main__ - INFO -
- Worker ID
- ---------
- 2025-08-24 23:58:09,709 - __main__ - INFO - Worker 0 processing work item 064eedd4edcd817030605d106353694b3e3ec8b1
- 2025-08-24 23:58:09,709 - __main__ - INFO - Created all tasks for 064eedd4edcd817030605d106353694b3e3ec8b1
- 2025-08-24 23:58:09,717 - __main__ - INFO - Got 1 pages to do for ./workspace/map1.pdf in worker 0
- 2025-08-24 23:58:10,044 - sglang - INFO - [2025-08-24 23:58:10 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:58:10,044 - __main__ - INFO - [2025-08-24 23:58:10 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-24 23:58:10,045 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-24 23:58:10,677 - sglang - INFO - [2025-08-24 23:58:10] The server is fired up and ready to roll!
- 2025-08-24 23:58:10,678 - __main__ - INFO - [2025-08-24 23:58:10] The server is fired up and ready to roll!
- 2025-08-24 23:58:19,710 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:58:19,710 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:58:19,711 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:58:21,237 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:58:21,388 - sglang - INFO - Token indices sequence length is longer than the specified maximum sequence length for this model (78749 > 32768). Running this sequence through the model will result in indexing errors
- 2025-08-24 23:58:29,733 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:58:29,733 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:58:29,733 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:58:34,535 - __main__ - WARNING - ValueError on attempt 0 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:58:39,734 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:58:39,735 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:58:39,735 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:58:41,440 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-08-24 23:58:42,288 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:58:42,596 - __main__ - WARNING - ValueError on attempt 1 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:58:47,737 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:58:48,060 - __main__ - WARNING - ValueError on attempt 2 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:58:49,737 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:58:49,738 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:58:49,738 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:58:52,705 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:58:53,022 - __main__ - WARNING - ValueError on attempt 3 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:58:57,211 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:58:57,582 - __main__ - WARNING - ValueError on attempt 4 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:58:59,739 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:58:59,739 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:58:59,739 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:59:01,984 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:59:02,323 - __main__ - WARNING - ValueError on attempt 5 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:59:06,369 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:59:06,681 - __main__ - WARNING - ValueError on attempt 6 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:59:09,741 - __main__ - INFO - Queue remaining: 0
- 2025-08-24 23:59:09,741 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-24 23:59:09,741 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 1
- 2025-08-24 23:59:10,935 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
- 2025-08-24 23:59:11,295 - __main__ - WARNING - ValueError on attempt 7 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
- 2025-08-24 23:59:11,296 - __main__ - ERROR - Failed to process ./workspace/map1.pdf-1 after 8 attempts.
- 2025-08-24 23:59:11,666 - __main__ - ERROR - Document ./workspace/map1.pdf has 1 fallback pages out of 1 exceeding max_page_error_rate of 0.004, discarding document.
- 2025-08-24 23:59:11,667 - __main__ - INFO - Finished TaskGroup for worker on 064eedd4edcd817030605d106353694b3e3ec8b1
- 2025-08-24 23:59:11,667 - __main__ - INFO - Got 0 docs for 064eedd4edcd817030605d106353694b3e3ec8b1
- 2025-08-24 23:59:11,668 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-08-24 23:59:11,668 - __main__ - INFO - Work done
- 2025-08-24 23:59:11,669 - __main__ - INFO - Got cancellation request for SGLang server
- 2025-08-24 23:59:37,224 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
- 2025-08-24 23:59:37,224 - __main__ - INFO - Loading file at ./workspace/UNETR.pdf as PDF document
- 2025-08-24 23:59:37,224 - __main__ - INFO - Found 1 total pdf paths to add
- 2025-08-24 23:59:37,230 - __main__ - INFO - Calculated items_per_group: 1 based on average pages per PDF: 11.00
- 2025-08-24 23:59:37,413 - __main__ - INFO - Starting pipeline with PID 484898
- 2025-08-24 23:59:37,413 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
- 2025-08-24 23:59:37,499 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
- 2025-08-24 23:59:38,535 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
- 2025-08-24 23:59:39,594 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
- 2025-08-24 23:59:40,666 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
- 2025-08-24 23:59:41,733 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
- 2025-08-24 23:59:42,799 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
- 2025-08-24 23:59:43,852 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
- 2025-08-24 23:59:44,133 - sglang - INFO - [2025-08-24 23:59:44] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1008456358, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:59:44,133 - __main__ - INFO - [2025-08-24 23:59:44] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1008456358, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
- 2025-08-24 23:59:44,921 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
- 2025-08-24 23:59:45,150 - sglang - INFO - [2025-08-24 23:59:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:59:45,151 - __main__ - INFO - [2025-08-24 23:59:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
- 2025-08-24 23:59:45,968 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
- 2025-08-24 23:59:47,035 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
- 2025-08-24 23:59:48,100 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
- 2025-08-24 23:59:49,173 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
- 2025-08-24 23:59:50,243 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
- 2025-08-24 23:59:51,314 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
- 2025-08-24 23:59:51,579 - sglang - INFO - [2025-08-24 23:59:51 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:59:51,579 - __main__ - INFO - [2025-08-24 23:59:51 TP0] Overlap scheduler is disabled for multimodal models.
- 2025-08-24 23:59:51,581 - sglang - INFO - [2025-08-24 23:59:51 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:59:51,581 - __main__ - INFO - [2025-08-24 23:59:51 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
- 2025-08-24 23:59:51,581 - sglang - INFO - [2025-08-24 23:59:51 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:59:51,581 - __main__ - INFO - [2025-08-24 23:59:51 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
- 2025-08-24 23:59:51,581 - sglang - INFO - [2025-08-24 23:59:51 TP0] Init torch distributed begin.
- 2025-08-24 23:59:51,581 - __main__ - INFO - [2025-08-24 23:59:51 TP0] Init torch distributed begin.
- 2025-08-24 23:59:52,346 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
- 2025-08-24 23:59:53,409 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
- 2025-08-24 23:59:54,483 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
- 2025-08-24 23:59:55,552 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
- 2025-08-24 23:59:56,603 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
- 2025-08-24 23:59:56,899 - sglang - INFO - [2025-08-24 23:59:56 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:59:56,900 - __main__ - INFO - [2025-08-24 23:59:56 TP0] Load weight begin. avail mem=23.33 GB
- 2025-08-24 23:59:57,458 - sglang - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:59:57,458 - __main__ - INFO -
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
- 2025-08-24 23:59:57,678 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
- 2025-08-24 23:59:58,298 - sglang - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.19it/s]
- 2025-08-24 23:59:58,298 - __main__ - INFO -
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.19it/s]
- 2025-08-24 23:59:58,753 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
- 2025-08-24 23:59:59,177 - sglang - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.16it/s]
- 2025-08-24 23:59:59,177 - __main__ - INFO -
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.16it/s]
- 2025-08-24 23:59:59,821 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
- 2025-08-25 00:00:00,029 - sglang - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.17it/s]
- 2025-08-25 00:00:00,029 - __main__ - INFO -
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.17it/s]
- 2025-08-25 00:00:00,388 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.51it/s]
- 2025-08-25 00:00:00,388 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.51it/s]
- 2025-08-25 00:00:00,388 - sglang - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.37it/s]
- 2025-08-25 00:00:00,388 - __main__ - INFO -
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.37it/s]
- 2025-08-25 00:00:00,388 - sglang - INFO -
- 2025-08-25 00:00:00,388 - __main__ - INFO -
- 2025-08-25 00:00:00,434 - sglang - INFO - [2025-08-25 00:00:00 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-25 00:00:00,435 - __main__ - INFO - [2025-08-25 00:00:00 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
- 2025-08-25 00:00:00,440 - sglang - INFO - [2025-08-25 00:00:00 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-25 00:00:00,440 - __main__ - INFO - [2025-08-25 00:00:00 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
- 2025-08-25 00:00:00,441 - sglang - INFO - [2025-08-25 00:00:00 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-25 00:00:00,441 - __main__ - INFO - [2025-08-25 00:00:00 TP0] Memory pool end. avail mem=5.30 GB
- 2025-08-25 00:00:00,590 - sglang - INFO - [2025-08-25 00:00:00 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-25 00:00:00,590 - __main__ - INFO - [2025-08-25 00:00:00 TP0] Capture cuda graph begin. This can take up to several minutes.
- 2025-08-25 00:00:00,881 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
- 2025-08-25 00:00:01,968 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
- 2025-08-25 00:00:02,463 - sglang - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.03s/it]
50%|█████ | 2/4 [00:01<00:01, 1.70it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s]
100%|██████████| 4/4 [00:01<00:00, 2.61it/s]
100%|██████████| 4/4 [00:01<00:00, 2.14it/s]
- 2025-08-25 00:00:02,463 - __main__ - INFO -
0%| | 0/4 [00:00<?, ?it/s]
25%|██▌ | 1/4 [00:01<00:03, 1.03s/it]
50%|█████ | 2/4 [00:01<00:01, 1.70it/s]
75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s]
100%|██████████| 4/4 [00:01<00:00, 2.61it/s]
100%|██████████| 4/4 [00:01<00:00, 2.14it/s]
- 2025-08-25 00:00:02,463 - sglang - INFO - [2025-08-25 00:00:02 TP0] Capture cuda graph end. Time elapsed: 1.87 s
- 2025-08-25 00:00:02,463 - __main__ - INFO - [2025-08-25 00:00:02 TP0] Capture cuda graph end. Time elapsed: 1.87 s
- 2025-08-25 00:00:03,057 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
- 2025-08-25 00:00:03,168 - sglang - INFO - [2025-08-25 00:00:03 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-25 00:00:03,169 - __main__ - INFO - [2025-08-25 00:00:03 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
- 2025-08-25 00:00:04,142 - __main__ - INFO - sglang server is ready.
- 2025-08-25 00:00:04,143 - __main__ - INFO - Queue remaining: 1
- 2025-08-25 00:00:04,143 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-25 00:00:04,143 - __main__ - INFO -
- Worker ID
- ---------
- 2025-08-25 00:00:04,143 - __main__ - INFO - Worker 0 processing work item 73c9399482ed5cf37e1888c000e49ef82a30c10d
- 2025-08-25 00:00:04,143 - __main__ - INFO - Created all tasks for 73c9399482ed5cf37e1888c000e49ef82a30c10d
- 2025-08-25 00:00:04,153 - __main__ - INFO - Got 11 pages to do for ./workspace/UNETR.pdf in worker 0
- 2025-08-25 00:00:04,247 - sglang - INFO - [2025-08-25 00:00:04 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-25 00:00:04,247 - __main__ - INFO - [2025-08-25 00:00:04 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-25 00:00:04,247 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-25 00:00:04,964 - sglang - INFO - [2025-08-25 00:00:04] The server is fired up and ready to roll!
- 2025-08-25 00:00:04,964 - __main__ - INFO - [2025-08-25 00:00:04] The server is fired up and ready to roll!
- 2025-08-25 00:00:11,073 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-2
- 2025-08-25 00:00:11,082 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-1
- 2025-08-25 00:00:11,124 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-3
- 2025-08-25 00:00:11,142 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-6
- 2025-08-25 00:00:11,144 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-5
- 2025-08-25 00:00:11,154 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-7
- 2025-08-25 00:00:11,165 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-11
- 2025-08-25 00:00:11,175 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-8
- 2025-08-25 00:00:11,178 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-9
- 2025-08-25 00:00:11,179 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-10
- 2025-08-25 00:00:11,250 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-4
- 2025-08-25 00:00:14,144 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:00:14,145 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-25 00:00:14,145 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-08-25 00:00:24,147 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:00:24,147 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-25 00:00:24,147 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-08-25 00:00:28,213 - sglang - INFO - [2025-08-25 00:00:28 TP0] Prefill batch. #new-seq: 1, #new-token: 3390, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-25 00:00:28,213 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-25 00:00:29,414 - sglang - INFO - [2025-08-25 00:00:29 TP0] Prefill batch. #new-seq: 4, #new-token: 14323, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.09, #running-req: 1, #queue-req: 6
- 2025-08-25 00:00:29,414 - __main__ - INFO - sglang running req: 1 queue req: 6
- 2025-08-25 00:00:34,149 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:00:34,149 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- 2025-08-25 00:00:34,149 - __main__ - INFO -
- Worker ID | started
- ----------+--------
- 0 | 11
- 2025-08-25 00:00:34,174 - sglang - INFO - [2025-08-25 00:00:34 TP0] Decode batch. #running-req: 5, #token: 17878, token usage: 0.47, gen throughput (token/s): 5.55, #queue-req: 6
- 2025-08-25 00:00:34,174 - __main__ - INFO - sglang running req: 5 queue req: 6
- 2025-08-25 00:00:35,058 - sglang - INFO - [2025-08-25 00:00:35 TP0] Decode batch. #running-req: 5, #token: 18078, token usage: 0.48, gen throughput (token/s): 226.02, #queue-req: 6
- 2025-08-25 00:00:35,059 - __main__ - INFO - sglang running req: 5 queue req: 6
- 2025-08-25 00:00:35,945 - sglang - INFO - [2025-08-25 00:00:35 TP0] Decode batch. #running-req: 5, #token: 18278, token usage: 0.48, gen throughput (token/s): 225.66, #queue-req: 6
- 2025-08-25 00:00:35,945 - __main__ - INFO - sglang running req: 5 queue req: 6
- 2025-08-25 00:00:36,831 - sglang - INFO - [2025-08-25 00:00:36 TP0] Decode batch. #running-req: 5, #token: 18478, token usage: 0.49, gen throughput (token/s): 225.59, #queue-req: 6
- 2025-08-25 00:00:36,832 - __main__ - INFO - sglang running req: 5 queue req: 6
- 2025-08-25 00:00:37,718 - sglang - INFO - [2025-08-25 00:00:37 TP0] Decode batch. #running-req: 5, #token: 18678, token usage: 0.49, gen throughput (token/s): 225.66, #queue-req: 6
- 2025-08-25 00:00:37,718 - __main__ - INFO - sglang running req: 5 queue req: 6
- 2025-08-25 00:00:38,476 - sglang - INFO - [2025-08-25 00:00:38 TP0] Prefill batch. #new-seq: 2, #new-token: 7744, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.45, #running-req: 4, #queue-req: 4
- 2025-08-25 00:00:38,476 - __main__ - INFO - sglang running req: 4 queue req: 4
- 2025-08-25 00:00:40,686 - sglang - INFO - [2025-08-25 00:00:40 TP0] Decode batch. #running-req: 6, #token: 24906, token usage: 0.66, gen throughput (token/s): 69.07, #queue-req: 4
- 2025-08-25 00:00:40,686 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:41,595 - sglang - INFO - [2025-08-25 00:00:41 TP0] Decode batch. #running-req: 6, #token: 25146, token usage: 0.66, gen throughput (token/s): 263.89, #queue-req: 4
- 2025-08-25 00:00:41,596 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:42,508 - sglang - INFO - [2025-08-25 00:00:42 TP0] Decode batch. #running-req: 6, #token: 25386, token usage: 0.67, gen throughput (token/s): 263.09, #queue-req: 4
- 2025-08-25 00:00:42,508 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:43,420 - sglang - INFO - [2025-08-25 00:00:43 TP0] Decode batch. #running-req: 6, #token: 25626, token usage: 0.67, gen throughput (token/s): 262.92, #queue-req: 4
- 2025-08-25 00:00:43,421 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:44,151 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:00:44,151 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 22.33 22.33
- sglang_output_tokens 3.39 3.39
- 2025-08-25 00:00:44,152 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 1 | 11
- 2025-08-25 00:00:44,333 - sglang - INFO - [2025-08-25 00:00:44 TP0] Decode batch. #running-req: 6, #token: 25866, token usage: 0.68, gen throughput (token/s): 263.04, #queue-req: 4
- 2025-08-25 00:00:44,333 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:45,247 - sglang - INFO - [2025-08-25 00:00:45 TP0] Decode batch. #running-req: 6, #token: 26106, token usage: 0.69, gen throughput (token/s): 262.55, #queue-req: 4
- 2025-08-25 00:00:45,247 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:46,161 - sglang - INFO - [2025-08-25 00:00:46 TP0] Decode batch. #running-req: 6, #token: 26346, token usage: 0.69, gen throughput (token/s): 262.55, #queue-req: 4
- 2025-08-25 00:00:46,161 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:47,076 - sglang - INFO - [2025-08-25 00:00:47 TP0] Decode batch. #running-req: 6, #token: 26586, token usage: 0.70, gen throughput (token/s): 262.17, #queue-req: 4
- 2025-08-25 00:00:47,077 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:47,992 - sglang - INFO - [2025-08-25 00:00:47 TP0] Decode batch. #running-req: 6, #token: 26826, token usage: 0.71, gen throughput (token/s): 262.07, #queue-req: 4
- 2025-08-25 00:00:47,992 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:48,909 - sglang - INFO - [2025-08-25 00:00:48 TP0] Decode batch. #running-req: 6, #token: 27066, token usage: 0.71, gen throughput (token/s): 261.91, #queue-req: 4
- 2025-08-25 00:00:48,909 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:49,826 - sglang - INFO - [2025-08-25 00:00:49 TP0] Decode batch. #running-req: 6, #token: 27306, token usage: 0.72, gen throughput (token/s): 261.67, #queue-req: 4
- 2025-08-25 00:00:49,826 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:50,744 - sglang - INFO - [2025-08-25 00:00:50 TP0] Decode batch. #running-req: 6, #token: 27546, token usage: 0.73, gen throughput (token/s): 261.37, #queue-req: 4
- 2025-08-25 00:00:50,744 - __main__ - INFO - sglang running req: 6 queue req: 4
- 2025-08-25 00:00:51,640 - sglang - INFO - [2025-08-25 00:00:51 TP0] Prefill batch. #new-seq: 1, #new-token: 3935, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.63, #running-req: 5, #queue-req: 3
- 2025-08-25 00:00:51,640 - __main__ - INFO - sglang running req: 5 queue req: 3
- 2025-08-25 00:00:52,715 - sglang - INFO - [2025-08-25 00:00:52 TP0] Decode batch. #running-req: 6, #token: 27740, token usage: 0.73, gen throughput (token/s): 121.25, #queue-req: 3
- 2025-08-25 00:00:52,715 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-08-25 00:00:53,630 - sglang - INFO - [2025-08-25 00:00:53 TP0] Decode batch. #running-req: 6, #token: 27980, token usage: 0.74, gen throughput (token/s): 262.42, #queue-req: 3
- 2025-08-25 00:00:53,630 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-08-25 00:00:54,154 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:00:54,154 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 61.92 61.92
- sglang_output_tokens 12.20 12.20
- 2025-08-25 00:00:54,155 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 2 | 11
- 2025-08-25 00:00:54,545 - sglang - INFO - [2025-08-25 00:00:54 TP0] Decode batch. #running-req: 6, #token: 28220, token usage: 0.74, gen throughput (token/s): 262.13, #queue-req: 3
- 2025-08-25 00:00:54,545 - __main__ - INFO - sglang running req: 6 queue req: 3
- 2025-08-25 00:00:54,706 - sglang - INFO - [2025-08-25 00:00:54 TP0] Prefill batch. #new-seq: 1, #new-token: 3827, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 5, #queue-req: 2
- 2025-08-25 00:00:54,706 - __main__ - INFO - sglang running req: 5 queue req: 2
- 2025-08-25 00:00:56,499 - sglang - INFO - [2025-08-25 00:00:56 TP0] Decode batch. #running-req: 6, #token: 28948, token usage: 0.76, gen throughput (token/s): 122.33, #queue-req: 2
- 2025-08-25 00:00:56,499 - __main__ - INFO - sglang running req: 6 queue req: 2
- 2025-08-25 00:00:57,418 - sglang - INFO - [2025-08-25 00:00:57 TP0] Decode batch. #running-req: 6, #token: 29188, token usage: 0.77, gen throughput (token/s): 261.19, #queue-req: 2
- 2025-08-25 00:00:57,418 - __main__ - INFO - sglang running req: 6 queue req: 2
- 2025-08-25 00:00:58,338 - sglang - INFO - [2025-08-25 00:00:58 TP0] Decode batch. #running-req: 6, #token: 29428, token usage: 0.77, gen throughput (token/s): 260.81, #queue-req: 2
- 2025-08-25 00:00:58,338 - __main__ - INFO - sglang running req: 6 queue req: 2
- 2025-08-25 00:00:59,258 - sglang - INFO - [2025-08-25 00:00:59 TP0] Decode batch. #running-req: 6, #token: 29668, token usage: 0.78, gen throughput (token/s): 260.89, #queue-req: 2
- 2025-08-25 00:00:59,258 - __main__ - INFO - sglang running req: 6 queue req: 2
- 2025-08-25 00:00:59,625 - sglang - INFO - [2025-08-25 00:00:59 TP0] Prefill batch. #new-seq: 1, #new-token: 3860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.63, #running-req: 5, #queue-req: 1
- 2025-08-25 00:00:59,625 - __main__ - INFO - sglang running req: 5 queue req: 1
- 2025-08-25 00:01:01,222 - sglang - INFO - [2025-08-25 00:01:01 TP0] Decode batch. #running-req: 6, #token: 28048, token usage: 0.74, gen throughput (token/s): 121.67, #queue-req: 1
- 2025-08-25 00:01:01,223 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-08-25 00:01:02,141 - sglang - INFO - [2025-08-25 00:01:02 TP0] Decode batch. #running-req: 6, #token: 28288, token usage: 0.74, gen throughput (token/s): 261.19, #queue-req: 1
- 2025-08-25 00:01:02,141 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-08-25 00:01:03,059 - sglang - INFO - [2025-08-25 00:01:03 TP0] Decode batch. #running-req: 6, #token: 28528, token usage: 0.75, gen throughput (token/s): 261.54, #queue-req: 1
- 2025-08-25 00:01:03,059 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-08-25 00:01:03,977 - sglang - INFO - [2025-08-25 00:01:03 TP0] Decode batch. #running-req: 6, #token: 28768, token usage: 0.76, gen throughput (token/s): 261.29, #queue-req: 1
- 2025-08-25 00:01:03,978 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-08-25 00:01:04,156 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:01:04,156 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 143.86 143.86
- sglang_output_tokens 25.93 25.93
- 2025-08-25 00:01:04,157 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 4 | 11
- 2025-08-25 00:01:04,895 - sglang - INFO - [2025-08-25 00:01:04 TP0] Decode batch. #running-req: 6, #token: 29008, token usage: 0.76, gen throughput (token/s): 261.60, #queue-req: 1
- 2025-08-25 00:01:04,895 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-08-25 00:01:05,812 - sglang - INFO - [2025-08-25 00:01:05 TP0] Decode batch. #running-req: 6, #token: 29248, token usage: 0.77, gen throughput (token/s): 261.74, #queue-req: 1
- 2025-08-25 00:01:05,812 - __main__ - INFO - sglang running req: 6 queue req: 1
- 2025-08-25 00:01:05,996 - sglang - INFO - [2025-08-25 00:01:05 TP0] Prefill batch. #new-seq: 1, #new-token: 3792, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 5, #queue-req: 0
- 2025-08-25 00:01:05,996 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-25 00:01:06,698 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
- 2025-08-25 00:01:07,768 - sglang - INFO - [2025-08-25 00:01:07 TP0] Decode batch. #running-req: 6, #token: 28689, token usage: 0.76, gen throughput (token/s): 122.17, #queue-req: 0
- 2025-08-25 00:01:07,768 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-08-25 00:01:08,687 - sglang - INFO - [2025-08-25 00:01:08 TP0] Decode batch. #running-req: 6, #token: 28929, token usage: 0.76, gen throughput (token/s): 261.04, #queue-req: 0
- 2025-08-25 00:01:08,688 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-08-25 00:01:09,605 - sglang - INFO - [2025-08-25 00:01:09 TP0] Decode batch. #running-req: 6, #token: 29169, token usage: 0.77, gen throughput (token/s): 261.46, #queue-req: 0
- 2025-08-25 00:01:09,606 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-08-25 00:01:10,525 - sglang - INFO - [2025-08-25 00:01:10 TP0] Decode batch. #running-req: 6, #token: 29409, token usage: 0.77, gen throughput (token/s): 261.02, #queue-req: 0
- 2025-08-25 00:01:10,525 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-08-25 00:01:11,444 - sglang - INFO - [2025-08-25 00:01:11 TP0] Decode batch. #running-req: 6, #token: 29649, token usage: 0.78, gen throughput (token/s): 261.16, #queue-req: 0
- 2025-08-25 00:01:11,444 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-08-25 00:01:12,364 - sglang - INFO - [2025-08-25 00:01:12 TP0] Decode batch. #running-req: 6, #token: 29889, token usage: 0.79, gen throughput (token/s): 260.81, #queue-req: 0
- 2025-08-25 00:01:12,364 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-08-25 00:01:13,286 - sglang - INFO - [2025-08-25 00:01:13 TP0] Decode batch. #running-req: 6, #token: 30129, token usage: 0.79, gen throughput (token/s): 260.49, #queue-req: 0
- 2025-08-25 00:01:13,286 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-08-25 00:01:14,158 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:01:14,159 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 163.99 163.99
- sglang_output_tokens 35.64 35.64
- 2025-08-25 00:01:14,159 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 5 | 11
- 2025-08-25 00:01:14,207 - sglang - INFO - [2025-08-25 00:01:14 TP0] Decode batch. #running-req: 6, #token: 30369, token usage: 0.80, gen throughput (token/s): 260.52, #queue-req: 0
- 2025-08-25 00:01:14,207 - __main__ - INFO - sglang running req: 6 queue req: 0
- 2025-08-25 00:01:15,128 - sglang - INFO - [2025-08-25 00:01:15 TP0] Decode batch. #running-req: 5, #token: 24750, token usage: 0.65, gen throughput (token/s): 254.11, #queue-req: 0
- 2025-08-25 00:01:15,128 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-25 00:01:16,033 - sglang - INFO - [2025-08-25 00:01:16 TP0] Decode batch. #running-req: 5, #token: 24950, token usage: 0.66, gen throughput (token/s): 220.87, #queue-req: 0
- 2025-08-25 00:01:16,033 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-25 00:01:16,941 - sglang - INFO - [2025-08-25 00:01:16 TP0] Decode batch. #running-req: 5, #token: 25150, token usage: 0.66, gen throughput (token/s): 220.33, #queue-req: 0
- 2025-08-25 00:01:16,941 - __main__ - INFO - sglang running req: 5 queue req: 0
- 2025-08-25 00:01:17,830 - sglang - INFO - [2025-08-25 00:01:17 TP0] Decode batch. #running-req: 4, #token: 20786, token usage: 0.55, gen throughput (token/s): 180.98, #queue-req: 0
- 2025-08-25 00:01:17,831 - __main__ - INFO - sglang running req: 4 queue req: 0
- 2025-08-25 00:01:18,707 - sglang - INFO - [2025-08-25 00:01:18 TP0] Decode batch. #running-req: 3, #token: 16010, token usage: 0.42, gen throughput (token/s): 137.99, #queue-req: 0
- 2025-08-25 00:01:18,707 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:19,584 - sglang - INFO - [2025-08-25 00:01:19 TP0] Decode batch. #running-req: 3, #token: 16130, token usage: 0.42, gen throughput (token/s): 136.93, #queue-req: 0
- 2025-08-25 00:01:19,584 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:20,460 - sglang - INFO - [2025-08-25 00:01:20 TP0] Decode batch. #running-req: 3, #token: 16250, token usage: 0.43, gen throughput (token/s): 136.88, #queue-req: 0
- 2025-08-25 00:01:20,460 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:21,338 - sglang - INFO - [2025-08-25 00:01:21 TP0] Decode batch. #running-req: 3, #token: 16370, token usage: 0.43, gen throughput (token/s): 136.79, #queue-req: 0
- 2025-08-25 00:01:21,338 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:22,215 - sglang - INFO - [2025-08-25 00:01:22 TP0] Decode batch. #running-req: 3, #token: 16490, token usage: 0.43, gen throughput (token/s): 136.80, #queue-req: 0
- 2025-08-25 00:01:22,215 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:23,091 - sglang - INFO - [2025-08-25 00:01:23 TP0] Decode batch. #running-req: 3, #token: 16610, token usage: 0.44, gen throughput (token/s): 136.97, #queue-req: 0
- 2025-08-25 00:01:23,091 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:23,970 - sglang - INFO - [2025-08-25 00:01:23 TP0] Decode batch. #running-req: 3, #token: 16730, token usage: 0.44, gen throughput (token/s): 136.56, #queue-req: 0
- 2025-08-25 00:01:23,970 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:24,160 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:01:24,160 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 261.80 261.80
- sglang_output_tokens 62.01 62.01
- 2025-08-25 00:01:24,160 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 11
- 2025-08-25 00:01:24,848 - sglang - INFO - [2025-08-25 00:01:24 TP0] Decode batch. #running-req: 3, #token: 16850, token usage: 0.44, gen throughput (token/s): 136.64, #queue-req: 0
- 2025-08-25 00:01:24,848 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:25,728 - sglang - INFO - [2025-08-25 00:01:25 TP0] Decode batch. #running-req: 3, #token: 16970, token usage: 0.45, gen throughput (token/s): 136.30, #queue-req: 0
- 2025-08-25 00:01:25,728 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:26,606 - sglang - INFO - [2025-08-25 00:01:26 TP0] Decode batch. #running-req: 3, #token: 17090, token usage: 0.45, gen throughput (token/s): 136.68, #queue-req: 0
- 2025-08-25 00:01:26,607 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:27,486 - sglang - INFO - [2025-08-25 00:01:27 TP0] Decode batch. #running-req: 3, #token: 17210, token usage: 0.45, gen throughput (token/s): 136.40, #queue-req: 0
- 2025-08-25 00:01:27,486 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:28,367 - sglang - INFO - [2025-08-25 00:01:28 TP0] Decode batch. #running-req: 3, #token: 17330, token usage: 0.46, gen throughput (token/s): 136.26, #queue-req: 0
- 2025-08-25 00:01:28,367 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:29,247 - sglang - INFO - [2025-08-25 00:01:29 TP0] Decode batch. #running-req: 3, #token: 17450, token usage: 0.46, gen throughput (token/s): 136.26, #queue-req: 0
- 2025-08-25 00:01:29,248 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:30,128 - sglang - INFO - [2025-08-25 00:01:30 TP0] Decode batch. #running-req: 3, #token: 17570, token usage: 0.46, gen throughput (token/s): 136.21, #queue-req: 0
- 2025-08-25 00:01:30,129 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:31,009 - sglang - INFO - [2025-08-25 00:01:31 TP0] Decode batch. #running-req: 3, #token: 17690, token usage: 0.47, gen throughput (token/s): 136.25, #queue-req: 0
- 2025-08-25 00:01:31,009 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:31,890 - sglang - INFO - [2025-08-25 00:01:31 TP0] Decode batch. #running-req: 3, #token: 17810, token usage: 0.47, gen throughput (token/s): 136.24, #queue-req: 0
- 2025-08-25 00:01:31,890 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:32,772 - sglang - INFO - [2025-08-25 00:01:32 TP0] Decode batch. #running-req: 3, #token: 17930, token usage: 0.47, gen throughput (token/s): 136.04, #queue-req: 0
- 2025-08-25 00:01:32,772 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:33,656 - sglang - INFO - [2025-08-25 00:01:33 TP0] Decode batch. #running-req: 3, #token: 18050, token usage: 0.48, gen throughput (token/s): 135.74, #queue-req: 0
- 2025-08-25 00:01:33,656 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:34,162 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:01:34,162 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 239.41 239.41
- sglang_output_tokens 56.71 56.71
- 2025-08-25 00:01:34,162 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 11
- 2025-08-25 00:01:34,539 - sglang - INFO - [2025-08-25 00:01:34 TP0] Decode batch. #running-req: 3, #token: 18170, token usage: 0.48, gen throughput (token/s): 135.86, #queue-req: 0
- 2025-08-25 00:01:34,540 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:35,424 - sglang - INFO - [2025-08-25 00:01:35 TP0] Decode batch. #running-req: 3, #token: 18290, token usage: 0.48, gen throughput (token/s): 135.70, #queue-req: 0
- 2025-08-25 00:01:35,424 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:36,308 - sglang - INFO - [2025-08-25 00:01:36 TP0] Decode batch. #running-req: 3, #token: 18410, token usage: 0.48, gen throughput (token/s): 135.77, #queue-req: 0
- 2025-08-25 00:01:36,308 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:37,191 - sglang - INFO - [2025-08-25 00:01:37 TP0] Decode batch. #running-req: 3, #token: 18530, token usage: 0.49, gen throughput (token/s): 135.82, #queue-req: 0
- 2025-08-25 00:01:37,191 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:38,075 - sglang - INFO - [2025-08-25 00:01:38 TP0] Decode batch. #running-req: 3, #token: 18650, token usage: 0.49, gen throughput (token/s): 135.72, #queue-req: 0
- 2025-08-25 00:01:38,075 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:38,960 - sglang - INFO - [2025-08-25 00:01:38 TP0] Decode batch. #running-req: 3, #token: 18770, token usage: 0.49, gen throughput (token/s): 135.66, #queue-req: 0
- 2025-08-25 00:01:38,960 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:39,843 - sglang - INFO - [2025-08-25 00:01:39 TP0] Decode batch. #running-req: 3, #token: 18890, token usage: 0.50, gen throughput (token/s): 135.77, #queue-req: 0
- 2025-08-25 00:01:39,844 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:40,729 - sglang - INFO - [2025-08-25 00:01:40 TP0] Decode batch. #running-req: 3, #token: 19010, token usage: 0.50, gen throughput (token/s): 135.56, #queue-req: 0
- 2025-08-25 00:01:40,729 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:41,615 - sglang - INFO - [2025-08-25 00:01:41 TP0] Decode batch. #running-req: 3, #token: 19130, token usage: 0.50, gen throughput (token/s): 135.41, #queue-req: 0
- 2025-08-25 00:01:41,615 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:42,500 - sglang - INFO - [2025-08-25 00:01:42 TP0] Decode batch. #running-req: 3, #token: 19250, token usage: 0.51, gen throughput (token/s): 135.50, #queue-req: 0
- 2025-08-25 00:01:42,501 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:43,387 - sglang - INFO - [2025-08-25 00:01:43 TP0] Decode batch. #running-req: 3, #token: 19370, token usage: 0.51, gen throughput (token/s): 135.36, #queue-req: 0
- 2025-08-25 00:01:43,387 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:44,164 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:01:44,164 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 220.55 220.55
- sglang_output_tokens 52.24 52.24
- 2025-08-25 00:01:44,164 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 8 | 11
- 2025-08-25 00:01:44,274 - sglang - INFO - [2025-08-25 00:01:44 TP0] Decode batch. #running-req: 3, #token: 19490, token usage: 0.51, gen throughput (token/s): 135.32, #queue-req: 0
- 2025-08-25 00:01:44,274 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:45,160 - sglang - INFO - [2025-08-25 00:01:45 TP0] Decode batch. #running-req: 3, #token: 19610, token usage: 0.52, gen throughput (token/s): 135.43, #queue-req: 0
- 2025-08-25 00:01:45,160 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:46,047 - sglang - INFO - [2025-08-25 00:01:46 TP0] Decode batch. #running-req: 3, #token: 19730, token usage: 0.52, gen throughput (token/s): 135.35, #queue-req: 0
- 2025-08-25 00:01:46,047 - __main__ - INFO - sglang running req: 3 queue req: 0
- 2025-08-25 00:01:46,924 - sglang - INFO - [2025-08-25 00:01:46 TP0] Decode batch. #running-req: 2, #token: 13830, token usage: 0.36, gen throughput (token/s): 116.28, #queue-req: 0
- 2025-08-25 00:01:46,924 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-08-25 00:01:47,058 - __main__ - INFO - Reducing anchor text len to 3000 for ./workspace/UNETR.pdf-5
- 2025-08-25 00:01:47,059 - __main__ - WARNING - ValueError on attempt 0 for ./workspace/UNETR.pdf-5: <class 'ValueError'> - Response exceeded model_max_context, cannot use this response
- 2025-08-25 00:01:47,389 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-5
- 2025-08-25 00:01:47,611 - sglang - INFO - [2025-08-25 00:01:47 TP0] Prefill batch. #new-seq: 1, #new-token: 3206, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.15, #running-req: 1, #queue-req: 0
- 2025-08-25 00:01:47,611 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:01:48,704 - sglang - INFO - [2025-08-25 00:01:48 TP0] Decode batch. #running-req: 2, #token: 8838, token usage: 0.23, gen throughput (token/s): 30.33, #queue-req: 0
- 2025-08-25 00:01:48,704 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-08-25 00:01:49,557 - sglang - INFO - [2025-08-25 00:01:49 TP0] Decode batch. #running-req: 2, #token: 8918, token usage: 0.23, gen throughput (token/s): 93.81, #queue-req: 0
- 2025-08-25 00:01:49,557 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-08-25 00:01:50,409 - sglang - INFO - [2025-08-25 00:01:50 TP0] Decode batch. #running-req: 2, #token: 8998, token usage: 0.24, gen throughput (token/s): 93.85, #queue-req: 0
- 2025-08-25 00:01:50,410 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-08-25 00:01:51,262 - sglang - INFO - [2025-08-25 00:01:51 TP0] Decode batch. #running-req: 2, #token: 9078, token usage: 0.24, gen throughput (token/s): 93.76, #queue-req: 0
- 2025-08-25 00:01:51,263 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-08-25 00:01:52,116 - sglang - INFO - [2025-08-25 00:01:52 TP0] Decode batch. #running-req: 2, #token: 9158, token usage: 0.24, gen throughput (token/s): 93.73, #queue-req: 0
- 2025-08-25 00:01:52,116 - __main__ - INFO - sglang running req: 2 queue req: 0
- 2025-08-25 00:01:52,964 - sglang - INFO - [2025-08-25 00:01:52 TP0] Decode batch. #running-req: 1, #token: 3414, token usage: 0.09, gen throughput (token/s): 76.63, #queue-req: 0
- 2025-08-25 00:01:52,965 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:01:53,802 - sglang - INFO - [2025-08-25 00:01:53 TP0] Decode batch. #running-req: 1, #token: 3454, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-08-25 00:01:53,802 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:01:54,166 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:01:54,166 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 260.08 260.08
- sglang_output_tokens 79.05 79.05
- 2025-08-25 00:01:54,166 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:01:54,639 - sglang - INFO - [2025-08-25 00:01:54 TP0] Decode batch. #running-req: 1, #token: 3494, token usage: 0.09, gen throughput (token/s): 47.78, #queue-req: 0
- 2025-08-25 00:01:54,639 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:01:55,477 - sglang - INFO - [2025-08-25 00:01:55 TP0] Decode batch. #running-req: 1, #token: 3534, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
- 2025-08-25 00:01:55,477 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:01:56,316 - sglang - INFO - [2025-08-25 00:01:56 TP0] Decode batch. #running-req: 1, #token: 3574, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-08-25 00:01:56,316 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:01:57,154 - sglang - INFO - [2025-08-25 00:01:57 TP0] Decode batch. #running-req: 1, #token: 3614, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-08-25 00:01:57,154 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:01:57,992 - sglang - INFO - [2025-08-25 00:01:57 TP0] Decode batch. #running-req: 1, #token: 3654, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-08-25 00:01:57,993 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:01:58,832 - sglang - INFO - [2025-08-25 00:01:58 TP0] Decode batch. #running-req: 1, #token: 3694, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-08-25 00:01:58,832 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:01:59,671 - sglang - INFO - [2025-08-25 00:01:59 TP0] Decode batch. #running-req: 1, #token: 3734, token usage: 0.10, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-08-25 00:01:59,671 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:00,511 - sglang - INFO - [2025-08-25 00:02:00 TP0] Decode batch. #running-req: 1, #token: 3774, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-08-25 00:02:00,512 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:01,351 - sglang - INFO - [2025-08-25 00:02:01 TP0] Decode batch. #running-req: 1, #token: 3814, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-08-25 00:02:01,351 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:02,190 - sglang - INFO - [2025-08-25 00:02:02 TP0] Decode batch. #running-req: 1, #token: 3854, token usage: 0.10, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-08-25 00:02:02,191 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:03,030 - sglang - INFO - [2025-08-25 00:02:03 TP0] Decode batch. #running-req: 1, #token: 3894, token usage: 0.10, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-08-25 00:02:03,030 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:03,869 - sglang - INFO - [2025-08-25 00:02:03 TP0] Decode batch. #running-req: 1, #token: 3934, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-08-25 00:02:03,870 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:04,167 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:02:04,168 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 242.38 242.38
- sglang_output_tokens 73.67 73.67
- 2025-08-25 00:02:04,168 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:02:04,709 - sglang - INFO - [2025-08-25 00:02:04 TP0] Decode batch. #running-req: 1, #token: 3974, token usage: 0.10, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-08-25 00:02:04,709 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:05,548 - sglang - INFO - [2025-08-25 00:02:05 TP0] Decode batch. #running-req: 1, #token: 4014, token usage: 0.11, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-08-25 00:02:05,548 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:06,387 - sglang - INFO - [2025-08-25 00:02:06 TP0] Decode batch. #running-req: 1, #token: 4054, token usage: 0.11, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-08-25 00:02:06,387 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:07,226 - sglang - INFO - [2025-08-25 00:02:07 TP0] Decode batch. #running-req: 1, #token: 4094, token usage: 0.11, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-08-25 00:02:07,227 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:08,068 - sglang - INFO - [2025-08-25 00:02:08 TP0] Decode batch. #running-req: 1, #token: 4134, token usage: 0.11, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-08-25 00:02:08,068 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:08,911 - sglang - INFO - [2025-08-25 00:02:08 TP0] Decode batch. #running-req: 1, #token: 4174, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-08-25 00:02:08,911 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:09,752 - sglang - INFO - [2025-08-25 00:02:09 TP0] Decode batch. #running-req: 1, #token: 4214, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-08-25 00:02:09,752 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:10,592 - sglang - INFO - [2025-08-25 00:02:10 TP0] Decode batch. #running-req: 1, #token: 4254, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-08-25 00:02:10,593 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:11,434 - sglang - INFO - [2025-08-25 00:02:11 TP0] Decode batch. #running-req: 1, #token: 4294, token usage: 0.11, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-08-25 00:02:11,434 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:12,276 - sglang - INFO - [2025-08-25 00:02:12 TP0] Decode batch. #running-req: 1, #token: 4334, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
- 2025-08-25 00:02:12,277 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:13,120 - sglang - INFO - [2025-08-25 00:02:13 TP0] Decode batch. #running-req: 1, #token: 4374, token usage: 0.12, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-08-25 00:02:13,121 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:13,963 - sglang - INFO - [2025-08-25 00:02:13 TP0] Decode batch. #running-req: 1, #token: 4414, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-08-25 00:02:13,963 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:14,170 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:02:14,170 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 226.93 226.93
- sglang_output_tokens 68.97 68.97
- 2025-08-25 00:02:14,170 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:02:14,804 - sglang - INFO - [2025-08-25 00:02:14 TP0] Decode batch. #running-req: 1, #token: 4454, token usage: 0.12, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-08-25 00:02:14,805 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:15,646 - sglang - INFO - [2025-08-25 00:02:15 TP0] Decode batch. #running-req: 1, #token: 4494, token usage: 0.12, gen throughput (token/s): 47.53, #queue-req: 0
- 2025-08-25 00:02:15,646 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:16,489 - sglang - INFO - [2025-08-25 00:02:16 TP0] Decode batch. #running-req: 1, #token: 4534, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-08-25 00:02:16,489 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:17,331 - sglang - INFO - [2025-08-25 00:02:17 TP0] Decode batch. #running-req: 1, #token: 4574, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-08-25 00:02:17,331 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:18,173 - sglang - INFO - [2025-08-25 00:02:18 TP0] Decode batch. #running-req: 1, #token: 4614, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-08-25 00:02:18,173 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:19,016 - sglang - INFO - [2025-08-25 00:02:19 TP0] Decode batch. #running-req: 1, #token: 4654, token usage: 0.12, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-08-25 00:02:19,017 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:19,859 - sglang - INFO - [2025-08-25 00:02:19 TP0] Decode batch. #running-req: 1, #token: 4694, token usage: 0.12, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-08-25 00:02:19,860 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:20,704 - sglang - INFO - [2025-08-25 00:02:20 TP0] Decode batch. #running-req: 1, #token: 4734, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-08-25 00:02:20,704 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:21,548 - sglang - INFO - [2025-08-25 00:02:21 TP0] Decode batch. #running-req: 1, #token: 4774, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-08-25 00:02:21,548 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:22,392 - sglang - INFO - [2025-08-25 00:02:22 TP0] Decode batch. #running-req: 1, #token: 4814, token usage: 0.13, gen throughput (token/s): 47.37, #queue-req: 0
- 2025-08-25 00:02:22,392 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:23,236 - sglang - INFO - [2025-08-25 00:02:23 TP0] Decode batch. #running-req: 1, #token: 4854, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-08-25 00:02:23,236 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:24,089 - sglang - INFO - [2025-08-25 00:02:24 TP0] Decode batch. #running-req: 1, #token: 4894, token usage: 0.13, gen throughput (token/s): 46.90, #queue-req: 0
- 2025-08-25 00:02:24,089 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:24,171 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:02:24,172 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 213.34 213.34
- sglang_output_tokens 64.84 64.84
- 2025-08-25 00:02:24,172 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:02:24,939 - sglang - INFO - [2025-08-25 00:02:24 TP0] Decode batch. #running-req: 1, #token: 4934, token usage: 0.13, gen throughput (token/s): 47.04, #queue-req: 0
- 2025-08-25 00:02:24,939 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:25,790 - sglang - INFO - [2025-08-25 00:02:25 TP0] Decode batch. #running-req: 1, #token: 4974, token usage: 0.13, gen throughput (token/s): 46.99, #queue-req: 0
- 2025-08-25 00:02:25,791 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:26,640 - sglang - INFO - [2025-08-25 00:02:26 TP0] Decode batch. #running-req: 1, #token: 5014, token usage: 0.13, gen throughput (token/s): 47.09, #queue-req: 0
- 2025-08-25 00:02:26,640 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:27,485 - sglang - INFO - [2025-08-25 00:02:27 TP0] Decode batch. #running-req: 1, #token: 5054, token usage: 0.13, gen throughput (token/s): 47.35, #queue-req: 0
- 2025-08-25 00:02:27,485 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:28,330 - sglang - INFO - [2025-08-25 00:02:28 TP0] Decode batch. #running-req: 1, #token: 5094, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
- 2025-08-25 00:02:28,331 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:29,176 - sglang - INFO - [2025-08-25 00:02:29 TP0] Decode batch. #running-req: 1, #token: 5134, token usage: 0.14, gen throughput (token/s): 47.29, #queue-req: 0
- 2025-08-25 00:02:29,177 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:30,023 - sglang - INFO - [2025-08-25 00:02:30 TP0] Decode batch. #running-req: 1, #token: 5174, token usage: 0.14, gen throughput (token/s): 47.23, #queue-req: 0
- 2025-08-25 00:02:30,024 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:30,871 - sglang - INFO - [2025-08-25 00:02:30 TP0] Decode batch. #running-req: 1, #token: 5214, token usage: 0.14, gen throughput (token/s): 47.21, #queue-req: 0
- 2025-08-25 00:02:30,871 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:31,716 - sglang - INFO - [2025-08-25 00:02:31 TP0] Decode batch. #running-req: 1, #token: 5254, token usage: 0.14, gen throughput (token/s): 47.30, #queue-req: 0
- 2025-08-25 00:02:31,717 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:32,564 - sglang - INFO - [2025-08-25 00:02:32 TP0] Decode batch. #running-req: 1, #token: 5294, token usage: 0.14, gen throughput (token/s): 47.22, #queue-req: 0
- 2025-08-25 00:02:32,564 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:33,412 - sglang - INFO - [2025-08-25 00:02:33 TP0] Decode batch. #running-req: 1, #token: 5334, token usage: 0.14, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-08-25 00:02:33,412 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:34,172 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:02:34,173 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 201.28 201.28
- sglang_output_tokens 61.18 61.18
- 2025-08-25 00:02:34,173 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:02:34,260 - sglang - INFO - [2025-08-25 00:02:34 TP0] Decode batch. #running-req: 1, #token: 5374, token usage: 0.14, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-08-25 00:02:34,260 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:35,108 - sglang - INFO - [2025-08-25 00:02:35 TP0] Decode batch. #running-req: 1, #token: 5414, token usage: 0.14, gen throughput (token/s): 47.15, #queue-req: 0
- 2025-08-25 00:02:35,108 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:35,955 - sglang - INFO - [2025-08-25 00:02:35 TP0] Decode batch. #running-req: 1, #token: 5454, token usage: 0.14, gen throughput (token/s): 47.22, #queue-req: 0
- 2025-08-25 00:02:35,955 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:36,802 - sglang - INFO - [2025-08-25 00:02:36 TP0] Decode batch. #running-req: 1, #token: 5494, token usage: 0.14, gen throughput (token/s): 47.21, #queue-req: 0
- 2025-08-25 00:02:36,802 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:37,649 - sglang - INFO - [2025-08-25 00:02:37 TP0] Decode batch. #running-req: 1, #token: 5534, token usage: 0.15, gen throughput (token/s): 47.21, #queue-req: 0
- 2025-08-25 00:02:37,650 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:38,497 - sglang - INFO - [2025-08-25 00:02:38 TP0] Decode batch. #running-req: 1, #token: 5574, token usage: 0.15, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-08-25 00:02:38,498 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:39,347 - sglang - INFO - [2025-08-25 00:02:39 TP0] Decode batch. #running-req: 1, #token: 5614, token usage: 0.15, gen throughput (token/s): 47.09, #queue-req: 0
- 2025-08-25 00:02:39,347 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:40,195 - sglang - INFO - [2025-08-25 00:02:40 TP0] Decode batch. #running-req: 1, #token: 5654, token usage: 0.15, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-08-25 00:02:40,195 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:41,043 - sglang - INFO - [2025-08-25 00:02:41 TP0] Decode batch. #running-req: 1, #token: 5694, token usage: 0.15, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-08-25 00:02:41,044 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:41,891 - sglang - INFO - [2025-08-25 00:02:41 TP0] Decode batch. #running-req: 1, #token: 5734, token usage: 0.15, gen throughput (token/s): 47.18, #queue-req: 0
- 2025-08-25 00:02:41,891 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:42,740 - sglang - INFO - [2025-08-25 00:02:42 TP0] Decode batch. #running-req: 1, #token: 5774, token usage: 0.15, gen throughput (token/s): 47.15, #queue-req: 0
- 2025-08-25 00:02:42,740 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:43,590 - sglang - INFO - [2025-08-25 00:02:43 TP0] Decode batch. #running-req: 1, #token: 5814, token usage: 0.15, gen throughput (token/s): 47.06, #queue-req: 0
- 2025-08-25 00:02:43,590 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:44,174 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:02:44,174 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 190.51 190.51
- sglang_output_tokens 57.91 57.91
- 2025-08-25 00:02:44,174 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:02:44,439 - sglang - INFO - [2025-08-25 00:02:44 TP0] Decode batch. #running-req: 1, #token: 5854, token usage: 0.15, gen throughput (token/s): 47.12, #queue-req: 0
- 2025-08-25 00:02:44,439 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:45,287 - sglang - INFO - [2025-08-25 00:02:45 TP0] Decode batch. #running-req: 1, #token: 5894, token usage: 0.16, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-08-25 00:02:45,287 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:46,133 - sglang - INFO - [2025-08-25 00:02:46 TP0] Decode batch. #running-req: 1, #token: 5934, token usage: 0.16, gen throughput (token/s): 47.24, #queue-req: 0
- 2025-08-25 00:02:46,133 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:46,982 - sglang - INFO - [2025-08-25 00:02:46 TP0] Decode batch. #running-req: 1, #token: 5974, token usage: 0.16, gen throughput (token/s): 47.13, #queue-req: 0
- 2025-08-25 00:02:46,982 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:47,830 - sglang - INFO - [2025-08-25 00:02:47 TP0] Decode batch. #running-req: 1, #token: 6014, token usage: 0.16, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-08-25 00:02:47,830 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:48,676 - sglang - INFO - [2025-08-25 00:02:48 TP0] Decode batch. #running-req: 1, #token: 6054, token usage: 0.16, gen throughput (token/s): 47.25, #queue-req: 0
- 2025-08-25 00:02:48,677 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:49,525 - sglang - INFO - [2025-08-25 00:02:49 TP0] Decode batch. #running-req: 1, #token: 6094, token usage: 0.16, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-08-25 00:02:49,525 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:50,373 - sglang - INFO - [2025-08-25 00:02:50 TP0] Decode batch. #running-req: 1, #token: 6134, token usage: 0.16, gen throughput (token/s): 47.18, #queue-req: 0
- 2025-08-25 00:02:50,373 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:51,221 - sglang - INFO - [2025-08-25 00:02:51 TP0] Decode batch. #running-req: 1, #token: 6174, token usage: 0.16, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-08-25 00:02:51,221 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:51,885 - __main__ - WARNING - JSON decode error on attempt 1 for ./workspace/UNETR.pdf-5: Unterminated string starting at: line 1 column 126 (char 125)
- 2025-08-25 00:02:52,151 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-5
- 2025-08-25 00:02:52,318 - sglang - INFO - [2025-08-25 00:02:52 TP0] Prefill batch. #new-seq: 1, #new-token: 3125, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
- 2025-08-25 00:02:52,318 - __main__ - INFO - sglang running req: 0 queue req: 0
- 2025-08-25 00:02:53,418 - sglang - INFO - [2025-08-25 00:02:53 TP0] Decode batch. #running-req: 1, #token: 3134, token usage: 0.08, gen throughput (token/s): 18.21, #queue-req: 0
- 2025-08-25 00:02:53,418 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:54,175 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:02:54,175 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 197.11 197.11
- sglang_output_tokens 70.20 70.20
- 2025-08-25 00:02:54,175 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:02:54,256 - sglang - INFO - [2025-08-25 00:02:54 TP0] Decode batch. #running-req: 1, #token: 3174, token usage: 0.08, gen throughput (token/s): 47.70, #queue-req: 0
- 2025-08-25 00:02:54,257 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:55,095 - sglang - INFO - [2025-08-25 00:02:55 TP0] Decode batch. #running-req: 1, #token: 3214, token usage: 0.08, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-08-25 00:02:55,095 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:55,934 - sglang - INFO - [2025-08-25 00:02:55 TP0] Decode batch. #running-req: 1, #token: 3254, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-08-25 00:02:55,935 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:56,774 - sglang - INFO - [2025-08-25 00:02:56 TP0] Decode batch. #running-req: 1, #token: 3294, token usage: 0.09, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-08-25 00:02:56,774 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:57,613 - sglang - INFO - [2025-08-25 00:02:57 TP0] Decode batch. #running-req: 1, #token: 3334, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-08-25 00:02:57,614 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:58,453 - sglang - INFO - [2025-08-25 00:02:58 TP0] Decode batch. #running-req: 1, #token: 3374, token usage: 0.09, gen throughput (token/s): 47.63, #queue-req: 0
- 2025-08-25 00:02:58,453 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:02:59,292 - sglang - INFO - [2025-08-25 00:02:59 TP0] Decode batch. #running-req: 1, #token: 3414, token usage: 0.09, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-08-25 00:02:59,292 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:00,130 - sglang - INFO - [2025-08-25 00:03:00 TP0] Decode batch. #running-req: 1, #token: 3454, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-08-25 00:03:00,131 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:00,970 - sglang - INFO - [2025-08-25 00:03:00 TP0] Decode batch. #running-req: 1, #token: 3494, token usage: 0.09, gen throughput (token/s): 47.62, #queue-req: 0
- 2025-08-25 00:03:00,971 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:01,810 - sglang - INFO - [2025-08-25 00:03:01 TP0] Decode batch. #running-req: 1, #token: 3534, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-08-25 00:03:01,810 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:02,649 - sglang - INFO - [2025-08-25 00:03:02 TP0] Decode batch. #running-req: 1, #token: 3574, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
- 2025-08-25 00:03:02,649 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:03,488 - sglang - INFO - [2025-08-25 00:03:03 TP0] Decode batch. #running-req: 1, #token: 3614, token usage: 0.10, gen throughput (token/s): 47.67, #queue-req: 0
- 2025-08-25 00:03:03,488 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:04,176 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:03:04,177 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 187.59 187.59
- sglang_output_tokens 66.80 66.80
- 2025-08-25 00:03:04,177 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:03:04,328 - sglang - INFO - [2025-08-25 00:03:04 TP0] Decode batch. #running-req: 1, #token: 3654, token usage: 0.10, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-08-25 00:03:04,329 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:05,168 - sglang - INFO - [2025-08-25 00:03:05 TP0] Decode batch. #running-req: 1, #token: 3694, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
- 2025-08-25 00:03:05,168 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:06,007 - sglang - INFO - [2025-08-25 00:03:06 TP0] Decode batch. #running-req: 1, #token: 3734, token usage: 0.10, gen throughput (token/s): 47.68, #queue-req: 0
- 2025-08-25 00:03:06,007 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:06,845 - sglang - INFO - [2025-08-25 00:03:06 TP0] Decode batch. #running-req: 1, #token: 3774, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
- 2025-08-25 00:03:06,845 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:07,685 - sglang - INFO - [2025-08-25 00:03:07 TP0] Decode batch. #running-req: 1, #token: 3814, token usage: 0.10, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-08-25 00:03:07,685 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:08,524 - sglang - INFO - [2025-08-25 00:03:08 TP0] Decode batch. #running-req: 1, #token: 3854, token usage: 0.10, gen throughput (token/s): 47.65, #queue-req: 0
- 2025-08-25 00:03:08,524 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:09,363 - sglang - INFO - [2025-08-25 00:03:09 TP0] Decode batch. #running-req: 1, #token: 3894, token usage: 0.10, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-08-25 00:03:09,364 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:10,204 - sglang - INFO - [2025-08-25 00:03:10 TP0] Decode batch. #running-req: 1, #token: 3934, token usage: 0.10, gen throughput (token/s): 47.61, #queue-req: 0
- 2025-08-25 00:03:10,204 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:11,043 - sglang - INFO - [2025-08-25 00:03:11 TP0] Decode batch. #running-req: 1, #token: 3974, token usage: 0.10, gen throughput (token/s): 47.66, #queue-req: 0
- 2025-08-25 00:03:11,043 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:11,884 - sglang - INFO - [2025-08-25 00:03:11 TP0] Decode batch. #running-req: 1, #token: 4014, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
- 2025-08-25 00:03:11,884 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:12,724 - sglang - INFO - [2025-08-25 00:03:12 TP0] Decode batch. #running-req: 1, #token: 4054, token usage: 0.11, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-08-25 00:03:12,724 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:13,565 - sglang - INFO - [2025-08-25 00:03:13 TP0] Decode batch. #running-req: 1, #token: 4094, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-08-25 00:03:13,565 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:14,178 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:03:14,178 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 178.94 178.94
- sglang_output_tokens 63.72 63.72
- 2025-08-25 00:03:14,178 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:03:14,405 - sglang - INFO - [2025-08-25 00:03:14 TP0] Decode batch. #running-req: 1, #token: 4134, token usage: 0.11, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-08-25 00:03:14,406 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:15,246 - sglang - INFO - [2025-08-25 00:03:15 TP0] Decode batch. #running-req: 1, #token: 4174, token usage: 0.11, gen throughput (token/s): 47.59, #queue-req: 0
- 2025-08-25 00:03:15,246 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:16,087 - sglang - INFO - [2025-08-25 00:03:16 TP0] Decode batch. #running-req: 1, #token: 4214, token usage: 0.11, gen throughput (token/s): 47.55, #queue-req: 0
- 2025-08-25 00:03:16,087 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:16,927 - sglang - INFO - [2025-08-25 00:03:16 TP0] Decode batch. #running-req: 1, #token: 4254, token usage: 0.11, gen throughput (token/s): 47.60, #queue-req: 0
- 2025-08-25 00:03:16,928 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:17,768 - sglang - INFO - [2025-08-25 00:03:17 TP0] Decode batch. #running-req: 1, #token: 4294, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
- 2025-08-25 00:03:17,769 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:18,610 - sglang - INFO - [2025-08-25 00:03:18 TP0] Decode batch. #running-req: 1, #token: 4334, token usage: 0.11, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-08-25 00:03:18,610 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:19,453 - sglang - INFO - [2025-08-25 00:03:19 TP0] Decode batch. #running-req: 1, #token: 4374, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
- 2025-08-25 00:03:19,453 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:20,294 - sglang - INFO - [2025-08-25 00:03:20 TP0] Decode batch. #running-req: 1, #token: 4414, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-08-25 00:03:20,295 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:21,136 - sglang - INFO - [2025-08-25 00:03:21 TP0] Decode batch. #running-req: 1, #token: 4454, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
- 2025-08-25 00:03:21,136 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:21,978 - sglang - INFO - [2025-08-25 00:03:21 TP0] Decode batch. #running-req: 1, #token: 4494, token usage: 0.12, gen throughput (token/s): 47.51, #queue-req: 0
- 2025-08-25 00:03:21,978 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:22,821 - sglang - INFO - [2025-08-25 00:03:22 TP0] Decode batch. #running-req: 1, #token: 4534, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-08-25 00:03:22,821 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:23,664 - sglang - INFO - [2025-08-25 00:03:23 TP0] Decode batch. #running-req: 1, #token: 4574, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-08-25 00:03:23,664 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:24,179 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:03:24,180 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 171.06 171.06
- sglang_output_tokens 60.92 60.92
- 2025-08-25 00:03:24,180 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:03:24,507 - sglang - INFO - [2025-08-25 00:03:24 TP0] Decode batch. #running-req: 1, #token: 4614, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-08-25 00:03:24,508 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:25,350 - sglang - INFO - [2025-08-25 00:03:25 TP0] Decode batch. #running-req: 1, #token: 4654, token usage: 0.12, gen throughput (token/s): 47.44, #queue-req: 0
- 2025-08-25 00:03:25,351 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:26,193 - sglang - INFO - [2025-08-25 00:03:26 TP0] Decode batch. #running-req: 1, #token: 4694, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
- 2025-08-25 00:03:26,194 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:27,037 - sglang - INFO - [2025-08-25 00:03:27 TP0] Decode batch. #running-req: 1, #token: 4734, token usage: 0.12, gen throughput (token/s): 47.40, #queue-req: 0
- 2025-08-25 00:03:27,037 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:27,881 - sglang - INFO - [2025-08-25 00:03:27 TP0] Decode batch. #running-req: 1, #token: 4774, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
- 2025-08-25 00:03:27,881 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:28,723 - sglang - INFO - [2025-08-25 00:03:28 TP0] Decode batch. #running-req: 1, #token: 4814, token usage: 0.13, gen throughput (token/s): 47.48, #queue-req: 0
- 2025-08-25 00:03:28,723 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:29,565 - sglang - INFO - [2025-08-25 00:03:29 TP0] Decode batch. #running-req: 1, #token: 4854, token usage: 0.13, gen throughput (token/s): 47.50, #queue-req: 0
- 2025-08-25 00:03:29,565 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:30,408 - sglang - INFO - [2025-08-25 00:03:30 TP0] Decode batch. #running-req: 1, #token: 4894, token usage: 0.13, gen throughput (token/s): 47.45, #queue-req: 0
- 2025-08-25 00:03:30,408 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:31,254 - sglang - INFO - [2025-08-25 00:03:31 TP0] Decode batch. #running-req: 1, #token: 4934, token usage: 0.13, gen throughput (token/s): 47.31, #queue-req: 0
- 2025-08-25 00:03:31,254 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:32,098 - sglang - INFO - [2025-08-25 00:03:32 TP0] Decode batch. #running-req: 1, #token: 4974, token usage: 0.13, gen throughput (token/s): 47.35, #queue-req: 0
- 2025-08-25 00:03:32,098 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:32,943 - sglang - INFO - [2025-08-25 00:03:32 TP0] Decode batch. #running-req: 1, #token: 5014, token usage: 0.13, gen throughput (token/s): 47.33, #queue-req: 0
- 2025-08-25 00:03:32,944 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:33,787 - sglang - INFO - [2025-08-25 00:03:33 TP0] Decode batch. #running-req: 1, #token: 5054, token usage: 0.13, gen throughput (token/s): 47.39, #queue-req: 0
- 2025-08-25 00:03:33,788 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:34,181 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:03:34,181 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 163.84 163.84
- sglang_output_tokens 58.35 58.35
- 2025-08-25 00:03:34,181 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:03:34,632 - sglang - INFO - [2025-08-25 00:03:34 TP0] Decode batch. #running-req: 1, #token: 5094, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-08-25 00:03:34,632 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:35,477 - sglang - INFO - [2025-08-25 00:03:35 TP0] Decode batch. #running-req: 1, #token: 5134, token usage: 0.14, gen throughput (token/s): 47.33, #queue-req: 0
- 2025-08-25 00:03:35,477 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:36,321 - sglang - INFO - [2025-08-25 00:03:36 TP0] Decode batch. #running-req: 1, #token: 5174, token usage: 0.14, gen throughput (token/s): 47.38, #queue-req: 0
- 2025-08-25 00:03:36,321 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:37,165 - sglang - INFO - [2025-08-25 00:03:37 TP0] Decode batch. #running-req: 1, #token: 5214, token usage: 0.14, gen throughput (token/s): 47.42, #queue-req: 0
- 2025-08-25 00:03:37,165 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:38,009 - sglang - INFO - [2025-08-25 00:03:38 TP0] Decode batch. #running-req: 1, #token: 5254, token usage: 0.14, gen throughput (token/s): 47.35, #queue-req: 0
- 2025-08-25 00:03:38,010 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:38,856 - sglang - INFO - [2025-08-25 00:03:38 TP0] Decode batch. #running-req: 1, #token: 5294, token usage: 0.14, gen throughput (token/s): 47.23, #queue-req: 0
- 2025-08-25 00:03:38,857 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:39,704 - sglang - INFO - [2025-08-25 00:03:39 TP0] Decode batch. #running-req: 1, #token: 5334, token usage: 0.14, gen throughput (token/s): 47.20, #queue-req: 0
- 2025-08-25 00:03:39,704 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:40,550 - sglang - INFO - [2025-08-25 00:03:40 TP0] Decode batch. #running-req: 1, #token: 5374, token usage: 0.14, gen throughput (token/s): 47.25, #queue-req: 0
- 2025-08-25 00:03:40,551 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:41,397 - sglang - INFO - [2025-08-25 00:03:41 TP0] Decode batch. #running-req: 1, #token: 5414, token usage: 0.14, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-08-25 00:03:41,397 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:42,243 - sglang - INFO - [2025-08-25 00:03:42 TP0] Decode batch. #running-req: 1, #token: 5454, token usage: 0.14, gen throughput (token/s): 47.27, #queue-req: 0
- 2025-08-25 00:03:42,243 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:43,091 - sglang - INFO - [2025-08-25 00:03:43 TP0] Decode batch. #running-req: 1, #token: 5494, token usage: 0.14, gen throughput (token/s): 47.19, #queue-req: 0
- 2025-08-25 00:03:43,091 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:43,939 - sglang - INFO - [2025-08-25 00:03:43 TP0] Decode batch. #running-req: 1, #token: 5534, token usage: 0.15, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-08-25 00:03:43,939 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:44,182 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:03:44,183 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 157.20 157.20
- sglang_output_tokens 55.98 55.98
- 2025-08-25 00:03:44,183 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:03:44,786 - sglang - INFO - [2025-08-25 00:03:44 TP0] Decode batch. #running-req: 1, #token: 5574, token usage: 0.15, gen throughput (token/s): 47.22, #queue-req: 0
- 2025-08-25 00:03:44,786 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:45,632 - sglang - INFO - [2025-08-25 00:03:45 TP0] Decode batch. #running-req: 1, #token: 5614, token usage: 0.15, gen throughput (token/s): 47.29, #queue-req: 0
- 2025-08-25 00:03:45,632 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:46,478 - sglang - INFO - [2025-08-25 00:03:46 TP0] Decode batch. #running-req: 1, #token: 5654, token usage: 0.15, gen throughput (token/s): 47.26, #queue-req: 0
- 2025-08-25 00:03:46,478 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:47,326 - sglang - INFO - [2025-08-25 00:03:47 TP0] Decode batch. #running-req: 1, #token: 5694, token usage: 0.15, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-08-25 00:03:47,326 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:48,173 - sglang - INFO - [2025-08-25 00:03:48 TP0] Decode batch. #running-req: 1, #token: 5734, token usage: 0.15, gen throughput (token/s): 47.19, #queue-req: 0
- 2025-08-25 00:03:48,174 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:49,021 - sglang - INFO - [2025-08-25 00:03:49 TP0] Decode batch. #running-req: 1, #token: 5774, token usage: 0.15, gen throughput (token/s): 47.17, #queue-req: 0
- 2025-08-25 00:03:49,022 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:49,867 - sglang - INFO - [2025-08-25 00:03:49 TP0] Decode batch. #running-req: 1, #token: 5814, token usage: 0.15, gen throughput (token/s): 47.28, #queue-req: 0
- 2025-08-25 00:03:49,868 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:50,714 - sglang - INFO - [2025-08-25 00:03:50 TP0] Decode batch. #running-req: 1, #token: 5854, token usage: 0.15, gen throughput (token/s): 47.25, #queue-req: 0
- 2025-08-25 00:03:50,714 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:51,562 - sglang - INFO - [2025-08-25 00:03:51 TP0] Decode batch. #running-req: 1, #token: 5894, token usage: 0.16, gen throughput (token/s): 47.18, #queue-req: 0
- 2025-08-25 00:03:51,562 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:52,410 - sglang - INFO - [2025-08-25 00:03:52 TP0] Decode batch. #running-req: 1, #token: 5934, token usage: 0.16, gen throughput (token/s): 47.16, #queue-req: 0
- 2025-08-25 00:03:52,410 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:53,257 - sglang - INFO - [2025-08-25 00:03:53 TP0] Decode batch. #running-req: 1, #token: 5974, token usage: 0.16, gen throughput (token/s): 47.20, #queue-req: 0
- 2025-08-25 00:03:53,258 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:54,104 - sglang - INFO - [2025-08-25 00:03:54 TP0] Decode batch. #running-req: 1, #token: 6014, token usage: 0.16, gen throughput (token/s): 47.24, #queue-req: 0
- 2025-08-25 00:03:54,104 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:54,184 - __main__ - INFO - Queue remaining: 0
- 2025-08-25 00:03:54,184 - __main__ - INFO -
- Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
- ----------------------------------------------------------------------------------
- sglang_input_tokens 151.08 151.08
- sglang_output_tokens 53.80 53.80
- 2025-08-25 00:03:54,184 - __main__ - INFO -
- Worker ID | finished | started
- ----------+----------+--------
- 0 | 10 | 11
- 2025-08-25 00:03:54,952 - sglang - INFO - [2025-08-25 00:03:54 TP0] Decode batch. #running-req: 1, #token: 6054, token usage: 0.16, gen throughput (token/s): 47.19, #queue-req: 0
- 2025-08-25 00:03:54,952 - __main__ - INFO - sglang running req: 1 queue req: 0
- 2025-08-25 00:03:55,277 - __main__ - INFO - Finished TaskGroup for worker on 73c9399482ed5cf37e1888c000e49ef82a30c10d
- 2025-08-25 00:03:55,278 - __main__ - INFO - Got 1 docs for 73c9399482ed5cf37e1888c000e49ef82a30c10d
- 2025-08-25 00:03:55,280 - __main__ - INFO - Worker 0 exiting due to empty queue
- 2025-08-25 00:03:55,280 - __main__ - INFO - Work done
- 2025-08-25 00:03:55,281 - __main__ - INFO - Got cancellation request for SGLang server
|