olmocr-pipeline-debug.log 3.3 MB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764777477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047504850495050505150525053505450555056505750585059506050615062506350645065506650675068506950705071507250735074507550765077507850795080508150825083508450855086508750885089509050915092509350945095509650975098509951005101510251035104510551065107510851095110511151125113511451155116511751185119512051215122512351245125512651275128512951305131513251335134513551365137513851395140514151425143514451455146514751485149515051515152515351545155515651575158515951605161516251635164516551665167516851695170517151725173517451755176517751785179518051815182518351845185518651875188518951905191519251935194519551965197519851995200520152025203520452055206520752085209521052115212521352145215521652175218521952205221522252235224522552265227522852295230523152325233523452355236523752385239524052415242524352445245524652475248524952505251525252535254525552565257525852595260526152625263526452655266526752685269527052715272527352745275527652775278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766277627862796280628162826283628462856286628762886289629062916292629362946295629662976298629963006301630263036304630563066307630863096310631163126313631463156316631763186319632063216322632363246325632663276328632963306331633263336334633563366337633863396340634163426343634463456346634763486349635063516352635363546355635663576358635963606361636263636364636563666367636863696370637163726373637463756376637763786379638063816382638363846385638663876388638963906391639263936394639563966397639863996400640164026403640464056406640764086409641064116412641364146415641664176418641964206421642264236424642564266427642864296430643164326433643464356436643764386439644064416442644364446445644664476448644964506451645264536454645564566457645864596460646164626463646464656466646764686469647064716472647364746475647664776478647964806481648264836484648564866487648864896490649164926493649464956496649764986499650065016502650365046505650665076508650965106511651265136514651565166517651865196520652165226523652465256526652765286529653065316532653365346535653665376538653965406541654265436544654565466547654865496550655165526553655465556556655765586559656065616562656365646565656665676568656965706571657265736574657565766577657865796580658165826583658465856586658765886589659065916592659365946595659665976598659966006601660266036604660566066607660866096610661166126613661466156616661766186619662066216622662366246625662666276628662966306631663266336634663566366637663866396640664166426643664466456646664766486649665066516652665366546655665666576658665966606661666266636664666566666667666866696670667166726673667466756676667766786679668066816682668366846685668666876688668966906691669266936694669566966697669866996700670167026703670467056706670767086709671067116712671367146715671667176718671967206721672267236724672567266727672867296730673167326733673467356736673767386739674067416742674367446745674667476748674967506751675267536754675567566757675867596760676167626763676467656766676767686769677067716772677367746775677667776778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767777777877797780778177827783778477857786778777887789779077917792779377947795779677977798779978007801780278037804780578067807780878097810781178127813781478157816781778187819782078217822782378247825782678277828782978307831783278337834783578367837783878397840784178427843784478457846784778487849785078517852785378547855785678577858785978607861786278637864786578667867786878697870787178727873787478757876787778787879788078817882788378847885788678877888788978907891789278937894789578967897789878997900790179027903790479057906790779087909791079117912791379147915791679177918791979207921792279237924792579267927792879297930793179327933793479357936793779387939794079417942794379447945794679477948794979507951795279537954795579567957795879597960796179627963796479657966796779687969797079717972797379747975797679777978797979807981798279837984798579867987798879897990799179927993799479957996799779987999800080018002800380048005800680078008800980108011801280138014801580168017801880198020802180228023802480258026802780288029803080318032803380348035803680378038803980408041804280438044804580468047804880498050805180528053805480558056805780588059806080618062806380648065806680678068806980708071807280738074807580768077807880798080808180828083808480858086808780888089809080918092809380948095809680978098809981008101810281038104810581068107810881098110811181128113811481158116811781188119812081218122812381248125812681278128812981308131813281338134813581368137813881398140814181428143814481458146814781488149815081518152815381548155815681578158815981608161816281638164816581668167816881698170817181728173817481758176817781788179818081818182818381848185818681878188818981908191819281938194819581968197819881998200820182028203820482058206820782088209821082118212821382148215821682178218821982208221822282238224822582268227822882298230823182328233823482358236823782388239824082418242824382448245824682478248824982508251825282538254825582568257825882598260826182628263826482658266826782688269827082718272827382748275827682778278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769277927892799280928192829283928492859286928792889289929092919292929392949295929692979298929993009301930293039304930593069307930893099310931193129313931493159316931793189319932093219322932393249325932693279328932993309331933293339334933593369337933893399340934193429343934493459346934793489349935093519352935393549355935693579358935993609361936293639364936593669367936893699370937193729373937493759376937793789379938093819382938393849385938693879388938993909391939293939394939593969397939893999400940194029403940494059406940794089409941094119412941394149415941694179418941994209421942294239424942594269427942894299430943194329433943494359436943794389439944094419442944394449445944694479448944994509451945294539454945594569457945894599460946194629463946494659466946794689469947094719472947394749475947694779478947994809481948294839484948594869487948894899490949194929493949494959496949794989499950095019502950395049505950695079508950995109511951295139514951595169517951895199520952195229523952495259526952795289529953095319532953395349535953695379538953995409541954295439544954595469547954895499550955195529553955495559556955795589559956095619562956395649565956695679568956995709571957295739574957595769577957895799580958195829583958495859586958795889589959095919592959395949595959695979598959996009601960296039604960596069607960896099610961196129613961496159616961796189619962096219622962396249625962696279628962996309631963296339634963596369637963896399640964196429643964496459646964796489649965096519652965396549655965696579658965996609661966296639664966596669667966896699670967196729673967496759676967796789679968096819682968396849685968696879688968996909691969296939694969596969697969896999700970197029703970497059706970797089709971097119712971397149715971697179718971997209721972297239724972597269727972897299730973197329733973497359736973797389739974097419742974397449745974697479748974997509751975297539754975597569757975897599760976197629763976497659766976797689769977097719772977397749775977697779778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301103021030310304103051030610307103081030910310103111031210313103141031510316103171031810319103201032110322103231032410325103261032710328103291033010331103321033310334103351033610337103381033910340103411034210343103441034510346103471034810349103501035110352103531035410355103561035710358103591036010361103621036310364103651036610367103681036910370103711037210373103741037510376103771037810379103801038110382103831038410385103861038710388103891039010391103921039310394103951039610397103981039910400104011040210403104041040510406104071040810409104101041110412104131041410415104161041710418104191042010421104221042310424104251042610427104281042910430104311043210433104341043510436104371043810439104401044110442104431044410445104461044710448104491045010451104521045310454104551045610457104581045910460104611046210463104641046510466104671046810469104701047110472104731047410475104761047710478104791048010481104821048310484104851048610487104881048910490104911049210493104941049510496104971049810499105001050110502105031050410505105061050710508105091051010511105121051310514105151051610517105181051910520105211052210523105241052510526105271052810529105301053110532105331053410535105361053710538105391054010541105421054310544105451054610547105481054910550105511055210553105541055510556105571055810559105601056110562105631056410565105661056710568105691057010571105721057310574105751057610577105781057910580105811058210583105841058510586105871058810589105901059110592105931059410595105961059710598105991060010601106021060310604106051060610607106081060910610106111061210613106141061510616106171061810619106201062110622106231062410625106261062710628106291063010631106321063310634106351063610637106381063910640106411064210643106441064510646106471064810649106501065110652106531065410655106561065710658106591066010661106621066310664106651066610667106681066910670106711067210673106741067510676106771067810679106801068110682106831068410685106861068710688106891069010691106921069310694106951069610697106981069910700107011070210703107041070510706107071070810709107101071110712107131071410715107161071710718107191072010721107221072310724107251072610727107281072910730107311073210733107341073510736107371073810739107401074110742107431074410745107461074710748107491075010751107521075310754107551075610757107581075910760107611076210763107641076510766107671076810769107701077110772107731077410775107761077710778107791078010781107821078310784107851078610787107881078910790107911079210793107941079510796107971079810799108001080110802108031080410805108061080710808108091081010811108121081310814108151081610817108181081910820108211082210823108241082510826108271082810829108301083110832108331083410835108361083710838108391084010841108421084310844108451084610847108481084910850108511085210853108541085510856108571085810859108601086110862108631086410865108661086710868108691087010871108721087310874108751087610877108781087910880108811088210883108841088510886108871088810889108901089110892108931089410895108961089710898108991090010901109021090310904109051090610907109081090910910109111091210913109141091510916109171091810919109201092110922109231092410925109261092710928109291093010931109321093310934109351093610937109381093910940109411094210943109441094510946109471094810949109501095110952109531095410955109561095710958109591096010961109621096310964109651096610967109681096910970109711097210973109741097510976109771097810979109801098110982109831098410985109861098710988109891099010991109921099310994109951099610997109981099911000110011100211003110041100511006110071100811009110101101111012110131101411015110161101711018110191102011021110221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211142211423114241142511426114271142811429114301143111432114331143411435114361143711438114391144011441114421144311444114451144611447114481144911450114511145211453114541145511456114571145811459114601146111462114631146411465114661146711468114691147011471114721147311474114751147611477114781147911480114811148211483114841148511486114871148811489114901149111492114931149411495114961149711498114991150011501115021150311504115051150611507115081150911510115111151211513115141151511516115171151811519115201152111522115231152411525115261152711528115291153011531115321153311534115351153611537115381153911540115411154211543115441154511546115471154811549115501155111552115531155411555115561155711558115591156011561115621156311564115651156611567115681156911570115711157211573115741157511576115771157811579115801158111582115831158411585115861158711588115891159011591115921159311594115951159611597115981159911600116011160211603116041160511606116071160811609116101161111612116131161411615116161161711618116191162011621116221162311624116251162611627116281162911630116311163211633116341163511636116371163811639116401164111642116431164411645116461164711648116491165011651116521165311654116551165611657116581165911660116611166211663116641166511666116671166811669116701167111672116731167411675116761167711678116791168011681116821168311684116851168611687116881168911690116911169211693116941169511696116971169811699117001170111702117031170411705117061170711708117091171011711117121171311714117151171611717117181171911720117211172211723117241172511726117271172811729117301173111732117331173411735117361173711738117391174011741117421174311744117451174611747117481174911750117511175211753117541175511756117571175811759117601176111762117631176411765117661176711768117691177011771117721177311774117751177611777117781177911780117811178211783117841178511786117871178811789117901179111792117931179411795117961179711798117991180011801118021180311804118051180611807118081180911810118111181211813118141181511816118171181811819118201182111822118231182411825118261182711828118291183011831118321183311834118351183611837118381183911840118411184211843118441184511846118471184811849118501185111852118531185411855118561185711858118591186011861118621186311864118651186611867118681186911870118711187211873118741187511876118771187811879118801188111882118831188411885118861188711888118891189011891118921189311894118951189611897118981189911900119011190211903119041190511906119071190811909119101191111912119131191411915119161191711918119191192011921119221192311924119251192611927119281192911930119311193211933119341193511936119371193811939119401194111942119431194411945119461194711948119491195011951119521195311954119551195611957119581195911960119611196211963119641196511966119671196811969119701197111972119731197411975119761197711978119791198011981119821198311984119851198611987119881198911990119911199211993119941199511996119971199811999120001200112002120031200412005120061200712008120091201012011120121201312014120151201612017120181201912020120211202212023120241202512026120271202812029120301203112032120331203412035120361203712038120391204012041120421204312044120451204612047120481204912050120511205212053120541205512056120571205812059120601206112062120631206412065120661206712068120691207012071120721207312074120751207612077120781207912080120811208212083120841208512086120871208812089120901209112092120931209412095120961209712098120991210012101121021210312104121051210612107121081210912110121111211212113121141211512116121171211812119121201212112122121231212412125121261212712128121291213012131121321213312134121351213612137121381213912140121411214212143121441214512146121471214812149121501215112152121531215412155121561215712158121591216012161121621216312164121651216612167121681216912170121711217212173121741217512176121771217812179121801218112182121831218412185121861218712188121891219012191121921219312194121951219612197121981219912200122011220212203122041220512206122071220812209122101221112212122131221412215122161221712218122191222012221122221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211262212623126241262512626126271262812629126301263112632126331263412635126361263712638126391264012641126421264312644126451264612647126481264912650126511265212653126541265512656126571265812659126601266112662126631266412665126661266712668126691267012671126721267312674126751267612677126781267912680126811268212683126841268512686126871268812689126901269112692126931269412695126961269712698126991270012701127021270312704127051270612707127081270912710127111271212713127141271512716127171271812719127201272112722127231272412725127261272712728127291273012731127321273312734127351273612737127381273912740127411274212743127441274512746127471274812749127501275112752127531275412755127561275712758127591276012761127621276312764127651276612767127681276912770127711277212773127741277512776127771277812779127801278112782127831278412785127861278712788127891279012791127921279312794127951279612797127981279912800128011280212803128041280512806128071280812809128101281112812128131281412815128161281712818128191282012821128221282312824128251282612827128281282912830128311283212833128341283512836128371283812839128401284112842128431284412845128461284712848128491285012851128521285312854128551285612857128581285912860128611286212863128641286512866128671286812869128701287112872128731287412875128761287712878128791288012881128821288312884128851288612887128881288912890128911289212893128941289512896128971289812899129001290112902129031290412905129061290712908129091291012911129121291312914129151291612917129181291912920129211292212923129241292512926129271292812929129301293112932129331293412935129361293712938129391294012941129421294312944129451294612947129481294912950129511295212953129541295512956129571295812959129601296112962129631296412965129661296712968129691297012971129721297312974129751297612977129781297912980129811298212983129841298512986129871298812989129901299112992129931299412995129961299712998129991300013001130021300313004130051300613007130081300913010130111301213013130141301513016130171301813019130201302113022130231302413025130261302713028130291303013031130321303313034130351303613037130381303913040130411304213043130441304513046130471304813049130501305113052130531305413055130561305713058130591306013061130621306313064130651306613067130681306913070130711307213073130741307513076130771307813079130801308113082130831308413085130861308713088130891309013091130921309313094130951309613097130981309913100131011310213103131041310513106131071310813109131101311113112131131311413115131161311713118131191312013121131221312313124131251312613127131281312913130131311313213133131341313513136131371313813139131401314113142131431314413145131461314713148131491315013151131521315313154131551315613157131581315913160131611316213163131641316513166131671316813169131701317113172131731317413175131761317713178131791318013181131821318313184131851318613187131881318913190131911319213193131941319513196131971319813199132001320113202132031320413205132061320713208132091321013211132121321313214132151321613217132181321913220132211322213223132241322513226132271322813229132301323113232132331323413235132361323713238132391324013241132421324313244132451324613247132481324913250132511325213253132541325513256132571325813259132601326113262132631326413265132661326713268132691327013271132721327313274132751327613277132781327913280132811328213283132841328513286132871328813289132901329113292132931329413295132961329713298132991330013301133021330313304133051330613307133081330913310133111331213313133141331513316133171331813319133201332113322133231332413325133261332713328133291333013331133321333313334133351333613337133381333913340133411334213343133441334513346133471334813349133501335113352133531335413355133561335713358133591336013361133621336313364133651336613367133681336913370133711337213373133741337513376133771337813379133801338113382133831338413385133861338713388133891339013391133921339313394133951339613397133981339913400134011340213403134041340513406134071340813409134101341113412134131341413415134161341713418134191342013421134221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211382213823138241382513826138271382813829138301383113832138331383413835138361383713838138391384013841138421384313844138451384613847138481384913850138511385213853138541385513856138571385813859138601386113862138631386413865138661386713868138691387013871138721387313874138751387613877138781387913880138811388213883138841388513886138871388813889138901389113892138931389413895138961389713898138991390013901139021390313904139051390613907139081390913910139111391213913139141391513916139171391813919139201392113922139231392413925139261392713928139291393013931139321393313934139351393613937139381393913940139411394213943139441394513946139471394813949139501395113952139531395413955139561395713958139591396013961139621396313964139651396613967139681396913970139711397213973139741397513976139771397813979139801398113982139831398413985139861398713988139891399013991139921399313994139951399613997139981399914000140011400214003140041400514006140071400814009140101401114012140131401414015140161401714018140191402014021140221402314024140251402614027140281402914030140311403214033140341403514036140371403814039140401404114042140431404414045140461404714048140491405014051140521405314054140551405614057140581405914060140611406214063140641406514066140671406814069140701407114072140731407414075140761407714078140791408014081140821408314084140851408614087140881408914090140911409214093140941409514096140971409814099141001410114102141031410414105141061410714108141091411014111141121411314114141151411614117141181411914120141211412214123141241412514126141271412814129141301413114132141331413414135141361413714138141391414014141141421414314144141451414614147141481414914150141511415214153141541415514156141571415814159141601416114162141631416414165141661416714168141691417014171141721417314174141751417614177141781417914180141811418214183141841418514186141871418814189141901419114192141931419414195141961419714198141991420014201142021420314204142051420614207142081420914210142111421214213142141421514216142171421814219142201422114222142231422414225142261422714228142291423014231142321423314234142351423614237142381423914240142411424214243142441424514246142471424814249142501425114252142531425414255142561425714258142591426014261142621426314264142651426614267142681426914270142711427214273142741427514276142771427814279142801428114282142831428414285142861428714288142891429014291142921429314294142951429614297142981429914300143011430214303143041430514306143071430814309143101431114312143131431414315143161431714318143191432014321143221432314324143251432614327143281432914330143311433214333143341433514336143371433814339143401434114342143431434414345143461434714348143491435014351143521435314354143551435614357143581435914360143611436214363143641436514366143671436814369143701437114372143731437414375143761437714378143791438014381143821438314384143851438614387143881438914390143911439214393143941439514396143971439814399144001440114402144031440414405144061440714408144091441014411144121441314414144151441614417144181441914420144211442214423144241442514426144271442814429144301443114432144331443414435144361443714438144391444014441144421444314444144451444614447144481444914450144511445214453144541445514456144571445814459144601446114462144631446414465144661446714468144691447014471144721447314474144751447614477144781447914480144811448214483144841448514486144871448814489144901449114492144931449414495144961449714498144991450014501145021450314504145051450614507145081450914510145111451214513145141451514516145171451814519145201452114522145231452414525145261452714528145291453014531145321453314534145351453614537145381453914540145411454214543145441454514546145471454814549145501455114552145531455414555145561455714558145591456014561145621456314564145651456614567145681456914570145711457214573145741457514576145771457814579145801458114582145831458414585145861458714588145891459014591145921459314594145951459614597145981459914600146011460214603146041460514606146071460814609146101461114612146131461414615146161461714618146191462014621146221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211502215023150241502515026150271502815029150301503115032150331503415035150361503715038150391504015041150421504315044150451504615047150481504915050150511505215053150541505515056150571505815059150601506115062150631506415065150661506715068150691507015071150721507315074150751507615077150781507915080150811508215083150841508515086150871508815089150901509115092150931509415095150961509715098150991510015101151021510315104151051510615107151081510915110151111511215113151141511515116151171511815119151201512115122151231512415125151261512715128151291513015131151321513315134151351513615137151381513915140151411514215143151441514515146151471514815149151501515115152151531515415155151561515715158151591516015161151621516315164151651516615167151681516915170151711517215173151741517515176151771517815179151801518115182151831518415185151861518715188151891519015191151921519315194151951519615197151981519915200152011520215203152041520515206152071520815209152101521115212152131521415215152161521715218152191522015221152221522315224152251522615227152281522915230152311523215233152341523515236152371523815239152401524115242152431524415245152461524715248152491525015251152521525315254152551525615257152581525915260152611526215263152641526515266152671526815269152701527115272152731527415275152761527715278152791528015281152821528315284152851528615287152881528915290152911529215293152941529515296152971529815299153001530115302153031530415305153061530715308153091531015311153121531315314153151531615317153181531915320153211532215323153241532515326153271532815329153301533115332153331533415335153361533715338153391534015341153421534315344153451534615347153481534915350153511535215353153541535515356153571535815359153601536115362153631536415365153661536715368153691537015371153721537315374153751537615377153781537915380153811538215383153841538515386153871538815389153901539115392153931539415395153961539715398153991540015401154021540315404154051540615407154081540915410154111541215413154141541515416154171541815419154201542115422154231542415425154261542715428154291543015431154321543315434154351543615437154381543915440154411544215443154441544515446154471544815449154501545115452154531545415455154561545715458154591546015461154621546315464154651546615467154681546915470154711547215473154741547515476154771547815479154801548115482154831548415485154861548715488154891549015491154921549315494154951549615497154981549915500155011550215503155041550515506155071550815509155101551115512155131551415515155161551715518155191552015521155221552315524155251552615527155281552915530155311553215533155341553515536155371553815539155401554115542155431554415545155461554715548155491555015551155521555315554155551555615557155581555915560155611556215563155641556515566155671556815569155701557115572155731557415575155761557715578155791558015581155821558315584155851558615587155881558915590155911559215593155941559515596155971559815599156001560115602156031560415605156061560715608156091561015611156121561315614156151561615617156181561915620156211562215623156241562515626156271562815629156301563115632156331563415635156361563715638156391564015641156421564315644156451564615647156481564915650156511565215653156541565515656156571565815659156601566115662156631566415665156661566715668156691567015671156721567315674156751567615677156781567915680156811568215683156841568515686156871568815689156901569115692156931569415695156961569715698156991570015701157021570315704157051570615707157081570915710157111571215713157141571515716157171571815719157201572115722157231572415725157261572715728157291573015731157321573315734157351573615737157381573915740157411574215743157441574515746157471574815749157501575115752157531575415755157561575715758157591576015761157621576315764157651576615767157681576915770157711577215773157741577515776157771577815779157801578115782157831578415785157861578715788157891579015791157921579315794157951579615797157981579915800158011580215803158041580515806158071580815809158101581115812158131581415815158161581715818158191582015821158221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211622216223162241622516226162271622816229162301623116232162331623416235162361623716238162391624016241162421624316244162451624616247162481624916250162511625216253162541625516256162571625816259162601626116262162631626416265162661626716268162691627016271162721627316274162751627616277162781627916280162811628216283162841628516286162871628816289162901629116292162931629416295162961629716298162991630016301163021630316304163051630616307163081630916310163111631216313163141631516316163171631816319163201632116322163231632416325163261632716328163291633016331163321633316334163351633616337163381633916340163411634216343163441634516346163471634816349163501635116352163531635416355163561635716358163591636016361163621636316364163651636616367163681636916370163711637216373163741637516376163771637816379163801638116382163831638416385163861638716388163891639016391163921639316394163951639616397163981639916400164011640216403164041640516406164071640816409164101641116412164131641416415164161641716418164191642016421164221642316424164251642616427164281642916430164311643216433164341643516436164371643816439164401644116442164431644416445164461644716448164491645016451164521645316454164551645616457164581645916460164611646216463164641646516466164671646816469164701647116472164731647416475164761647716478164791648016481164821648316484164851648616487164881648916490164911649216493164941649516496164971649816499165001650116502165031650416505165061650716508165091651016511165121651316514165151651616517165181651916520165211652216523165241652516526165271652816529165301653116532165331653416535165361653716538165391654016541165421654316544165451654616547165481654916550165511655216553165541655516556165571655816559165601656116562165631656416565165661656716568165691657016571165721657316574165751657616577165781657916580165811658216583165841658516586165871658816589165901659116592165931659416595165961659716598165991660016601166021660316604166051660616607166081660916610166111661216613166141661516616166171661816619166201662116622166231662416625166261662716628166291663016631166321663316634166351663616637166381663916640166411664216643166441664516646166471664816649166501665116652166531665416655166561665716658166591666016661166621666316664166651666616667166681666916670166711667216673166741667516676166771667816679166801668116682166831668416685166861668716688166891669016691166921669316694166951669616697166981669916700167011670216703167041670516706167071670816709167101671116712167131671416715167161671716718167191672016721167221672316724167251672616727167281672916730167311673216733167341673516736167371673816739167401674116742167431674416745167461674716748167491675016751167521675316754167551675616757167581675916760167611676216763167641676516766167671676816769167701677116772167731677416775167761677716778167791678016781167821678316784167851678616787167881678916790167911679216793167941679516796167971679816799168001680116802168031680416805168061680716808168091681016811168121681316814168151681616817168181681916820168211682216823168241682516826168271682816829168301683116832168331683416835168361683716838168391684016841168421684316844168451684616847168481684916850168511685216853168541685516856168571685816859168601686116862168631686416865168661686716868168691687016871168721687316874168751687616877168781687916880168811688216883168841688516886168871688816889168901689116892168931689416895168961689716898168991690016901169021690316904169051690616907169081690916910169111691216913169141691516916169171691816919169201692116922169231692416925169261692716928169291693016931169321693316934169351693616937169381693916940169411694216943169441694516946169471694816949169501695116952169531695416955169561695716958169591696016961169621696316964169651696616967169681696916970169711697216973169741697516976169771697816979169801698116982169831698416985169861698716988169891699016991169921699316994169951699616997169981699917000170011700217003170041700517006170071700817009170101701117012170131701417015170161701717018170191702017021170221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211742217423174241742517426174271742817429174301743117432174331743417435174361743717438174391744017441174421744317444174451744617447174481744917450174511745217453174541745517456174571745817459174601746117462174631746417465174661746717468174691747017471174721747317474174751747617477174781747917480174811748217483174841748517486174871748817489174901749117492174931749417495174961749717498174991750017501175021750317504175051750617507175081750917510175111751217513175141751517516175171751817519175201752117522175231752417525175261752717528175291753017531175321753317534175351753617537175381753917540175411754217543175441754517546175471754817549175501755117552175531755417555175561755717558175591756017561175621756317564175651756617567175681756917570175711757217573175741757517576175771757817579175801758117582175831758417585175861758717588175891759017591175921759317594175951759617597175981759917600176011760217603176041760517606176071760817609176101761117612176131761417615176161761717618176191762017621176221762317624176251762617627176281762917630176311763217633176341763517636176371763817639176401764117642176431764417645176461764717648176491765017651176521765317654176551765617657176581765917660176611766217663176641766517666176671766817669176701767117672176731767417675176761767717678176791768017681176821768317684176851768617687176881768917690176911769217693176941769517696176971769817699177001770117702177031770417705177061770717708177091771017711177121771317714177151771617717177181771917720177211772217723177241772517726177271772817729177301773117732177331773417735177361773717738177391774017741177421774317744177451774617747177481774917750177511775217753177541775517756177571775817759177601776117762177631776417765177661776717768177691777017771177721777317774177751777617777177781777917780177811778217783177841778517786177871778817789177901779117792177931779417795177961779717798177991780017801178021780317804178051780617807178081780917810178111781217813178141781517816178171781817819178201782117822178231782417825178261782717828178291783017831178321783317834178351783617837178381783917840178411784217843178441784517846178471784817849178501785117852178531785417855178561785717858178591786017861178621786317864178651786617867178681786917870178711787217873178741787517876178771787817879178801788117882178831788417885178861788717888178891789017891178921789317894178951789617897178981789917900179011790217903179041790517906179071790817909179101791117912179131791417915179161791717918179191792017921179221792317924179251792617927179281792917930179311793217933179341793517936179371793817939179401794117942179431794417945179461794717948179491795017951179521795317954179551795617957179581795917960179611796217963179641796517966179671796817969179701797117972179731797417975179761797717978179791798017981179821798317984179851798617987179881798917990179911799217993179941799517996179971799817999180001800118002180031800418005180061800718008180091801018011180121801318014180151801618017180181801918020180211802218023180241802518026180271802818029180301803118032180331803418035180361803718038180391804018041180421804318044180451804618047180481804918050180511805218053180541805518056180571805818059180601806118062180631806418065180661806718068180691807018071180721807318074180751807618077180781807918080180811808218083180841808518086180871808818089180901809118092180931809418095180961809718098180991810018101181021810318104181051810618107181081810918110181111811218113181141811518116181171811818119181201812118122181231812418125181261812718128181291813018131181321813318134181351813618137181381813918140181411814218143181441814518146181471814818149181501815118152181531815418155181561815718158181591816018161181621816318164181651816618167181681816918170181711817218173181741817518176181771817818179181801818118182181831818418185181861818718188181891819018191181921819318194181951819618197181981819918200182011820218203182041820518206182071820818209182101821118212182131821418215182161821718218182191822018221182221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211862218623186241862518626186271862818629186301863118632186331863418635186361863718638186391864018641186421864318644186451864618647186481864918650186511865218653186541865518656186571865818659186601866118662186631866418665186661866718668186691867018671186721867318674186751867618677186781867918680186811868218683186841868518686186871868818689186901869118692186931869418695186961869718698186991870018701187021870318704187051870618707187081870918710187111871218713187141871518716187171871818719187201872118722187231872418725187261872718728187291873018731187321873318734187351873618737187381873918740187411874218743187441874518746187471874818749187501875118752187531875418755187561875718758187591876018761187621876318764187651876618767187681876918770187711877218773187741877518776187771877818779187801878118782187831878418785187861878718788187891879018791187921879318794187951879618797187981879918800188011880218803188041880518806188071880818809188101881118812188131881418815188161881718818188191882018821188221882318824188251882618827188281882918830188311883218833188341883518836188371883818839188401884118842188431884418845188461884718848188491885018851188521885318854188551885618857188581885918860188611886218863188641886518866188671886818869188701887118872188731887418875188761887718878188791888018881188821888318884188851888618887188881888918890188911889218893188941889518896188971889818899189001890118902189031890418905189061890718908189091891018911189121891318914189151891618917189181891918920189211892218923189241892518926189271892818929189301893118932189331893418935189361893718938189391894018941189421894318944189451894618947189481894918950189511895218953189541895518956189571895818959189601896118962189631896418965189661896718968189691897018971189721897318974189751897618977189781897918980189811898218983189841898518986189871898818989189901899118992189931899418995189961899718998189991900019001190021900319004190051900619007190081900919010190111901219013190141901519016190171901819019190201902119022190231902419025190261902719028190291903019031190321903319034190351903619037190381903919040190411904219043190441904519046190471904819049190501905119052190531905419055190561905719058190591906019061190621906319064190651906619067190681906919070190711907219073190741907519076190771907819079190801908119082190831908419085190861908719088190891909019091190921909319094190951909619097190981909919100191011910219103191041910519106191071910819109191101911119112191131911419115191161911719118191191912019121191221912319124191251912619127191281912919130191311913219133191341913519136191371913819139191401914119142191431914419145191461914719148191491915019151191521915319154191551915619157191581915919160191611916219163191641916519166191671916819169191701917119172191731917419175191761917719178191791918019181191821918319184191851918619187191881918919190191911919219193191941919519196191971919819199192001920119202192031920419205192061920719208192091921019211192121921319214192151921619217192181921919220192211922219223192241922519226192271922819229192301923119232192331923419235192361923719238192391924019241192421924319244192451924619247192481924919250192511925219253192541925519256192571925819259192601926119262192631926419265192661926719268192691927019271192721927319274192751927619277192781927919280192811928219283192841928519286192871928819289192901929119292192931929419295192961929719298192991930019301193021930319304193051930619307193081930919310193111931219313193141931519316193171931819319193201932119322193231932419325193261932719328193291933019331193321933319334193351933619337193381933919340193411934219343193441934519346193471934819349193501935119352193531935419355193561935719358193591936019361193621936319364193651936619367193681936919370193711937219373193741937519376193771937819379193801938119382193831938419385193861938719388193891939019391193921939319394193951939619397193981939919400194011940219403194041940519406194071940819409194101941119412194131941419415194161941719418194191942019421194221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211982219823198241982519826198271982819829198301983119832198331983419835198361983719838198391984019841198421984319844198451984619847198481984919850198511985219853198541985519856198571985819859198601986119862198631986419865198661986719868198691987019871198721987319874198751987619877198781987919880198811988219883198841988519886198871988819889198901989119892198931989419895198961989719898198991990019901199021990319904199051990619907199081990919910199111991219913199141991519916199171991819919199201992119922199231992419925199261992719928199291993019931199321993319934199351993619937199381993919940199411994219943199441994519946199471994819949199501995119952199531995419955199561995719958199591996019961199621996319964199651996619967199681996919970199711997219973199741997519976199771997819979199801998119982199831998419985199861998719988199891999019991199921999319994199951999619997199981999920000200012000220003200042000520006200072000820009200102001120012200132001420015200162001720018200192002020021200222002320024200252002620027200282002920030200312003220033200342003520036200372003820039200402004120042200432004420045200462004720048200492005020051200522005320054200552005620057200582005920060200612006220063200642006520066200672006820069200702007120072200732007420075200762007720078200792008020081200822008320084200852008620087200882008920090200912009220093200942009520096200972009820099201002010120102201032010420105201062010720108201092011020111201122011320114201152011620117201182011920120201212012220123201242012520126201272012820129201302013120132201332013420135201362013720138201392014020141201422014320144201452014620147201482014920150201512015220153201542015520156201572015820159201602016120162201632016420165201662016720168201692017020171201722017320174201752017620177201782017920180201812018220183201842018520186201872018820189201902019120192201932019420195201962019720198201992020020201202022020320204202052020620207202082020920210202112021220213202142021520216202172021820219202202022120222202232022420225202262022720228202292023020231202322023320234202352023620237202382023920240202412024220243202442024520246202472024820249202502025120252202532025420255202562025720258202592026020261202622026320264202652026620267202682026920270202712027220273202742027520276202772027820279202802028120282202832028420285202862028720288202892029020291202922029320294202952029620297202982029920300203012030220303203042030520306203072030820309203102031120312203132031420315203162031720318203192032020321203222032320324203252032620327203282032920330203312033220333203342033520336203372033820339203402034120342203432034420345203462034720348203492035020351203522035320354203552035620357203582035920360203612036220363203642036520366203672036820369203702037120372203732037420375203762037720378203792038020381203822038320384203852038620387203882038920390203912039220393203942039520396203972039820399204002040120402204032040420405204062040720408204092041020411204122041320414204152041620417204182041920420204212042220423204242042520426204272042820429204302043120432204332043420435204362043720438204392044020441204422044320444204452044620447204482044920450204512045220453204542045520456204572045820459204602046120462204632046420465204662046720468204692047020471204722047320474204752047620477204782047920480204812048220483204842048520486204872048820489204902049120492204932049420495204962049720498204992050020501205022050320504205052050620507205082050920510205112051220513205142051520516205172051820519205202052120522205232052420525205262052720528205292053020531205322053320534205352053620537205382053920540205412054220543205442054520546205472054820549205502055120552205532055420555205562055720558205592056020561205622056320564205652056620567205682056920570205712057220573205742057520576205772057820579205802058120582205832058420585205862058720588205892059020591205922059320594205952059620597205982059920600206012060220603206042060520606206072060820609206102061120612206132061420615206162061720618206192062020621206222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212102221023210242102521026210272102821029210302103121032210332103421035210362103721038210392104021041210422104321044210452104621047210482104921050210512105221053210542105521056210572105821059210602106121062210632106421065210662106721068210692107021071210722107321074210752107621077210782107921080210812108221083210842108521086210872108821089210902109121092210932109421095210962109721098210992110021101211022110321104211052110621107211082110921110211112111221113211142111521116211172111821119211202112121122211232112421125211262112721128211292113021131211322113321134211352113621137211382113921140211412114221143211442114521146211472114821149211502115121152211532115421155211562115721158211592116021161211622116321164211652116621167211682116921170211712117221173211742117521176211772117821179211802118121182211832118421185211862118721188211892119021191211922119321194211952119621197211982119921200212012120221203212042120521206212072120821209212102121121212212132121421215212162121721218212192122021221212222122321224212252122621227212282122921230212312123221233212342123521236212372123821239212402124121242212432124421245212462124721248212492125021251212522125321254212552125621257212582125921260212612126221263212642126521266212672126821269212702127121272212732127421275212762127721278212792128021281212822128321284212852128621287212882128921290212912129221293212942129521296212972129821299213002130121302213032130421305213062130721308213092131021311213122131321314213152131621317213182131921320213212132221323213242132521326213272132821329213302133121332213332133421335213362133721338213392134021341213422134321344213452134621347213482134921350213512135221353213542135521356213572135821359213602136121362213632136421365213662136721368213692137021371213722137321374213752137621377213782137921380213812138221383213842138521386213872138821389213902139121392213932139421395213962139721398213992140021401214022140321404214052140621407214082140921410214112141221413214142141521416214172141821419214202142121422214232142421425214262142721428214292143021431214322143321434214352143621437214382143921440214412144221443214442144521446214472144821449214502145121452214532145421455214562145721458214592146021461214622146321464214652146621467214682146921470214712147221473214742147521476214772147821479214802148121482214832148421485214862148721488214892149021491214922149321494214952149621497214982149921500215012150221503215042150521506215072150821509215102151121512215132151421515215162151721518215192152021521215222152321524215252152621527215282152921530215312153221533215342153521536215372153821539215402154121542215432154421545215462154721548215492155021551215522155321554215552155621557215582155921560215612156221563215642156521566215672156821569215702157121572215732157421575215762157721578215792158021581215822158321584215852158621587215882158921590215912159221593215942159521596215972159821599216002160121602216032160421605216062160721608216092161021611216122161321614216152161621617216182161921620216212162221623216242162521626216272162821629216302163121632216332163421635216362163721638216392164021641216422164321644216452164621647216482164921650216512165221653216542165521656216572165821659216602166121662216632166421665216662166721668216692167021671216722167321674216752167621677216782167921680216812168221683216842168521686216872168821689216902169121692216932169421695216962169721698216992170021701217022170321704217052170621707217082170921710217112171221713217142171521716217172171821719217202172121722217232172421725217262172721728217292173021731217322173321734217352173621737217382173921740217412174221743217442174521746217472174821749217502175121752217532175421755217562175721758217592176021761217622176321764217652176621767217682176921770217712177221773217742177521776217772177821779217802178121782217832178421785217862178721788217892179021791217922179321794217952179621797217982179921800218012180221803218042180521806218072180821809218102181121812218132181421815218162181721818218192182021821218222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212222222223222242222522226222272222822229222302223122232222332223422235222362223722238222392224022241222422224322244222452224622247222482224922250222512225222253222542225522256222572225822259222602226122262222632226422265222662226722268222692227022271222722227322274222752227622277222782227922280222812228222283222842228522286222872228822289222902229122292222932229422295222962229722298222992230022301223022230322304223052230622307223082230922310223112231222313223142231522316223172231822319223202232122322223232232422325223262232722328223292233022331223322233322334223352233622337223382233922340223412234222343223442234522346223472234822349223502235122352223532235422355223562235722358223592236022361223622236322364223652236622367223682236922370223712237222373223742237522376223772237822379223802238122382223832238422385223862238722388223892239022391223922239322394223952239622397223982239922400224012240222403224042240522406224072240822409224102241122412224132241422415224162241722418224192242022421224222242322424224252242622427224282242922430224312243222433224342243522436224372243822439224402244122442224432244422445224462244722448224492245022451224522245322454224552245622457224582245922460224612246222463224642246522466224672246822469224702247122472224732247422475224762247722478224792248022481224822248322484224852248622487224882248922490224912249222493224942249522496224972249822499225002250122502225032250422505225062250722508225092251022511225122251322514225152251622517225182251922520225212252222523225242252522526225272252822529225302253122532225332253422535225362253722538225392254022541225422254322544225452254622547225482254922550225512255222553225542255522556225572255822559225602256122562225632256422565225662256722568225692257022571225722257322574225752257622577225782257922580225812258222583225842258522586225872258822589225902259122592225932259422595225962259722598225992260022601226022260322604226052260622607226082260922610226112261222613226142261522616226172261822619226202262122622226232262422625226262262722628226292263022631226322263322634226352263622637226382263922640226412264222643226442264522646226472264822649226502265122652226532265422655226562265722658226592266022661226622266322664226652266622667226682266922670226712267222673226742267522676226772267822679226802268122682226832268422685226862268722688226892269022691226922269322694226952269622697226982269922700227012270222703227042270522706227072270822709227102271122712227132271422715227162271722718227192272022721227222272322724227252272622727227282272922730227312273222733227342273522736227372273822739227402274122742227432274422745227462274722748227492275022751227522275322754227552275622757227582275922760227612276222763227642276522766227672276822769227702277122772227732277422775227762277722778227792278022781227822278322784227852278622787227882278922790227912279222793227942279522796227972279822799228002280122802228032280422805228062280722808228092281022811228122281322814228152281622817228182281922820228212282222823228242282522826228272282822829228302283122832228332283422835228362283722838228392284022841228422284322844228452284622847228482284922850228512285222853228542285522856228572285822859228602286122862228632286422865228662286722868228692287022871228722287322874228752287622877228782287922880228812288222883228842288522886228872288822889228902289122892228932289422895228962289722898228992290022901229022290322904229052290622907229082290922910229112291222913229142291522916229172291822919229202292122922229232292422925229262292722928229292293022931229322293322934229352293622937229382293922940229412294222943229442294522946229472294822949229502295122952229532295422955229562295722958229592296022961229622296322964229652296622967229682296922970229712297222973229742297522976229772297822979229802298122982229832298422985229862298722988229892299022991229922299322994229952299622997229982299923000230012300223003230042300523006230072300823009230102301123012230132301423015230162301723018230192302023021230222302323024230252302623027230282302923030230312303223033230342303523036230372303823039230402304123042230432304423045230462304723048230492305023051230522305323054230552305623057230582305923060230612306223063230642306523066230672306823069230702307123072230732307423075230762307723078230792308023081230822308323084230852308623087230882308923090230912309223093230942309523096230972309823099231002310123102231032310423105231062310723108231092311023111231122311323114231152311623117231182311923120231212312223123231242312523126231272312823129231302313123132231332313423135231362313723138231392314023141231422314323144231452314623147231482314923150231512315223153231542315523156231572315823159231602316123162231632316423165231662316723168231692317023171231722317323174231752317623177231782317923180231812318223183231842318523186231872318823189231902319123192231932319423195231962319723198231992320023201232022320323204232052320623207232082320923210232112321223213232142321523216232172321823219232202322123222232232322423225232262322723228232292323023231232322323323234232352323623237232382323923240232412324223243232442324523246232472324823249232502325123252232532325423255232562325723258232592326023261232622326323264232652326623267232682326923270232712327223273232742327523276232772327823279232802328123282232832328423285232862328723288232892329023291232922329323294232952329623297232982329923300233012330223303233042330523306233072330823309233102331123312233132331423315233162331723318233192332023321233222332323324233252332623327233282332923330233312333223333233342333523336233372333823339233402334123342233432334423345233462334723348233492335023351233522335323354233552335623357233582335923360233612336223363233642336523366233672336823369233702337123372233732337423375233762337723378233792338023381233822338323384233852338623387233882338923390233912339223393233942339523396233972339823399234002340123402234032340423405234062340723408234092341023411234122341323414234152341623417234182341923420234212342223423234242342523426234272342823429234302343123432234332343423435234362343723438234392344023441234422344323444234452344623447234482344923450234512345223453234542345523456234572345823459234602346123462234632346423465234662346723468234692347023471234722347323474234752347623477234782347923480234812348223483234842348523486234872348823489234902349123492234932349423495234962349723498234992350023501235022350323504235052350623507235082350923510235112351223513235142351523516235172351823519235202352123522235232352423525235262352723528235292353023531235322353323534235352353623537235382353923540235412354223543235442354523546235472354823549235502355123552235532355423555235562355723558235592356023561235622356323564235652356623567235682356923570235712357223573235742357523576235772357823579235802358123582235832358423585235862358723588235892359023591235922359323594235952359623597235982359923600236012360223603236042360523606236072360823609236102361123612236132361423615236162361723618236192362023621236222362323624236252362623627236282362923630236312363223633236342363523636236372363823639236402364123642236432364423645236462364723648236492365023651236522365323654236552365623657236582365923660236612366223663236642366523666236672366823669236702367123672236732367423675236762367723678236792368023681236822368323684236852368623687236882368923690236912369223693236942369523696236972369823699237002370123702237032370423705237062370723708237092371023711237122371323714237152371623717237182371923720237212372223723237242372523726237272372823729237302373123732237332373423735237362373723738237392374023741237422374323744237452374623747237482374923750237512375223753237542375523756237572375823759237602376123762237632376423765237662376723768237692377023771237722377323774237752377623777237782377923780237812378223783237842378523786237872378823789237902379123792237932379423795237962379723798237992380023801238022380323804238052380623807238082380923810238112381223813238142381523816238172381823819238202382123822238232382423825238262382723828238292383023831238322383323834238352383623837238382383923840238412384223843238442384523846238472384823849238502385123852238532385423855238562385723858238592386023861238622386323864238652386623867238682386923870238712387223873238742387523876238772387823879238802388123882238832388423885238862388723888238892389023891238922389323894238952389623897238982389923900239012390223903239042390523906239072390823909239102391123912239132391423915239162391723918239192392023921239222392323924239252392623927239282392923930239312393223933239342393523936239372393823939239402394123942239432394423945239462394723948239492395023951239522395323954239552395623957239582395923960239612396223963239642396523966239672396823969239702397123972239732397423975239762397723978239792398023981239822398323984239852398623987239882398923990239912399223993239942399523996239972399823999240002400124002240032400424005240062400724008240092401024011240122401324014240152401624017240182401924020240212402224023240242402524026240272402824029240302403124032240332403424035240362403724038240392404024041240422404324044240452404624047240482404924050240512405224053240542405524056240572405824059240602406124062240632406424065240662406724068240692407024071240722407324074240752407624077240782407924080240812408224083240842408524086240872408824089240902409124092240932409424095240962409724098240992410024101241022410324104241052410624107241082410924110241112411224113241142411524116241172411824119241202412124122241232412424125241262412724128241292413024131241322413324134241352413624137241382413924140241412414224143241442414524146241472414824149241502415124152241532415424155241562415724158241592416024161241622416324164241652416624167241682416924170241712417224173241742417524176241772417824179241802418124182241832418424185241862418724188241892419024191241922419324194241952419624197241982419924200242012420224203242042420524206242072420824209242102421124212242132421424215242162421724218242192422024221242222422324224242252422624227242282422924230242312423224233242342423524236242372423824239242402424124242242432424424245242462424724248242492425024251242522425324254242552425624257242582425924260242612426224263242642426524266242672426824269242702427124272242732427424275242762427724278242792428024281242822428324284242852428624287242882428924290242912429224293242942429524296242972429824299243002430124302243032430424305243062430724308243092431024311243122431324314243152431624317243182431924320243212432224323243242432524326243272432824329243302433124332243332433424335243362433724338243392434024341243422434324344243452434624347243482434924350243512435224353243542435524356243572435824359243602436124362243632436424365243662436724368243692437024371243722437324374243752437624377243782437924380243812438224383243842438524386243872438824389243902439124392243932439424395243962439724398243992440024401244022440324404244052440624407244082440924410244112441224413244142441524416244172441824419244202442124422244232442424425244262442724428244292443024431244322443324434244352443624437244382443924440244412444224443244442444524446244472444824449244502445124452244532445424455244562445724458244592446024461244622446324464244652446624467244682446924470244712447224473244742447524476244772447824479244802448124482244832448424485244862448724488244892449024491244922449324494244952449624497244982449924500245012450224503245042450524506245072450824509245102451124512245132451424515245162451724518245192452024521245222452324524245252452624527245282452924530245312453224533245342453524536245372453824539245402454124542245432454424545245462454724548245492455024551245522455324554245552455624557245582455924560245612456224563245642456524566245672456824569245702457124572245732457424575245762457724578245792458024581245822458324584245852458624587245882458924590245912459224593245942459524596245972459824599246002460124602246032460424605246062460724608246092461024611246122461324614246152461624617246182461924620246212462224623246242462524626246272462824629246302463124632246332463424635246362463724638246392464024641246422464324644246452464624647246482464924650246512465224653246542465524656246572465824659246602466124662246632466424665246662466724668246692467024671246722467324674246752467624677246782467924680246812468224683246842468524686246872468824689246902469124692246932469424695246962469724698246992470024701247022470324704247052470624707247082470924710247112471224713247142471524716247172471824719247202472124722247232472424725247262472724728247292473024731247322473324734247352473624737247382473924740247412474224743247442474524746247472474824749247502475124752247532475424755247562475724758247592476024761247622476324764247652476624767247682476924770247712477224773247742477524776247772477824779247802478124782247832478424785247862478724788247892479024791247922479324794247952479624797247982479924800248012480224803248042480524806248072480824809248102481124812248132481424815248162481724818248192482024821248222482324824248252482624827248282482924830248312483224833248342483524836248372483824839248402484124842248432484424845248462484724848248492485024851248522485324854248552485624857248582485924860248612486224863248642486524866248672486824869248702487124872248732487424875248762487724878248792488024881248822488324884248852488624887248882488924890248912489224893248942489524896248972489824899249002490124902249032490424905249062490724908249092491024911249122491324914249152491624917249182491924920249212492224923249242492524926249272492824929249302493124932249332493424935249362493724938249392494024941249422494324944249452494624947249482494924950249512495224953249542495524956249572495824959249602496124962249632496424965249662496724968249692497024971249722497324974249752497624977249782497924980249812498224983249842498524986249872498824989249902499124992249932499424995249962499724998249992500025001250022500325004250052500625007250082500925010250112501225013250142501525016250172501825019250202502125022250232502425025250262502725028250292503025031250322503325034250352503625037250382503925040250412504225043250442504525046250472504825049250502505125052250532505425055250562505725058250592506025061250622506325064250652506625067250682506925070250712507225073250742507525076250772507825079250802508125082250832508425085250862508725088250892509025091250922509325094250952509625097250982509925100251012510225103251042510525106251072510825109251102511125112251132511425115251162511725118251192512025121251222512325124251252512625127251282512925130251312513225133251342513525136251372513825139251402514125142251432514425145251462514725148251492515025151251522515325154251552515625157251582515925160251612516225163251642516525166251672516825169251702517125172251732517425175251762517725178251792518025181251822518325184251852518625187251882518925190251912519225193251942519525196251972519825199252002520125202252032520425205252062520725208252092521025211252122521325214252152521625217252182521925220252212522225223252242522525226252272522825229252302523125232252332523425235252362523725238252392524025241252422524325244252452524625247252482524925250252512525225253252542525525256252572525825259252602526125262252632526425265252662526725268252692527025271252722527325274252752527625277252782527925280252812528225283252842528525286252872528825289252902529125292252932529425295252962529725298252992530025301253022530325304253052530625307253082530925310253112531225313253142531525316253172531825319253202532125322253232532425325253262532725328253292533025331253322533325334253352533625337253382533925340253412534225343253442534525346253472534825349253502535125352253532535425355253562535725358253592536025361253622536325364253652536625367253682536925370253712537225373253742537525376253772537825379253802538125382253832538425385253862538725388253892539025391253922539325394253952539625397253982539925400254012540225403254042540525406254072540825409254102541125412254132541425415254162541725418254192542025421254222542325424254252542625427254282542925430254312543225433254342543525436254372543825439254402544125442254432544425445254462544725448254492545025451254522545325454254552545625457254582545925460254612546225463254642546525466254672546825469254702547125472254732547425475254762547725478254792548025481254822548325484254852548625487254882548925490254912549225493254942549525496254972549825499255002550125502255032550425505255062550725508255092551025511255122551325514255152551625517255182551925520255212552225523255242552525526255272552825529255302553125532255332553425535255362553725538255392554025541255422554325544255452554625547255482554925550255512555225553255542555525556255572555825559255602556125562255632556425565255662556725568255692557025571255722557325574255752557625577255782557925580255812558225583255842558525586255872558825589255902559125592255932559425595255962559725598255992560025601256022560325604256052560625607256082560925610256112561225613256142561525616256172561825619256202562125622256232562425625256262562725628256292563025631256322563325634256352563625637256382563925640256412564225643256442564525646256472564825649256502565125652256532565425655256562565725658256592566025661256622566325664256652566625667256682566925670256712567225673256742567525676256772567825679256802568125682256832568425685256862568725688256892569025691256922569325694256952569625697256982569925700257012570225703257042570525706257072570825709257102571125712257132571425715257162571725718257192572025721257222572325724257252572625727257282572925730257312573225733257342573525736257372573825739257402574125742257432574425745257462574725748257492575025751257522575325754257552575625757257582575925760257612576225763257642576525766257672576825769257702577125772257732577425775257762577725778257792578025781257822578325784257852578625787257882578925790257912579225793257942579525796257972579825799258002580125802258032580425805258062580725808258092581025811258122581325814258152581625817258182581925820258212582225823258242582525826258272582825829258302583125832258332583425835258362583725838258392584025841258422584325844258452584625847258482584925850258512585225853258542585525856258572585825859258602586125862258632586425865258662586725868258692587025871258722587325874258752587625877258782587925880258812588225883258842588525886258872588825889258902589125892258932589425895258962589725898258992590025901259022590325904259052590625907259082590925910259112591225913259142591525916259172591825919259202592125922259232592425925259262592725928259292593025931259322593325934259352593625937259382593925940259412594225943259442594525946259472594825949259502595125952259532595425955259562595725958259592596025961259622596325964259652596625967259682596925970259712597225973259742597525976259772597825979259802598125982259832598425985259862598725988259892599025991259922599325994259952599625997259982599926000260012600226003260042600526006260072600826009260102601126012260132601426015260162601726018260192602026021260222602326024260252602626027260282602926030260312603226033260342603526036260372603826039260402604126042260432604426045260462604726048260492605026051260522605326054260552605626057260582605926060260612606226063260642606526066260672606826069260702607126072260732607426075260762607726078260792608026081260822608326084260852608626087260882608926090260912609226093260942609526096260972609826099261002610126102261032610426105261062610726108261092611026111261122611326114261152611626117261182611926120261212612226123261242612526126261272612826129261302613126132261332613426135261362613726138261392614026141261422614326144261452614626147261482614926150261512615226153261542615526156261572615826159261602616126162261632616426165261662616726168261692617026171261722617326174261752617626177261782617926180261812618226183261842618526186261872618826189261902619126192261932619426195261962619726198261992620026201262022620326204262052620626207262082620926210262112621226213262142621526216262172621826219262202622126222262232622426225262262622726228262292623026231262322623326234262352623626237262382623926240262412624226243262442624526246262472624826249262502625126252262532625426255262562625726258262592626026261262622626326264262652626626267262682626926270262712627226273262742627526276262772627826279262802628126282262832628426285262862628726288262892629026291262922629326294262952629626297262982629926300263012630226303263042630526306263072630826309263102631126312263132631426315263162631726318263192632026321263222632326324263252632626327263282632926330263312633226333263342633526336263372633826339263402634126342263432634426345263462634726348263492635026351263522635326354263552635626357263582635926360263612636226363263642636526366263672636826369263702637126372263732637426375263762637726378263792638026381263822638326384263852638626387263882638926390263912639226393263942639526396263972639826399264002640126402264032640426405264062640726408264092641026411264122641326414264152641626417264182641926420264212642226423264242642526426264272642826429264302643126432264332643426435264362643726438264392644026441264422644326444264452644626447264482644926450264512645226453264542645526456264572645826459264602646126462264632646426465264662646726468264692647026471264722647326474264752647626477264782647926480264812648226483264842648526486264872648826489264902649126492264932649426495264962649726498264992650026501265022650326504265052650626507265082650926510265112651226513265142651526516265172651826519265202652126522265232652426525265262652726528265292653026531265322653326534265352653626537265382653926540265412654226543265442654526546265472654826549265502655126552265532655426555265562655726558265592656026561265622656326564265652656626567265682656926570265712657226573265742657526576265772657826579265802658126582265832658426585265862658726588265892659026591265922659326594265952659626597265982659926600266012660226603266042660526606266072660826609266102661126612266132661426615266162661726618266192662026621266222662326624266252662626627266282662926630266312663226633266342663526636266372663826639266402664126642266432664426645266462664726648266492665026651266522665326654266552665626657266582665926660266612666226663266642666526666266672666826669266702667126672266732667426675266762667726678266792668026681266822668326684266852668626687266882668926690266912669226693266942669526696266972669826699267002670126702267032670426705267062670726708267092671026711267122671326714267152671626717267182671926720267212672226723267242672526726267272672826729267302673126732267332673426735267362673726738267392674026741267422674326744267452674626747267482674926750267512675226753267542675526756267572675826759267602676126762267632676426765267662676726768267692677026771267722677326774267752677626777267782677926780267812678226783267842678526786267872678826789267902679126792267932679426795267962679726798267992680026801268022680326804268052680626807268082680926810268112681226813268142681526816268172681826819268202682126822268232682426825268262682726828268292683026831268322683326834268352683626837268382683926840268412684226843268442684526846268472684826849268502685126852268532685426855268562685726858268592686026861268622686326864268652686626867268682686926870268712687226873268742687526876268772687826879268802688126882268832688426885268862688726888268892689026891268922689326894268952689626897268982689926900269012690226903269042690526906269072690826909269102691126912269132691426915269162691726918269192692026921269222692326924269252692626927269282692926930269312693226933269342693526936269372693826939269402694126942269432694426945269462694726948269492695026951269522695326954269552695626957269582695926960269612696226963269642696526966269672696826969269702697126972269732697426975269762697726978269792698026981269822698326984269852698626987269882698926990269912699226993269942699526996269972699826999270002700127002270032700427005270062700727008270092701027011270122701327014270152701627017270182701927020270212702227023270242702527026270272702827029270302703127032270332703427035270362703727038270392704027041270422704327044270452704627047270482704927050270512705227053270542705527056270572705827059270602706127062270632706427065270662706727068270692707027071270722707327074270752707627077270782707927080270812708227083270842708527086270872708827089270902709127092270932709427095270962709727098270992710027101271022710327104271052710627107271082710927110271112711227113271142711527116271172711827119271202712127122271232712427125271262712727128271292713027131271322713327134271352713627137271382713927140271412714227143271442714527146271472714827149271502715127152271532715427155271562715727158271592716027161271622716327164271652716627167271682716927170271712717227173271742717527176271772717827179271802718127182271832718427185271862718727188271892719027191271922719327194271952719627197271982719927200272012720227203272042720527206272072720827209272102721127212272132721427215272162721727218272192722027221272222722327224272252722627227272282722927230272312723227233272342723527236272372723827239272402724127242272432724427245272462724727248272492725027251272522725327254272552725627257272582725927260272612726227263272642726527266272672726827269272702727127272272732727427275272762727727278272792728027281272822728327284272852728627287272882728927290272912729227293272942729527296272972729827299273002730127302273032730427305273062730727308273092731027311273122731327314273152731627317273182731927320273212732227323273242732527326273272732827329273302733127332273332733427335273362733727338273392734027341273422734327344273452734627347273482734927350273512735227353273542735527356273572735827359273602736127362273632736427365273662736727368273692737027371273722737327374273752737627377273782737927380273812738227383273842738527386273872738827389273902739127392273932739427395273962739727398273992740027401274022740327404274052740627407274082740927410274112741227413274142741527416274172741827419274202742127422274232742427425274262742727428274292743027431274322743327434274352743627437274382743927440274412744227443274442744527446274472744827449274502745127452274532745427455274562745727458274592746027461274622746327464274652746627467274682746927470274712747227473274742747527476274772747827479274802748127482274832748427485274862748727488274892749027491274922749327494274952749627497274982749927500275012750227503275042750527506275072750827509275102751127512275132751427515275162751727518275192752027521275222752327524275252752627527275282752927530275312753227533275342753527536275372753827539275402754127542275432754427545275462754727548275492755027551275522755327554275552755627557275582755927560275612756227563275642756527566275672756827569275702757127572275732757427575275762757727578275792758027581275822758327584275852758627587275882758927590275912759227593275942759527596275972759827599276002760127602276032760427605276062760727608276092761027611276122761327614276152761627617276182761927620276212762227623276242762527626276272762827629276302763127632276332763427635276362763727638276392764027641276422764327644276452764627647276482764927650276512765227653276542765527656276572765827659276602766127662276632766427665276662766727668276692767027671276722767327674276752767627677276782767927680276812768227683276842768527686276872768827689276902769127692276932769427695276962769727698276992770027701277022770327704277052770627707277082770927710277112771227713277142771527716277172771827719277202772127722277232772427725277262772727728277292773027731277322773327734277352773627737277382773927740277412774227743277442774527746277472774827749277502775127752277532775427755277562775727758277592776027761277622776327764277652776627767277682776927770277712777227773277742777527776277772777827779277802778127782277832778427785277862778727788277892779027791277922779327794277952779627797277982779927800278012780227803278042780527806278072780827809278102781127812278132781427815278162781727818278192782027821278222782327824278252782627827278282782927830278312783227833278342783527836278372783827839278402784127842278432784427845278462784727848278492785027851278522785327854278552785627857278582785927860278612786227863278642786527866278672786827869278702787127872278732787427875278762787727878278792788027881278822788327884278852788627887278882788927890278912789227893278942789527896278972789827899279002790127902279032790427905279062790727908279092791027911279122791327914279152791627917279182791927920279212792227923279242792527926279272792827929279302793127932279332793427935279362793727938279392794027941279422794327944279452794627947279482794927950279512795227953279542795527956279572795827959279602796127962279632796427965279662796727968279692797027971279722797327974279752797627977279782797927980279812798227983279842798527986279872798827989279902799127992279932799427995279962799727998279992800028001280022800328004280052800628007280082800928010280112801228013280142801528016280172801828019280202802128022280232802428025280262802728028280292803028031280322803328034280352803628037280382803928040280412804228043280442804528046280472804828049280502805128052280532805428055280562805728058280592806028061280622806328064280652806628067280682806928070280712807228073280742807528076280772807828079280802808128082280832808428085280862808728088280892809028091280922809328094280952809628097280982809928100281012810228103281042810528106281072810828109281102811128112281132811428115281162811728118281192812028121281222812328124281252812628127281282812928130281312813228133
  1. 2025-05-16 01:40:55,678 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2. 2025-05-16 01:40:55,679 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  3. 2025-05-16 01:40:55,679 - __main__ - INFO - Found 1 total pdf paths to add
  4. 2025-05-16 01:40:55,683 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  5. 2025-05-16 02:00:47,526 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  6. 2025-05-16 02:00:47,526 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  7. 2025-05-16 02:00:47,526 - __main__ - INFO - Found 1 total pdf paths to add
  8. 2025-05-16 02:00:47,529 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  9. 2025-05-16 02:00:47,720 - __main__ - INFO - Starting pipeline with PID 347737
  10. 2025-05-16 02:00:47,720 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  11. 2025-05-16 02:03:44,171 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  12. 2025-05-16 02:03:44,171 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  13. 2025-05-16 02:03:44,171 - __main__ - INFO - Found 1 total pdf paths to add
  14. 2025-05-16 02:03:44,175 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  15. 2025-05-16 02:03:44,416 - __main__ - INFO - Starting pipeline with PID 347855
  16. 2025-05-16 02:03:44,416 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  17. 2025-05-16 02:06:11,039 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  18. 2025-05-16 02:06:11,039 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  19. 2025-05-16 02:06:11,039 - __main__ - INFO - Found 1 total pdf paths to add
  20. 2025-05-16 02:06:11,043 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  21. 2025-05-16 02:06:11,311 - __main__ - INFO - Starting pipeline with PID 347960
  22. 2025-05-16 02:06:11,311 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  23. 2025-05-17 01:34:19,419 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  24. 2025-05-17 01:34:19,419 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  25. 2025-05-17 01:34:19,420 - __main__ - INFO - Found 1 total pdf paths to add
  26. 2025-05-17 01:34:19,424 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  27. 2025-05-17 01:34:19,659 - __main__ - INFO - Starting pipeline with PID 370510
  28. 2025-05-17 01:34:19,659 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  29. 2025-05-17 01:42:18,000 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  30. 2025-05-17 01:42:18,000 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  31. 2025-05-17 01:42:18,000 - __main__ - INFO - Found 1 total pdf paths to add
  32. 2025-05-17 01:42:18,004 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  33. 2025-05-17 01:42:18,204 - __main__ - INFO - Starting pipeline with PID 370697
  34. 2025-05-17 01:42:18,204 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  35. 2025-05-17 01:46:11,794 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  36. 2025-05-17 01:46:12,829 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  37. 2025-05-17 01:46:13,879 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  38. 2025-05-17 01:46:14,944 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  39. 2025-05-17 01:46:16,011 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  40. 2025-05-17 01:46:17,040 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  41. 2025-05-17 01:46:17,815 - sglang - INFO - [2025-05-17 01:46:17] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=47741023, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  42. 2025-05-17 01:46:17,815 - __main__ - INFO - [2025-05-17 01:46:17] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=47741023, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  43. 2025-05-17 01:46:18,110 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  44. 2025-05-17 01:46:19,171 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  45. 2025-05-17 01:46:20,235 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  46. 2025-05-17 01:46:21,302 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  47. 2025-05-17 01:46:22,439 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  48. 2025-05-17 01:46:23,255 - sglang - INFO - [2025-05-17 01:46:23] Use chat template for the OpenAI-compatible API server: qwen2-vl
  49. 2025-05-17 01:46:23,255 - __main__ - INFO - [2025-05-17 01:46:23] Use chat template for the OpenAI-compatible API server: qwen2-vl
  50. 2025-05-17 01:46:23,515 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  51. 2025-05-17 01:46:24,582 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  52. 2025-05-17 01:46:25,649 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  53. 2025-05-17 01:46:26,716 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  54. 2025-05-17 01:46:27,783 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  55. 2025-05-17 01:46:28,847 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  56. 2025-05-17 01:46:29,906 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  57. 2025-05-17 01:46:30,974 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  58. 2025-05-17 01:46:32,048 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  59. 2025-05-17 01:46:32,899 - sglang - INFO - [2025-05-17 01:46:32 TP0] Overlap scheduler is disabled for multimodal models.
  60. 2025-05-17 01:46:32,900 - __main__ - INFO - [2025-05-17 01:46:32 TP0] Overlap scheduler is disabled for multimodal models.
  61. 2025-05-17 01:46:33,125 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  62. 2025-05-17 01:46:33,730 - sglang - INFO - [2025-05-17 01:46:33 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  63. 2025-05-17 01:46:33,730 - __main__ - INFO - [2025-05-17 01:46:33 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  64. 2025-05-17 01:46:33,730 - sglang - INFO - [2025-05-17 01:46:33 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  65. 2025-05-17 01:46:33,730 - __main__ - INFO - [2025-05-17 01:46:33 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  66. 2025-05-17 01:46:33,731 - sglang - INFO - [2025-05-17 01:46:33 TP0] Init torch distributed begin.
  67. 2025-05-17 01:46:33,731 - __main__ - INFO - [2025-05-17 01:46:33 TP0] Init torch distributed begin.
  68. 2025-05-17 01:46:34,202 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  69. 2025-05-17 01:46:35,274 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  70. 2025-05-17 01:46:36,341 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  71. 2025-05-17 01:46:37,409 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  72. 2025-05-17 01:46:38,477 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  73. 2025-05-17 01:46:39,122 - sglang - INFO - [2025-05-17 01:46:39 TP0] Load weight begin. avail mem=23.33 GB
  74. 2025-05-17 01:46:39,123 - __main__ - INFO - [2025-05-17 01:46:39 TP0] Load weight begin. avail mem=23.33 GB
  75. 2025-05-17 01:46:39,555 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  76. 2025-05-17 01:46:40,623 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  77. 2025-05-17 01:46:40,752 - sglang - INFO - [2025-05-17 01:46:40 TP0] Using model weights format ['*.safetensors']
  78. 2025-05-17 01:46:40,752 - __main__ - INFO - [2025-05-17 01:46:40 TP0] Using model weights format ['*.safetensors']
  79. 2025-05-17 01:46:41,534 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  80. 2025-05-17 01:46:41,534 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  81. 2025-05-17 01:46:41,700 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  82. 2025-05-17 01:46:41,980 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.25it/s]
  83. 2025-05-17 01:46:41,980 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.25it/s]
  84. 2025-05-17 01:46:42,778 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  85. 2025-05-17 01:46:43,261 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.07it/s]
  86. 2025-05-17 01:46:43,261 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.07it/s]
  87. 2025-05-17 01:46:43,855 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  88. 2025-05-17 01:46:44,579 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.11s/it]
  89. 2025-05-17 01:46:44,579 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.11s/it]
  90. 2025-05-17 01:46:44,932 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  91. 2025-05-17 01:46:45,749 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.13s/it]
  92. 2025-05-17 01:46:45,749 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.13s/it]
  93. 2025-05-17 01:46:45,750 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.05s/it]
  94. 2025-05-17 01:46:45,750 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.05s/it]
  95. 2025-05-17 01:46:45,750 - sglang - INFO -
  96. 2025-05-17 01:46:45,750 - __main__ - INFO -
  97. 2025-05-17 01:46:45,997 - sglang - INFO - [2025-05-17 01:46:45 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  98. 2025-05-17 01:46:45,997 - __main__ - INFO - [2025-05-17 01:46:45 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  99. 2025-05-17 01:46:45,997 - sglang - INFO - [2025-05-17 01:46:45 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  100. 2025-05-17 01:46:45,997 - __main__ - INFO - [2025-05-17 01:46:45 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  101. 2025-05-17 01:46:45,997 - sglang - INFO - [2025-05-17 01:46:45 TP0] Memory pool end. avail mem=5.30 GB
  102. 2025-05-17 01:46:45,998 - __main__ - INFO - [2025-05-17 01:46:45 TP0] Memory pool end. avail mem=5.30 GB
  103. 2025-05-17 01:46:45,999 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  104. 2025-05-17 01:46:46,102 - sglang - INFO - [2025-05-17 01:46:46 TP0] Capture cuda graph begin. This can take up to several minutes.
  105. 2025-05-17 01:46:46,103 - __main__ - INFO - [2025-05-17 01:46:46 TP0] Capture cuda graph begin. This can take up to several minutes.
  106. 2025-05-17 01:46:47,075 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  107. 2025-05-17 01:46:48,152 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  108. 2025-05-17 01:46:48,276 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.45s/it] 50%|█████ | 2/4 [00:01<00:01, 1.35it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.96it/s] 100%|██████████| 4/4 [00:02<00:00, 2.48it/s] 100%|██████████| 4/4 [00:02<00:00, 1.84it/s]
  109. 2025-05-17 01:46:48,276 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.45s/it] 50%|█████ | 2/4 [00:01<00:01, 1.35it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.96it/s] 100%|██████████| 4/4 [00:02<00:00, 2.48it/s] 100%|██████████| 4/4 [00:02<00:00, 1.84it/s]
  110. 2025-05-17 01:46:48,276 - sglang - INFO - [2025-05-17 01:46:48 TP0] Capture cuda graph end. Time elapsed: 2.17 s
  111. 2025-05-17 01:46:48,277 - __main__ - INFO - [2025-05-17 01:46:48 TP0] Capture cuda graph end. Time elapsed: 2.17 s
  112. 2025-05-17 01:46:49,229 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  113. 2025-05-17 01:46:50,297 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  114. 2025-05-17 01:46:51,365 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  115. 2025-05-17 01:46:51,507 - sglang - INFO - [2025-05-17 01:46:51 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  116. 2025-05-17 01:46:51,507 - __main__ - INFO - [2025-05-17 01:46:51 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  117. 2025-05-17 01:46:52,460 - __main__ - INFO - sglang server is ready.
  118. 2025-05-17 01:46:52,461 - __main__ - INFO - Queue remaining: 1
  119. 2025-05-17 01:46:52,461 - __main__ - INFO -
  120. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  121. ----------------------------------------------------------------------------------
  122. 2025-05-17 01:46:52,461 - __main__ - INFO -
  123. Worker ID
  124. ---------
  125. 2025-05-17 01:46:52,462 - __main__ - INFO - Worker 0 processing work item 91107f3e53da42365e4111879440c8b71d98ac54
  126. 2025-05-17 01:46:52,462 - __main__ - INFO - Created all tasks for 91107f3e53da42365e4111879440c8b71d98ac54
  127. 2025-05-17 01:46:52,467 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/horribleocr.pdf in worker 0
  128. 2025-05-17 01:46:52,581 - sglang - INFO - [2025-05-17 01:46:52 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  129. 2025-05-17 01:46:52,581 - __main__ - INFO - [2025-05-17 01:46:52 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  130. 2025-05-17 01:46:52,581 - __main__ - INFO - sglang running req: 0 queue req: 0
  131. 2025-05-17 01:46:54,142 - sglang - INFO - [2025-05-17 01:46:54] The server is fired up and ready to roll!
  132. 2025-05-17 01:46:54,142 - __main__ - INFO - [2025-05-17 01:46:54] The server is fired up and ready to roll!
  133. 2025-05-17 01:46:58,483 - __main__ - INFO - Built page query for tests/gnarly_pdfs/horribleocr.pdf-1
  134. 2025-05-17 01:47:02,463 - __main__ - INFO - Queue remaining: 0
  135. 2025-05-17 01:47:02,463 - __main__ - INFO -
  136. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  137. ----------------------------------------------------------------------------------
  138. 2025-05-17 01:47:02,464 - __main__ - INFO -
  139. Worker ID | started
  140. ----------+--------
  141. 0 | 1
  142. 2025-05-17 01:47:04,736 - sglang - INFO - [2025-05-17 01:47:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  143. 2025-05-17 01:47:04,736 - __main__ - INFO - sglang running req: 0 queue req: 0
  144. 2025-05-17 01:47:09,780 - sglang - INFO - [2025-05-17 01:47:09 TP0] Decode batch. #running-req: 1, #token: 1842, token usage: 0.05, gen throughput (token/s): 2.19, #queue-req: 0
  145. 2025-05-17 01:47:09,780 - __main__ - INFO - sglang running req: 1 queue req: 0
  146. 2025-05-17 01:47:12,465 - __main__ - INFO - Queue remaining: 0
  147. 2025-05-17 01:47:12,465 - __main__ - INFO -
  148. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  149. ----------------------------------------------------------------------------------
  150. 2025-05-17 01:47:12,465 - __main__ - INFO -
  151. Worker ID | started
  152. ----------+--------
  153. 0 | 1
  154. 2025-05-17 01:47:13,779 - sglang - INFO - [2025-05-17 01:47:13 TP0] Decode batch. #running-req: 1, #token: 1882, token usage: 0.05, gen throughput (token/s): 10.00, #queue-req: 0
  155. 2025-05-17 01:47:13,779 - __main__ - INFO - sglang running req: 1 queue req: 0
  156. 2025-05-17 01:47:17,579 - sglang - INFO - [2025-05-17 01:47:17 TP0] Decode batch. #running-req: 1, #token: 1922, token usage: 0.05, gen throughput (token/s): 10.53, #queue-req: 0
  157. 2025-05-17 01:47:17,580 - __main__ - INFO - sglang running req: 1 queue req: 0
  158. 2025-05-17 01:47:19,600 - sglang - INFO - [2025-05-17 01:47:19 TP0] Decode batch. #running-req: 1, #token: 1962, token usage: 0.05, gen throughput (token/s): 19.79, #queue-req: 0
  159. 2025-05-17 01:47:19,600 - __main__ - INFO - sglang running req: 1 queue req: 0
  160. 2025-05-17 01:47:20,631 - sglang - INFO - [2025-05-17 01:47:20 TP0] Decode batch. #running-req: 1, #token: 2002, token usage: 0.05, gen throughput (token/s): 38.78, #queue-req: 0
  161. 2025-05-17 01:47:20,632 - __main__ - INFO - sglang running req: 1 queue req: 0
  162. 2025-05-17 01:47:21,454 - sglang - INFO - [2025-05-17 01:47:21 TP0] Decode batch. #running-req: 1, #token: 2042, token usage: 0.05, gen throughput (token/s): 48.64, #queue-req: 0
  163. 2025-05-17 01:47:21,454 - __main__ - INFO - sglang running req: 1 queue req: 0
  164. 2025-05-17 01:47:22,274 - sglang - INFO - [2025-05-17 01:47:22 TP0] Decode batch. #running-req: 1, #token: 2082, token usage: 0.05, gen throughput (token/s): 48.75, #queue-req: 0
  165. 2025-05-17 01:47:22,275 - __main__ - INFO - sglang running req: 1 queue req: 0
  166. 2025-05-17 01:47:22,466 - __main__ - INFO - Queue remaining: 0
  167. 2025-05-17 01:47:22,466 - __main__ - INFO -
  168. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  169. ----------------------------------------------------------------------------------
  170. 2025-05-17 01:47:22,467 - __main__ - INFO -
  171. Worker ID | started
  172. ----------+--------
  173. 0 | 1
  174. 2025-05-17 01:47:23,095 - sglang - INFO - [2025-05-17 01:47:23 TP0] Decode batch. #running-req: 1, #token: 2122, token usage: 0.06, gen throughput (token/s): 48.75, #queue-req: 0
  175. 2025-05-17 01:47:23,095 - __main__ - INFO - sglang running req: 1 queue req: 0
  176. 2025-05-17 01:47:23,917 - sglang - INFO - [2025-05-17 01:47:23 TP0] Decode batch. #running-req: 1, #token: 2162, token usage: 0.06, gen throughput (token/s): 48.68, #queue-req: 0
  177. 2025-05-17 01:47:23,917 - __main__ - INFO - sglang running req: 1 queue req: 0
  178. 2025-05-17 01:47:24,738 - sglang - INFO - [2025-05-17 01:47:24 TP0] Decode batch. #running-req: 1, #token: 2202, token usage: 0.06, gen throughput (token/s): 48.72, #queue-req: 0
  179. 2025-05-17 01:47:24,738 - __main__ - INFO - sglang running req: 1 queue req: 0
  180. 2025-05-17 01:47:24,995 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  181. 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 1 exiting due to empty queue
  182. 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 2 exiting due to empty queue
  183. 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 3 exiting due to empty queue
  184. 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 4 exiting due to empty queue
  185. 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 5 exiting due to empty queue
  186. 2025-05-17 01:47:24,996 - __main__ - INFO - Worker 6 exiting due to empty queue
  187. 2025-05-17 01:47:24,997 - __main__ - INFO - Worker 7 exiting due to empty queue
  188. 2025-05-17 01:47:25,559 - sglang - INFO - [2025-05-17 01:47:25 TP0] Decode batch. #running-req: 1, #token: 2242, token usage: 0.06, gen throughput (token/s): 48.67, #queue-req: 0
  189. 2025-05-17 01:47:25,560 - __main__ - INFO - sglang running req: 1 queue req: 0
  190. 2025-05-17 01:47:25,730 - __main__ - INFO - Finished TaskGroup for worker on 91107f3e53da42365e4111879440c8b71d98ac54
  191. 2025-05-17 01:47:25,730 - __main__ - INFO - Got 1 docs for 91107f3e53da42365e4111879440c8b71d98ac54
  192. 2025-05-17 01:47:25,731 - __main__ - INFO - Worker 0 exiting due to empty queue
  193. 2025-05-17 01:47:25,732 - __main__ - INFO - Work done
  194. 2025-05-17 01:47:25,733 - __main__ - INFO - Got cancellation request for SGLang server
  195. 2025-05-17 01:47:46,579 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  196. 2025-05-17 01:47:46,580 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  197. 2025-05-17 01:47:46,580 - __main__ - INFO - Found 1 total pdf paths to add
  198. 2025-05-17 01:47:46,583 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  199. 2025-05-17 01:47:46,815 - __main__ - INFO - Starting pipeline with PID 371834
  200. 2025-05-17 01:47:46,815 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  201. 2025-05-17 01:47:52,318 - __main__ - INFO - No work to do, exiting
  202. 2025-05-17 02:06:25,410 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  203. 2025-05-17 02:06:25,410 - __main__ - INFO - Loading file at olmocr_workspace/job_1747418779/input.pdf as PDF document
  204. 2025-05-17 02:06:25,410 - __main__ - INFO - Found 1 total pdf paths to add
  205. 2025-05-17 02:06:25,413 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  206. 2025-05-17 02:06:25,669 - __main__ - INFO - Starting pipeline with PID 372551
  207. 2025-05-17 02:06:25,669 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  208. 2025-05-17 02:06:26,283 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  209. 2025-05-17 02:06:27,320 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  210. 2025-05-17 02:06:28,373 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  211. 2025-05-17 02:06:29,440 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  212. 2025-05-17 02:06:30,508 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  213. 2025-05-17 02:06:31,575 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  214. 2025-05-17 02:06:32,643 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  215. 2025-05-17 02:06:32,693 - sglang - INFO - [2025-05-17 02:06:32] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=68477412, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  216. 2025-05-17 02:06:32,694 - __main__ - INFO - [2025-05-17 02:06:32] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=68477412, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  217. 2025-05-17 02:06:33,721 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  218. 2025-05-17 02:06:34,772 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  219. 2025-05-17 02:06:35,835 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  220. 2025-05-17 02:06:36,142 - sglang - INFO - [2025-05-17 02:06:36] Use chat template for the OpenAI-compatible API server: qwen2-vl
  221. 2025-05-17 02:06:36,142 - __main__ - INFO - [2025-05-17 02:06:36] Use chat template for the OpenAI-compatible API server: qwen2-vl
  222. 2025-05-17 02:06:36,911 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  223. 2025-05-17 02:06:37,978 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  224. 2025-05-17 02:06:39,046 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  225. 2025-05-17 02:06:40,113 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  226. 2025-05-17 02:06:41,182 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  227. 2025-05-17 02:06:42,250 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  228. 2025-05-17 02:06:42,356 - sglang - INFO - [2025-05-17 02:06:42 TP0] Overlap scheduler is disabled for multimodal models.
  229. 2025-05-17 02:06:42,356 - __main__ - INFO - [2025-05-17 02:06:42 TP0] Overlap scheduler is disabled for multimodal models.
  230. 2025-05-17 02:06:43,326 - sglang - INFO - [2025-05-17 02:06:43 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  231. 2025-05-17 02:06:43,326 - __main__ - INFO - [2025-05-17 02:06:43 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  232. 2025-05-17 02:06:43,326 - sglang - INFO - [2025-05-17 02:06:43 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  233. 2025-05-17 02:06:43,326 - __main__ - INFO - [2025-05-17 02:06:43 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  234. 2025-05-17 02:06:43,326 - sglang - INFO - [2025-05-17 02:06:43 TP0] Init torch distributed begin.
  235. 2025-05-17 02:06:43,326 - __main__ - INFO - [2025-05-17 02:06:43 TP0] Init torch distributed begin.
  236. 2025-05-17 02:06:43,328 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  237. 2025-05-17 02:06:44,391 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  238. 2025-05-17 02:06:45,463 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  239. 2025-05-17 02:06:46,529 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  240. 2025-05-17 02:06:47,595 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  241. 2025-05-17 02:06:48,662 - sglang - INFO - [2025-05-17 02:06:48 TP0] Load weight begin. avail mem=23.33 GB
  242. 2025-05-17 02:06:48,662 - __main__ - INFO - [2025-05-17 02:06:48 TP0] Load weight begin. avail mem=23.33 GB
  243. 2025-05-17 02:06:48,664 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  244. 2025-05-17 02:06:49,732 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  245. 2025-05-17 02:06:49,806 - sglang - INFO - [2025-05-17 02:06:49 TP0] Using model weights format ['*.safetensors']
  246. 2025-05-17 02:06:49,806 - __main__ - INFO - [2025-05-17 02:06:49 TP0] Using model weights format ['*.safetensors']
  247. 2025-05-17 02:06:50,809 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  248. 2025-05-17 02:06:50,829 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  249. 2025-05-17 02:06:50,829 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  250. 2025-05-17 02:06:51,188 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.79it/s]
  251. 2025-05-17 02:06:51,188 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.79it/s]
  252. 2025-05-17 02:06:51,887 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  253. 2025-05-17 02:06:52,325 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.22it/s]
  254. 2025-05-17 02:06:52,326 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.22it/s]
  255. 2025-05-17 02:06:52,964 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  256. 2025-05-17 02:06:53,620 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:01, 1.03s/it]
  257. 2025-05-17 02:06:53,620 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:01, 1.03s/it]
  258. 2025-05-17 02:06:54,040 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  259. 2025-05-17 02:06:54,849 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.11s/it]
  260. 2025-05-17 02:06:54,849 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.11s/it]
  261. 2025-05-17 02:06:54,849 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
  262. 2025-05-17 02:06:54,849 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
  263. 2025-05-17 02:06:54,849 - sglang - INFO -
  264. 2025-05-17 02:06:54,849 - __main__ - INFO -
  265. 2025-05-17 02:06:55,115 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  266. 2025-05-17 02:06:55,131 - sglang - INFO - [2025-05-17 02:06:55 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  267. 2025-05-17 02:06:55,132 - __main__ - INFO - [2025-05-17 02:06:55 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  268. 2025-05-17 02:06:55,137 - sglang - INFO - [2025-05-17 02:06:55 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  269. 2025-05-17 02:06:55,137 - __main__ - INFO - [2025-05-17 02:06:55 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  270. 2025-05-17 02:06:55,138 - sglang - INFO - [2025-05-17 02:06:55 TP0] Memory pool end. avail mem=5.30 GB
  271. 2025-05-17 02:06:55,138 - __main__ - INFO - [2025-05-17 02:06:55 TP0] Memory pool end. avail mem=5.30 GB
  272. 2025-05-17 02:06:55,283 - sglang - INFO - [2025-05-17 02:06:55 TP0] Capture cuda graph begin. This can take up to several minutes.
  273. 2025-05-17 02:06:55,283 - __main__ - INFO - [2025-05-17 02:06:55 TP0] Capture cuda graph begin. This can take up to several minutes.
  274. 2025-05-17 02:06:56,192 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  275. 2025-05-17 02:06:56,913 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.11it/s] 50%|█████ | 2/4 [00:01<00:01, 1.95it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.57it/s] 100%|██████████| 4/4 [00:01<00:00, 3.02it/s] 100%|██████████| 4/4 [00:01<00:00, 2.46it/s]
  276. 2025-05-17 02:06:56,913 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.11it/s] 50%|█████ | 2/4 [00:01<00:01, 1.95it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.57it/s] 100%|██████████| 4/4 [00:01<00:00, 3.02it/s] 100%|██████████| 4/4 [00:01<00:00, 2.46it/s]
  277. 2025-05-17 02:06:56,913 - sglang - INFO - [2025-05-17 02:06:56 TP0] Capture cuda graph end. Time elapsed: 1.63 s
  278. 2025-05-17 02:06:56,913 - __main__ - INFO - [2025-05-17 02:06:56 TP0] Capture cuda graph end. Time elapsed: 1.63 s
  279. 2025-05-17 02:06:57,268 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  280. 2025-05-17 02:06:58,336 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  281. 2025-05-17 02:06:59,395 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  282. 2025-05-17 02:06:59,613 - sglang - INFO - [2025-05-17 02:06:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  283. 2025-05-17 02:06:59,613 - __main__ - INFO - [2025-05-17 02:06:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  284. 2025-05-17 02:07:00,478 - __main__ - INFO - sglang server is ready.
  285. 2025-05-17 02:07:00,478 - __main__ - INFO - Queue remaining: 1
  286. 2025-05-17 02:07:00,478 - __main__ - INFO -
  287. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  288. ----------------------------------------------------------------------------------
  289. 2025-05-17 02:07:00,478 - __main__ - INFO -
  290. Worker ID
  291. ---------
  292. 2025-05-17 02:07:00,478 - __main__ - INFO - Worker 0 processing work item 1985df71617509ec45bc3584a8b12ba3e920e0d5
  293. 2025-05-17 02:07:00,479 - __main__ - INFO - Created all tasks for 1985df71617509ec45bc3584a8b12ba3e920e0d5
  294. 2025-05-17 02:07:00,481 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747418779/input.pdf in worker 0
  295. 2025-05-17 02:07:00,688 - sglang - INFO - [2025-05-17 02:07:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  296. 2025-05-17 02:07:00,688 - __main__ - INFO - [2025-05-17 02:07:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  297. 2025-05-17 02:07:00,688 - __main__ - INFO - sglang running req: 0 queue req: 0
  298. 2025-05-17 02:07:01,251 - sglang - INFO - [2025-05-17 02:07:01] The server is fired up and ready to roll!
  299. 2025-05-17 02:07:01,251 - __main__ - INFO - [2025-05-17 02:07:01] The server is fired up and ready to roll!
  300. 2025-05-17 02:07:06,759 - __main__ - INFO - Built page query for olmocr_workspace/job_1747418779/input.pdf-1
  301. 2025-05-17 02:07:10,480 - __main__ - INFO - Queue remaining: 0
  302. 2025-05-17 02:07:10,480 - __main__ - INFO -
  303. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  304. ----------------------------------------------------------------------------------
  305. 2025-05-17 02:07:10,480 - __main__ - INFO -
  306. Worker ID | started
  307. ----------+--------
  308. 0 | 1
  309. 2025-05-17 02:07:13,323 - sglang - INFO - [2025-05-17 02:07:13 TP0] Prefill batch. #new-seq: 1, #new-token: 1840, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  310. 2025-05-17 02:07:13,323 - __main__ - INFO - sglang running req: 0 queue req: 0
  311. 2025-05-17 02:07:17,479 - sglang - INFO - [2025-05-17 02:07:17 TP0] Decode batch. #running-req: 1, #token: 1873, token usage: 0.05, gen throughput (token/s): 2.24, #queue-req: 0
  312. 2025-05-17 02:07:17,479 - __main__ - INFO - sglang running req: 1 queue req: 0
  313. 2025-05-17 02:07:20,482 - __main__ - INFO - Queue remaining: 0
  314. 2025-05-17 02:07:20,482 - __main__ - INFO -
  315. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  316. ----------------------------------------------------------------------------------
  317. 2025-05-17 02:07:20,482 - __main__ - INFO -
  318. Worker ID | started
  319. ----------+--------
  320. 0 | 1
  321. 2025-05-17 02:07:21,301 - sglang - INFO - [2025-05-17 02:07:21 TP0] Decode batch. #running-req: 1, #token: 1913, token usage: 0.05, gen throughput (token/s): 10.47, #queue-req: 0
  322. 2025-05-17 02:07:21,301 - __main__ - INFO - sglang running req: 1 queue req: 0
  323. 2025-05-17 02:07:23,379 - sglang - INFO - [2025-05-17 02:07:23 TP0] Decode batch. #running-req: 1, #token: 1953, token usage: 0.05, gen throughput (token/s): 19.25, #queue-req: 0
  324. 2025-05-17 02:07:23,379 - __main__ - INFO - sglang running req: 1 queue req: 0
  325. 2025-05-17 02:07:24,962 - sglang - INFO - [2025-05-17 02:07:24 TP0] Decode batch. #running-req: 1, #token: 1993, token usage: 0.05, gen throughput (token/s): 25.27, #queue-req: 0
  326. 2025-05-17 02:07:24,962 - __main__ - INFO - sglang running req: 1 queue req: 0
  327. 2025-05-17 02:07:25,796 - sglang - INFO - [2025-05-17 02:07:25 TP0] Decode batch. #running-req: 1, #token: 2033, token usage: 0.05, gen throughput (token/s): 47.97, #queue-req: 0
  328. 2025-05-17 02:07:25,796 - __main__ - INFO - sglang running req: 1 queue req: 0
  329. 2025-05-17 02:07:26,615 - sglang - INFO - [2025-05-17 02:07:26 TP0] Decode batch. #running-req: 1, #token: 2073, token usage: 0.05, gen throughput (token/s): 48.80, #queue-req: 0
  330. 2025-05-17 02:07:26,616 - __main__ - INFO - sglang running req: 1 queue req: 0
  331. 2025-05-17 02:07:27,436 - sglang - INFO - [2025-05-17 02:07:27 TP0] Decode batch. #running-req: 1, #token: 2113, token usage: 0.06, gen throughput (token/s): 48.73, #queue-req: 0
  332. 2025-05-17 02:07:27,436 - __main__ - INFO - sglang running req: 1 queue req: 0
  333. 2025-05-17 02:07:28,257 - sglang - INFO - [2025-05-17 02:07:28 TP0] Decode batch. #running-req: 1, #token: 2153, token usage: 0.06, gen throughput (token/s): 48.74, #queue-req: 0
  334. 2025-05-17 02:07:28,257 - __main__ - INFO - sglang running req: 1 queue req: 0
  335. 2025-05-17 02:07:28,981 - __main__ - INFO - Finished TaskGroup for worker on 1985df71617509ec45bc3584a8b12ba3e920e0d5
  336. 2025-05-17 02:07:28,981 - __main__ - INFO - Got 1 docs for 1985df71617509ec45bc3584a8b12ba3e920e0d5
  337. 2025-05-17 02:07:28,982 - __main__ - INFO - Worker 1 exiting due to empty queue
  338. 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 2 exiting due to empty queue
  339. 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 3 exiting due to empty queue
  340. 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 4 exiting due to empty queue
  341. 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 5 exiting due to empty queue
  342. 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 6 exiting due to empty queue
  343. 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 7 exiting due to empty queue
  344. 2025-05-17 02:07:28,983 - __main__ - INFO - Worker 0 exiting due to empty queue
  345. 2025-05-17 02:07:28,984 - __main__ - INFO - Work done
  346. 2025-05-17 02:07:28,984 - __main__ - INFO - Got cancellation request for SGLang server
  347. 2025-05-17 02:09:17,270 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  348. 2025-05-17 02:09:17,270 - __main__ - INFO - Loading file at olmocr_workspace/job_1747418950/input.pdf as PDF document
  349. 2025-05-17 02:09:17,270 - __main__ - INFO - Found 1 total pdf paths to add
  350. 2025-05-17 02:09:17,273 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  351. 2025-05-17 02:09:17,507 - __main__ - INFO - Starting pipeline with PID 373591
  352. 2025-05-17 02:09:17,507 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  353. 2025-05-17 02:09:23,117 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  354. 2025-05-17 02:09:24,158 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  355. 2025-05-17 02:09:25,214 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  356. 2025-05-17 02:09:26,275 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  357. 2025-05-17 02:09:27,338 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  358. 2025-05-17 02:09:28,405 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  359. 2025-05-17 02:09:29,149 - sglang - INFO - [2025-05-17 02:09:29] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=493503861, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  360. 2025-05-17 02:09:29,149 - __main__ - INFO - [2025-05-17 02:09:29] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=493503861, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  361. 2025-05-17 02:09:29,480 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  362. 2025-05-17 02:09:30,548 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  363. 2025-05-17 02:09:31,615 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  364. 2025-05-17 02:09:32,682 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  365. 2025-05-17 02:09:33,750 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  366. 2025-05-17 02:09:34,820 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  367. 2025-05-17 02:09:35,888 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  368. 2025-05-17 02:09:36,955 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  369. 2025-05-17 02:09:37,795 - sglang - INFO - [2025-05-17 02:09:37] Use chat template for the OpenAI-compatible API server: qwen2-vl
  370. 2025-05-17 02:09:37,795 - __main__ - INFO - [2025-05-17 02:09:37] Use chat template for the OpenAI-compatible API server: qwen2-vl
  371. 2025-05-17 02:09:38,032 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  372. 2025-05-17 02:09:39,099 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  373. 2025-05-17 02:09:40,166 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  374. 2025-05-17 02:09:41,231 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  375. 2025-05-17 02:09:42,287 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  376. 2025-05-17 02:09:43,351 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  377. 2025-05-17 02:09:43,499 - sglang - INFO - [2025-05-17 02:09:43 TP0] Overlap scheduler is disabled for multimodal models.
  378. 2025-05-17 02:09:43,499 - __main__ - INFO - [2025-05-17 02:09:43 TP0] Overlap scheduler is disabled for multimodal models.
  379. 2025-05-17 02:09:44,048 - sglang - INFO - [2025-05-17 02:09:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  380. 2025-05-17 02:09:44,048 - __main__ - INFO - [2025-05-17 02:09:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  381. 2025-05-17 02:09:44,048 - sglang - INFO - [2025-05-17 02:09:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  382. 2025-05-17 02:09:44,048 - __main__ - INFO - [2025-05-17 02:09:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  383. 2025-05-17 02:09:44,048 - sglang - INFO - [2025-05-17 02:09:44 TP0] Init torch distributed begin.
  384. 2025-05-17 02:09:44,048 - __main__ - INFO - [2025-05-17 02:09:44 TP0] Init torch distributed begin.
  385. 2025-05-17 02:09:44,428 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  386. 2025-05-17 02:09:45,495 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  387. 2025-05-17 02:09:46,562 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  388. 2025-05-17 02:09:47,629 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  389. 2025-05-17 02:09:48,697 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  390. 2025-05-17 02:09:49,376 - sglang - INFO - [2025-05-17 02:09:49 TP0] Load weight begin. avail mem=23.33 GB
  391. 2025-05-17 02:09:49,376 - __main__ - INFO - [2025-05-17 02:09:49 TP0] Load weight begin. avail mem=23.33 GB
  392. 2025-05-17 02:09:49,775 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  393. 2025-05-17 02:09:50,468 - sglang - INFO - [2025-05-17 02:09:50 TP0] Using model weights format ['*.safetensors']
  394. 2025-05-17 02:09:50,468 - __main__ - INFO - [2025-05-17 02:09:50 TP0] Using model weights format ['*.safetensors']
  395. 2025-05-17 02:09:50,852 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  396. 2025-05-17 02:09:50,957 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  397. 2025-05-17 02:09:50,957 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  398. 2025-05-17 02:09:51,279 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.11it/s]
  399. 2025-05-17 02:09:51,279 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.11it/s]
  400. 2025-05-17 02:09:51,930 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  401. 2025-05-17 02:09:52,270 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.40it/s]
  402. 2025-05-17 02:09:52,270 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.40it/s]
  403. 2025-05-17 02:09:53,007 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  404. 2025-05-17 02:09:53,282 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.18it/s]
  405. 2025-05-17 02:09:53,282 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.18it/s]
  406. 2025-05-17 02:09:54,083 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  407. 2025-05-17 02:09:54,292 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.09it/s]
  408. 2025-05-17 02:09:54,292 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.09it/s]
  409. 2025-05-17 02:09:54,292 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  410. 2025-05-17 02:09:54,292 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  411. 2025-05-17 02:09:54,292 - sglang - INFO -
  412. 2025-05-17 02:09:54,292 - __main__ - INFO -
  413. 2025-05-17 02:09:54,469 - sglang - INFO - [2025-05-17 02:09:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  414. 2025-05-17 02:09:54,469 - __main__ - INFO - [2025-05-17 02:09:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  415. 2025-05-17 02:09:54,476 - sglang - INFO - [2025-05-17 02:09:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  416. 2025-05-17 02:09:54,476 - __main__ - INFO - [2025-05-17 02:09:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  417. 2025-05-17 02:09:54,477 - sglang - INFO - [2025-05-17 02:09:54 TP0] Memory pool end. avail mem=5.30 GB
  418. 2025-05-17 02:09:54,477 - __main__ - INFO - [2025-05-17 02:09:54 TP0] Memory pool end. avail mem=5.30 GB
  419. 2025-05-17 02:09:54,691 - sglang - INFO - [2025-05-17 02:09:54 TP0] Capture cuda graph begin. This can take up to several minutes.
  420. 2025-05-17 02:09:54,691 - __main__ - INFO - [2025-05-17 02:09:54 TP0] Capture cuda graph begin. This can take up to several minutes.
  421. 2025-05-17 02:09:55,161 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  422. 2025-05-17 02:09:56,237 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  423. 2025-05-17 02:09:56,344 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.09it/s] 50%|█████ | 2/4 [00:01<00:01, 1.92it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.54it/s] 100%|██████████| 4/4 [00:01<00:00, 2.98it/s] 100%|██████████| 4/4 [00:01<00:00, 2.42it/s]
  424. 2025-05-17 02:09:56,344 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.09it/s] 50%|█████ | 2/4 [00:01<00:01, 1.92it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.54it/s] 100%|██████████| 4/4 [00:01<00:00, 2.98it/s] 100%|██████████| 4/4 [00:01<00:00, 2.42it/s]
  425. 2025-05-17 02:09:56,345 - sglang - INFO - [2025-05-17 02:09:56 TP0] Capture cuda graph end. Time elapsed: 1.65 s
  426. 2025-05-17 02:09:56,345 - __main__ - INFO - [2025-05-17 02:09:56 TP0] Capture cuda graph end. Time elapsed: 1.65 s
  427. 2025-05-17 02:09:57,310 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  428. 2025-05-17 02:09:58,365 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  429. 2025-05-17 02:09:59,278 - sglang - INFO - [2025-05-17 02:09:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  430. 2025-05-17 02:09:59,279 - __main__ - INFO - [2025-05-17 02:09:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  431. 2025-05-17 02:09:59,453 - __main__ - INFO - sglang server is ready.
  432. 2025-05-17 02:09:59,453 - __main__ - INFO - Queue remaining: 1
  433. 2025-05-17 02:09:59,453 - __main__ - INFO -
  434. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  435. ----------------------------------------------------------------------------------
  436. 2025-05-17 02:09:59,453 - __main__ - INFO -
  437. Worker ID
  438. ---------
  439. 2025-05-17 02:09:59,454 - __main__ - INFO - Worker 0 processing work item 0da57e3be5fb46a909ca98a2aee35e16856bab58
  440. 2025-05-17 02:09:59,454 - __main__ - INFO - Created all tasks for 0da57e3be5fb46a909ca98a2aee35e16856bab58
  441. 2025-05-17 02:09:59,456 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747418950/input.pdf in worker 0
  442. 2025-05-17 02:10:00,385 - sglang - INFO - [2025-05-17 02:10:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  443. 2025-05-17 02:10:00,385 - __main__ - INFO - [2025-05-17 02:10:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  444. 2025-05-17 02:10:00,386 - __main__ - INFO - sglang running req: 0 queue req: 0
  445. 2025-05-17 02:10:00,947 - sglang - INFO - [2025-05-17 02:10:00] The server is fired up and ready to roll!
  446. 2025-05-17 02:10:00,948 - __main__ - INFO - [2025-05-17 02:10:00] The server is fired up and ready to roll!
  447. 2025-05-17 02:10:05,726 - __main__ - INFO - Built page query for olmocr_workspace/job_1747418950/input.pdf-1
  448. 2025-05-17 02:10:09,479 - __main__ - INFO - Queue remaining: 0
  449. 2025-05-17 02:10:09,479 - __main__ - INFO -
  450. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  451. ----------------------------------------------------------------------------------
  452. 2025-05-17 02:10:09,480 - __main__ - INFO -
  453. Worker ID | started
  454. ----------+--------
  455. 0 | 1
  456. 2025-05-17 02:10:19,482 - __main__ - INFO - Queue remaining: 0
  457. 2025-05-17 02:10:19,482 - __main__ - INFO -
  458. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  459. ----------------------------------------------------------------------------------
  460. 2025-05-17 02:10:19,483 - __main__ - INFO -
  461. Worker ID | started
  462. ----------+--------
  463. 0 | 1
  464. 2025-05-17 02:10:27,304 - sglang - INFO - [2025-05-17 02:10:27 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  465. 2025-05-17 02:10:27,304 - __main__ - INFO - sglang running req: 0 queue req: 0
  466. 2025-05-17 02:10:28,725 - sglang - INFO - [2025-05-17 02:10:28 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.36, #queue-req: 0
  467. 2025-05-17 02:10:28,725 - __main__ - INFO - sglang running req: 1 queue req: 0
  468. 2025-05-17 02:10:29,484 - __main__ - INFO - Queue remaining: 0
  469. 2025-05-17 02:10:29,484 - __main__ - INFO -
  470. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  471. ----------------------------------------------------------------------------------
  472. 2025-05-17 02:10:29,484 - __main__ - INFO -
  473. Worker ID | started
  474. ----------+--------
  475. 0 | 1
  476. 2025-05-17 02:10:29,545 - sglang - INFO - [2025-05-17 02:10:29 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.76, #queue-req: 0
  477. 2025-05-17 02:10:29,545 - __main__ - INFO - sglang running req: 1 queue req: 0
  478. 2025-05-17 02:10:30,363 - sglang - INFO - [2025-05-17 02:10:30 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.89, #queue-req: 0
  479. 2025-05-17 02:10:30,363 - __main__ - INFO - sglang running req: 1 queue req: 0
  480. 2025-05-17 02:10:31,182 - sglang - INFO - [2025-05-17 02:10:31 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.85, #queue-req: 0
  481. 2025-05-17 02:10:31,182 - __main__ - INFO - sglang running req: 1 queue req: 0
  482. 2025-05-17 02:10:31,590 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  483. 2025-05-17 02:10:31,590 - __main__ - INFO - Worker 1 exiting due to empty queue
  484. 2025-05-17 02:10:31,590 - __main__ - INFO - Worker 2 exiting due to empty queue
  485. 2025-05-17 02:10:31,590 - __main__ - INFO - Worker 3 exiting due to empty queue
  486. 2025-05-17 02:10:31,590 - __main__ - INFO - Worker 4 exiting due to empty queue
  487. 2025-05-17 02:10:31,591 - __main__ - INFO - Worker 5 exiting due to empty queue
  488. 2025-05-17 02:10:31,591 - __main__ - INFO - Worker 6 exiting due to empty queue
  489. 2025-05-17 02:10:31,591 - __main__ - INFO - Worker 7 exiting due to empty queue
  490. 2025-05-17 02:10:32,000 - sglang - INFO - [2025-05-17 02:10:32 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.87, #queue-req: 0
  491. 2025-05-17 02:10:32,001 - __main__ - INFO - sglang running req: 1 queue req: 0
  492. 2025-05-17 02:10:32,819 - sglang - INFO - [2025-05-17 02:10:32 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.84, #queue-req: 0
  493. 2025-05-17 02:10:32,819 - __main__ - INFO - sglang running req: 1 queue req: 0
  494. 2025-05-17 02:10:33,446 - __main__ - INFO - Finished TaskGroup for worker on 0da57e3be5fb46a909ca98a2aee35e16856bab58
  495. 2025-05-17 02:10:33,446 - __main__ - INFO - Got 1 docs for 0da57e3be5fb46a909ca98a2aee35e16856bab58
  496. 2025-05-17 02:10:33,448 - __main__ - INFO - Worker 0 exiting due to empty queue
  497. 2025-05-17 02:10:33,448 - __main__ - INFO - Work done
  498. 2025-05-17 02:10:33,448 - __main__ - INFO - Got cancellation request for SGLang server
  499. 2025-05-17 02:12:55,176 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  500. 2025-05-17 02:12:55,176 - __main__ - INFO - Loading file at olmocr_workspace/job_1747419168/input.pdf as PDF document
  501. 2025-05-17 02:12:55,176 - __main__ - INFO - Found 1 total pdf paths to add
  502. 2025-05-17 02:12:55,183 - __main__ - INFO - Calculated items_per_group: 62 based on average pages per PDF: 8.00
  503. 2025-05-17 02:12:55,421 - __main__ - INFO - Starting pipeline with PID 374647
  504. 2025-05-17 02:12:55,421 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  505. 2025-05-17 02:13:01,024 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  506. 2025-05-17 02:13:02,064 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  507. 2025-05-17 02:13:03,120 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  508. 2025-05-17 02:13:04,188 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  509. 2025-05-17 02:13:05,256 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  510. 2025-05-17 02:13:06,244 - sglang - INFO - [2025-05-17 02:13:06] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=872630305, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  511. 2025-05-17 02:13:06,244 - __main__ - INFO - [2025-05-17 02:13:06] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=872630305, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  512. 2025-05-17 02:13:06,386 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  513. 2025-05-17 02:13:07,437 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  514. 2025-05-17 02:13:08,501 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  515. 2025-05-17 02:13:09,570 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  516. 2025-05-17 02:13:10,640 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  517. 2025-05-17 02:13:11,707 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  518. 2025-05-17 02:13:12,783 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  519. 2025-05-17 02:13:13,857 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  520. 2025-05-17 02:13:14,765 - sglang - INFO - [2025-05-17 02:13:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
  521. 2025-05-17 02:13:14,765 - __main__ - INFO - [2025-05-17 02:13:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
  522. 2025-05-17 02:13:14,934 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  523. 2025-05-17 02:13:16,008 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  524. 2025-05-17 02:13:17,076 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  525. 2025-05-17 02:13:18,136 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  526. 2025-05-17 02:13:19,206 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  527. 2025-05-17 02:13:20,279 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  528. 2025-05-17 02:13:20,559 - sglang - INFO - [2025-05-17 02:13:20 TP0] Overlap scheduler is disabled for multimodal models.
  529. 2025-05-17 02:13:20,559 - __main__ - INFO - [2025-05-17 02:13:20 TP0] Overlap scheduler is disabled for multimodal models.
  530. 2025-05-17 02:13:21,049 - sglang - INFO - [2025-05-17 02:13:21 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  531. 2025-05-17 02:13:21,049 - __main__ - INFO - [2025-05-17 02:13:21 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  532. 2025-05-17 02:13:21,049 - sglang - INFO - [2025-05-17 02:13:21 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  533. 2025-05-17 02:13:21,049 - __main__ - INFO - [2025-05-17 02:13:21 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  534. 2025-05-17 02:13:21,050 - sglang - INFO - [2025-05-17 02:13:21 TP0] Init torch distributed begin.
  535. 2025-05-17 02:13:21,050 - __main__ - INFO - [2025-05-17 02:13:21 TP0] Init torch distributed begin.
  536. 2025-05-17 02:13:21,357 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  537. 2025-05-17 02:13:22,424 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  538. 2025-05-17 02:13:23,487 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  539. 2025-05-17 02:13:24,554 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  540. 2025-05-17 02:13:25,624 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  541. 2025-05-17 02:13:26,409 - sglang - INFO - [2025-05-17 02:13:26 TP0] Load weight begin. avail mem=23.33 GB
  542. 2025-05-17 02:13:26,409 - __main__ - INFO - [2025-05-17 02:13:26 TP0] Load weight begin. avail mem=23.33 GB
  543. 2025-05-17 02:13:26,710 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  544. 2025-05-17 02:13:27,540 - sglang - INFO - [2025-05-17 02:13:27 TP0] Using model weights format ['*.safetensors']
  545. 2025-05-17 02:13:27,540 - __main__ - INFO - [2025-05-17 02:13:27 TP0] Using model weights format ['*.safetensors']
  546. 2025-05-17 02:13:27,778 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  547. 2025-05-17 02:13:28,055 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  548. 2025-05-17 02:13:28,055 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  549. 2025-05-17 02:13:28,577 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 1.92it/s]
  550. 2025-05-17 02:13:28,577 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 1.92it/s]
  551. 2025-05-17 02:13:28,818 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  552. 2025-05-17 02:13:29,879 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  553. 2025-05-17 02:13:29,976 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:02, 1.04s/it]
  554. 2025-05-17 02:13:29,976 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:02, 1.04s/it]
  555. 2025-05-17 02:13:30,958 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  556. 2025-05-17 02:13:31,289 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.16s/it]
  557. 2025-05-17 02:13:31,289 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.16s/it]
  558. 2025-05-17 02:13:32,036 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  559. 2025-05-17 02:13:32,471 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.17s/it]
  560. 2025-05-17 02:13:32,471 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.17s/it]
  561. 2025-05-17 02:13:32,471 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.10s/it]
  562. 2025-05-17 02:13:32,471 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.10s/it]
  563. 2025-05-17 02:13:32,471 - sglang - INFO -
  564. 2025-05-17 02:13:32,471 - __main__ - INFO -
  565. 2025-05-17 02:13:32,656 - sglang - INFO - [2025-05-17 02:13:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  566. 2025-05-17 02:13:32,656 - __main__ - INFO - [2025-05-17 02:13:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  567. 2025-05-17 02:13:32,662 - sglang - INFO - [2025-05-17 02:13:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  568. 2025-05-17 02:13:32,662 - __main__ - INFO - [2025-05-17 02:13:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  569. 2025-05-17 02:13:32,662 - sglang - INFO - [2025-05-17 02:13:32 TP0] Memory pool end. avail mem=5.30 GB
  570. 2025-05-17 02:13:32,662 - __main__ - INFO - [2025-05-17 02:13:32 TP0] Memory pool end. avail mem=5.30 GB
  571. 2025-05-17 02:13:32,819 - sglang - INFO - [2025-05-17 02:13:32 TP0] Capture cuda graph begin. This can take up to several minutes.
  572. 2025-05-17 02:13:32,819 - __main__ - INFO - [2025-05-17 02:13:32 TP0] Capture cuda graph begin. This can take up to several minutes.
  573. 2025-05-17 02:13:33,114 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  574. 2025-05-17 02:13:34,191 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  575. 2025-05-17 02:13:34,510 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.04it/s] 50%|█████ | 2/4 [00:01<00:01, 1.86it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s] 100%|██████████| 4/4 [00:01<00:00, 2.94it/s] 100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
  576. 2025-05-17 02:13:34,510 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.04it/s] 50%|█████ | 2/4 [00:01<00:01, 1.86it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s] 100%|██████████| 4/4 [00:01<00:00, 2.94it/s] 100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
  577. 2025-05-17 02:13:34,511 - sglang - INFO - [2025-05-17 02:13:34 TP0] Capture cuda graph end. Time elapsed: 1.69 s
  578. 2025-05-17 02:13:34,511 - __main__ - INFO - [2025-05-17 02:13:34 TP0] Capture cuda graph end. Time elapsed: 1.69 s
  579. 2025-05-17 02:13:35,269 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  580. 2025-05-17 02:13:36,337 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  581. 2025-05-17 02:13:36,877 - sglang - INFO - [2025-05-17 02:13:36 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  582. 2025-05-17 02:13:36,877 - __main__ - INFO - [2025-05-17 02:13:36 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  583. 2025-05-17 02:13:37,430 - __main__ - INFO - sglang server is ready.
  584. 2025-05-17 02:13:37,431 - __main__ - INFO - Queue remaining: 1
  585. 2025-05-17 02:13:37,431 - __main__ - INFO -
  586. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  587. ----------------------------------------------------------------------------------
  588. 2025-05-17 02:13:37,431 - __main__ - INFO -
  589. Worker ID
  590. ---------
  591. 2025-05-17 02:13:37,431 - __main__ - INFO - Worker 0 processing work item 68de5843976d18df5ea068383feb21dd169b186d
  592. 2025-05-17 02:13:37,431 - __main__ - INFO - Created all tasks for 68de5843976d18df5ea068383feb21dd169b186d
  593. 2025-05-17 02:13:37,439 - __main__ - INFO - Got 8 pages to do for olmocr_workspace/job_1747419168/input.pdf in worker 0
  594. 2025-05-17 02:13:37,952 - sglang - INFO - [2025-05-17 02:13:37 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  595. 2025-05-17 02:13:37,952 - __main__ - INFO - [2025-05-17 02:13:37 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  596. 2025-05-17 02:13:37,952 - __main__ - INFO - sglang running req: 0 queue req: 0
  597. 2025-05-17 02:13:38,982 - sglang - INFO - [2025-05-17 02:13:38] The server is fired up and ready to roll!
  598. 2025-05-17 02:13:38,982 - __main__ - INFO - [2025-05-17 02:13:38] The server is fired up and ready to roll!
  599. 2025-05-17 02:13:44,098 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-1
  600. 2025-05-17 02:13:44,105 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-2
  601. 2025-05-17 02:13:44,149 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-3
  602. 2025-05-17 02:13:44,185 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-6
  603. 2025-05-17 02:13:44,206 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-4
  604. 2025-05-17 02:13:44,208 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-5
  605. 2025-05-17 02:13:44,212 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-8
  606. 2025-05-17 02:13:44,212 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419168/input.pdf-7
  607. 2025-05-17 02:13:47,479 - __main__ - INFO - Queue remaining: 0
  608. 2025-05-17 02:13:47,479 - __main__ - INFO -
  609. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  610. ----------------------------------------------------------------------------------
  611. 2025-05-17 02:13:47,480 - __main__ - INFO -
  612. Worker ID | started
  613. ----------+--------
  614. 0 | 8
  615. 2025-05-17 02:13:57,481 - __main__ - INFO - Queue remaining: 0
  616. 2025-05-17 02:13:57,481 - __main__ - INFO -
  617. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  618. ----------------------------------------------------------------------------------
  619. 2025-05-17 02:13:57,481 - __main__ - INFO -
  620. Worker ID | started
  621. ----------+--------
  622. 0 | 8
  623. 2025-05-17 02:14:05,287 - sglang - INFO - [2025-05-17 02:14:05 TP0] Prefill batch. #new-seq: 1, #new-token: 1171, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  624. 2025-05-17 02:14:05,287 - __main__ - INFO - sglang running req: 0 queue req: 0
  625. 2025-05-17 02:14:06,033 - sglang - INFO - [2025-05-17 02:14:06 TP0] Prefill batch. #new-seq: 6, #new-token: 12991, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.03, #running-req: 1, #queue-req: 1
  626. 2025-05-17 02:14:06,033 - __main__ - INFO - sglang running req: 1 queue req: 1
  627. 2025-05-17 02:14:07,482 - __main__ - INFO - Queue remaining: 0
  628. 2025-05-17 02:14:07,482 - __main__ - INFO -
  629. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  630. ----------------------------------------------------------------------------------
  631. 2025-05-17 02:14:07,483 - __main__ - INFO -
  632. Worker ID | started
  633. ----------+--------
  634. 0 | 8
  635. 2025-05-17 02:14:11,389 - sglang - INFO - [2025-05-17 02:14:11 TP0] Decode batch. #running-req: 7, #token: 13189, token usage: 0.35, gen throughput (token/s): 6.90, #queue-req: 1
  636. 2025-05-17 02:14:11,389 - __main__ - INFO - sglang running req: 7 queue req: 1
  637. 2025-05-17 02:14:11,411 - sglang - INFO - [2025-05-17 02:14:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1764, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.35, #running-req: 6, #queue-req: 0
  638. 2025-05-17 02:14:11,411 - __main__ - INFO - sglang running req: 6 queue req: 0
  639. 2025-05-17 02:14:12,294 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  640. 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 1 exiting due to empty queue
  641. 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 2 exiting due to empty queue
  642. 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 3 exiting due to empty queue
  643. 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 4 exiting due to empty queue
  644. 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 5 exiting due to empty queue
  645. 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 6 exiting due to empty queue
  646. 2025-05-17 02:14:12,295 - __main__ - INFO - Worker 7 exiting due to empty queue
  647. 2025-05-17 02:14:12,857 - sglang - INFO - [2025-05-17 02:14:12 TP0] Decode batch. #running-req: 6, #token: 14047, token usage: 0.37, gen throughput (token/s): 175.03, #queue-req: 0
  648. 2025-05-17 02:14:12,857 - __main__ - INFO - sglang running req: 6 queue req: 0
  649. 2025-05-17 02:14:13,726 - sglang - INFO - [2025-05-17 02:14:13 TP0] Decode batch. #running-req: 6, #token: 14287, token usage: 0.38, gen throughput (token/s): 276.13, #queue-req: 0
  650. 2025-05-17 02:14:13,727 - __main__ - INFO - sglang running req: 6 queue req: 0
  651. 2025-05-17 02:14:14,595 - sglang - INFO - [2025-05-17 02:14:14 TP0] Decode batch. #running-req: 6, #token: 14527, token usage: 0.38, gen throughput (token/s): 276.46, #queue-req: 0
  652. 2025-05-17 02:14:14,595 - __main__ - INFO - sglang running req: 6 queue req: 0
  653. 2025-05-17 02:14:15,463 - sglang - INFO - [2025-05-17 02:14:15 TP0] Decode batch. #running-req: 6, #token: 14767, token usage: 0.39, gen throughput (token/s): 276.44, #queue-req: 0
  654. 2025-05-17 02:14:15,463 - __main__ - INFO - sglang running req: 6 queue req: 0
  655. 2025-05-17 02:14:16,331 - sglang - INFO - [2025-05-17 02:14:16 TP0] Decode batch. #running-req: 6, #token: 15007, token usage: 0.40, gen throughput (token/s): 276.44, #queue-req: 0
  656. 2025-05-17 02:14:16,331 - __main__ - INFO - sglang running req: 6 queue req: 0
  657. 2025-05-17 02:14:17,200 - sglang - INFO - [2025-05-17 02:14:17 TP0] Decode batch. #running-req: 6, #token: 15247, token usage: 0.40, gen throughput (token/s): 276.27, #queue-req: 0
  658. 2025-05-17 02:14:17,200 - __main__ - INFO - sglang running req: 6 queue req: 0
  659. 2025-05-17 02:14:17,484 - __main__ - INFO - Queue remaining: 0
  660. 2025-05-17 02:14:17,484 - __main__ - INFO -
  661. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  662. ----------------------------------------------------------------------------------
  663. sglang_input_tokens 27.73 27.73
  664. sglang_output_tokens 1.04 1.04
  665. 2025-05-17 02:14:17,484 - __main__ - INFO -
  666. Worker ID | finished | started
  667. ----------+----------+--------
  668. 0 | 2 | 8
  669. 2025-05-17 02:14:18,070 - sglang - INFO - [2025-05-17 02:14:18 TP0] Decode batch. #running-req: 6, #token: 15487, token usage: 0.41, gen throughput (token/s): 275.60, #queue-req: 0
  670. 2025-05-17 02:14:18,071 - __main__ - INFO - sglang running req: 6 queue req: 0
  671. 2025-05-17 02:14:18,942 - sglang - INFO - [2025-05-17 02:14:18 TP0] Decode batch. #running-req: 6, #token: 15727, token usage: 0.41, gen throughput (token/s): 275.41, #queue-req: 0
  672. 2025-05-17 02:14:18,942 - __main__ - INFO - sglang running req: 6 queue req: 0
  673. 2025-05-17 02:14:19,813 - sglang - INFO - [2025-05-17 02:14:19 TP0] Decode batch. #running-req: 5, #token: 13844, token usage: 0.36, gen throughput (token/s): 270.98, #queue-req: 0
  674. 2025-05-17 02:14:19,813 - __main__ - INFO - sglang running req: 5 queue req: 0
  675. 2025-05-17 02:14:20,680 - sglang - INFO - [2025-05-17 02:14:20 TP0] Decode batch. #running-req: 5, #token: 14044, token usage: 0.37, gen throughput (token/s): 230.75, #queue-req: 0
  676. 2025-05-17 02:14:20,680 - __main__ - INFO - sglang running req: 5 queue req: 0
  677. 2025-05-17 02:14:21,538 - sglang - INFO - [2025-05-17 02:14:21 TP0] Decode batch. #running-req: 3, #token: 8910, token usage: 0.23, gen throughput (token/s): 192.30, #queue-req: 0
  678. 2025-05-17 02:14:21,538 - __main__ - INFO - sglang running req: 3 queue req: 0
  679. 2025-05-17 02:14:22,384 - sglang - INFO - [2025-05-17 02:14:22 TP0] Decode batch. #running-req: 3, #token: 9030, token usage: 0.24, gen throughput (token/s): 141.83, #queue-req: 0
  680. 2025-05-17 02:14:22,384 - __main__ - INFO - sglang running req: 3 queue req: 0
  681. 2025-05-17 02:14:23,231 - sglang - INFO - [2025-05-17 02:14:23 TP0] Decode batch. #running-req: 3, #token: 9150, token usage: 0.24, gen throughput (token/s): 141.64, #queue-req: 0
  682. 2025-05-17 02:14:23,231 - __main__ - INFO - sglang running req: 3 queue req: 0
  683. 2025-05-17 02:14:24,072 - sglang - INFO - [2025-05-17 02:14:24 TP0] Decode batch. #running-req: 2, #token: 6371, token usage: 0.17, gen throughput (token/s): 116.49, #queue-req: 0
  684. 2025-05-17 02:14:24,072 - __main__ - INFO - sglang running req: 2 queue req: 0
  685. 2025-05-17 02:14:24,906 - sglang - INFO - [2025-05-17 02:14:24 TP0] Decode batch. #running-req: 2, #token: 6451, token usage: 0.17, gen throughput (token/s): 95.96, #queue-req: 0
  686. 2025-05-17 02:14:24,906 - __main__ - INFO - sglang running req: 2 queue req: 0
  687. 2025-05-17 02:14:25,734 - sglang - INFO - [2025-05-17 02:14:25 TP0] Decode batch. #running-req: 1, #token: 3584, token usage: 0.09, gen throughput (token/s): 54.32, #queue-req: 0
  688. 2025-05-17 02:14:25,734 - __main__ - INFO - sglang running req: 1 queue req: 0
  689. 2025-05-17 02:14:26,562 - sglang - INFO - [2025-05-17 02:14:26 TP0] Decode batch. #running-req: 1, #token: 3624, token usage: 0.10, gen throughput (token/s): 48.34, #queue-req: 0
  690. 2025-05-17 02:14:26,562 - __main__ - INFO - sglang running req: 1 queue req: 0
  691. 2025-05-17 02:14:27,389 - sglang - INFO - [2025-05-17 02:14:27 TP0] Decode batch. #running-req: 1, #token: 3664, token usage: 0.10, gen throughput (token/s): 48.32, #queue-req: 0
  692. 2025-05-17 02:14:27,390 - __main__ - INFO - sglang running req: 1 queue req: 0
  693. 2025-05-17 02:14:27,485 - __main__ - INFO - Queue remaining: 0
  694. 2025-05-17 02:14:27,485 - __main__ - INFO -
  695. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  696. ----------------------------------------------------------------------------------
  697. sglang_input_tokens 140.98 140.98
  698. sglang_output_tokens 27.79 27.79
  699. 2025-05-17 02:14:27,485 - __main__ - INFO -
  700. Worker ID | finished | started
  701. ----------+----------+--------
  702. 0 | 7 | 8
  703. 2025-05-17 02:14:28,218 - sglang - INFO - [2025-05-17 02:14:28 TP0] Decode batch. #running-req: 1, #token: 3704, token usage: 0.10, gen throughput (token/s): 48.29, #queue-req: 0
  704. 2025-05-17 02:14:28,218 - __main__ - INFO - sglang running req: 1 queue req: 0
  705. 2025-05-17 02:14:29,045 - sglang - INFO - [2025-05-17 02:14:29 TP0] Decode batch. #running-req: 1, #token: 3744, token usage: 0.10, gen throughput (token/s): 48.32, #queue-req: 0
  706. 2025-05-17 02:14:29,046 - __main__ - INFO - sglang running req: 1 queue req: 0
  707. 2025-05-17 02:14:29,873 - sglang - INFO - [2025-05-17 02:14:29 TP0] Decode batch. #running-req: 1, #token: 3784, token usage: 0.10, gen throughput (token/s): 48.34, #queue-req: 0
  708. 2025-05-17 02:14:29,873 - __main__ - INFO - sglang running req: 1 queue req: 0
  709. 2025-05-17 02:14:30,148 - __main__ - INFO - Finished TaskGroup for worker on 68de5843976d18df5ea068383feb21dd169b186d
  710. 2025-05-17 02:14:30,149 - __main__ - INFO - Got 1 docs for 68de5843976d18df5ea068383feb21dd169b186d
  711. 2025-05-17 02:14:30,150 - __main__ - INFO - Worker 0 exiting due to empty queue
  712. 2025-05-17 02:14:30,150 - __main__ - INFO - Work done
  713. 2025-05-17 02:14:30,151 - __main__ - INFO - Got cancellation request for SGLang server
  714. 2025-05-17 02:22:17,807 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  715. 2025-05-17 02:22:17,807 - __main__ - INFO - Loading file at olmocr_workspace/job_1747419731/input.pdf as PDF document
  716. 2025-05-17 02:22:17,807 - __main__ - INFO - Found 1 total pdf paths to add
  717. 2025-05-17 02:22:17,832 - __main__ - INFO - Calculated items_per_group: 7 based on average pages per PDF: 67.00
  718. 2025-05-17 02:22:18,076 - __main__ - INFO - Starting pipeline with PID 376290
  719. 2025-05-17 02:22:18,076 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  720. 2025-05-17 02:22:18,847 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  721. 2025-05-17 02:22:19,878 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  722. 2025-05-17 02:22:20,913 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  723. 2025-05-17 02:22:21,966 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  724. 2025-05-17 02:22:23,029 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  725. 2025-05-17 02:22:24,140 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  726. 2025-05-17 02:22:24,764 - sglang - INFO - [2025-05-17 02:22:24] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=562155338, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  727. 2025-05-17 02:22:24,764 - __main__ - INFO - [2025-05-17 02:22:24] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=562155338, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  728. 2025-05-17 02:22:25,216 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  729. 2025-05-17 02:22:26,281 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  730. 2025-05-17 02:22:27,348 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  731. 2025-05-17 02:22:28,033 - sglang - INFO - [2025-05-17 02:22:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
  732. 2025-05-17 02:22:28,033 - __main__ - INFO - [2025-05-17 02:22:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
  733. 2025-05-17 02:22:28,425 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  734. 2025-05-17 02:22:29,492 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  735. 2025-05-17 02:22:30,559 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  736. 2025-05-17 02:22:31,626 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  737. 2025-05-17 02:22:32,695 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  738. 2025-05-17 02:22:33,231 - sglang - INFO - [2025-05-17 02:22:33 TP0] Overlap scheduler is disabled for multimodal models.
  739. 2025-05-17 02:22:33,231 - __main__ - INFO - [2025-05-17 02:22:33 TP0] Overlap scheduler is disabled for multimodal models.
  740. 2025-05-17 02:22:33,770 - sglang - INFO - [2025-05-17 02:22:33 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  741. 2025-05-17 02:22:33,770 - __main__ - INFO - [2025-05-17 02:22:33 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  742. 2025-05-17 02:22:33,770 - sglang - INFO - [2025-05-17 02:22:33 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  743. 2025-05-17 02:22:33,770 - __main__ - INFO - [2025-05-17 02:22:33 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  744. 2025-05-17 02:22:33,770 - sglang - INFO - [2025-05-17 02:22:33 TP0] Init torch distributed begin.
  745. 2025-05-17 02:22:33,770 - __main__ - INFO - [2025-05-17 02:22:33 TP0] Init torch distributed begin.
  746. 2025-05-17 02:22:33,771 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  747. 2025-05-17 02:22:34,844 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  748. 2025-05-17 02:22:35,908 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  749. 2025-05-17 02:22:36,964 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  750. 2025-05-17 02:22:38,026 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  751. 2025-05-17 02:22:39,092 - sglang - INFO - [2025-05-17 02:22:39 TP0] Load weight begin. avail mem=23.33 GB
  752. 2025-05-17 02:22:39,092 - __main__ - INFO - [2025-05-17 02:22:39 TP0] Load weight begin. avail mem=23.33 GB
  753. 2025-05-17 02:22:39,093 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  754. 2025-05-17 02:22:40,160 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  755. 2025-05-17 02:22:40,194 - sglang - INFO - [2025-05-17 02:22:40 TP0] Using model weights format ['*.safetensors']
  756. 2025-05-17 02:22:40,194 - __main__ - INFO - [2025-05-17 02:22:40 TP0] Using model weights format ['*.safetensors']
  757. 2025-05-17 02:22:40,679 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  758. 2025-05-17 02:22:40,679 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  759. 2025-05-17 02:22:41,004 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.08it/s]
  760. 2025-05-17 02:22:41,004 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.08it/s]
  761. 2025-05-17 02:22:41,238 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  762. 2025-05-17 02:22:42,072 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.31it/s]
  763. 2025-05-17 02:22:42,073 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.31it/s]
  764. 2025-05-17 02:22:42,317 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  765. 2025-05-17 02:22:43,146 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.11it/s]
  766. 2025-05-17 02:22:43,146 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.11it/s]
  767. 2025-05-17 02:22:43,395 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  768. 2025-05-17 02:22:44,205 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.04it/s]
  769. 2025-05-17 02:22:44,205 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.04it/s]
  770. 2025-05-17 02:22:44,205 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.13it/s]
  771. 2025-05-17 02:22:44,205 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.13it/s]
  772. 2025-05-17 02:22:44,205 - sglang - INFO -
  773. 2025-05-17 02:22:44,205 - __main__ - INFO -
  774. 2025-05-17 02:22:44,359 - sglang - INFO - [2025-05-17 02:22:44 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  775. 2025-05-17 02:22:44,359 - __main__ - INFO - [2025-05-17 02:22:44 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  776. 2025-05-17 02:22:44,366 - sglang - INFO - [2025-05-17 02:22:44 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  777. 2025-05-17 02:22:44,366 - __main__ - INFO - [2025-05-17 02:22:44 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  778. 2025-05-17 02:22:44,366 - sglang - INFO - [2025-05-17 02:22:44 TP0] Memory pool end. avail mem=5.30 GB
  779. 2025-05-17 02:22:44,366 - __main__ - INFO - [2025-05-17 02:22:44 TP0] Memory pool end. avail mem=5.30 GB
  780. 2025-05-17 02:22:44,472 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  781. 2025-05-17 02:22:44,560 - sglang - INFO - [2025-05-17 02:22:44 TP0] Capture cuda graph begin. This can take up to several minutes.
  782. 2025-05-17 02:22:44,560 - __main__ - INFO - [2025-05-17 02:22:44 TP0] Capture cuda graph begin. This can take up to several minutes.
  783. 2025-05-17 02:22:45,549 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  784. 2025-05-17 02:22:46,181 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.12it/s] 50%|█████ | 2/4 [00:01<00:01, 1.96it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.58it/s] 100%|██████████| 4/4 [00:01<00:00, 3.03it/s] 100%|██████████| 4/4 [00:01<00:00, 2.47it/s]
  785. 2025-05-17 02:22:46,181 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.12it/s] 50%|█████ | 2/4 [00:01<00:01, 1.96it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.58it/s] 100%|██████████| 4/4 [00:01<00:00, 3.03it/s] 100%|██████████| 4/4 [00:01<00:00, 2.47it/s]
  786. 2025-05-17 02:22:46,181 - sglang - INFO - [2025-05-17 02:22:46 TP0] Capture cuda graph end. Time elapsed: 1.62 s
  787. 2025-05-17 02:22:46,181 - __main__ - INFO - [2025-05-17 02:22:46 TP0] Capture cuda graph end. Time elapsed: 1.62 s
  788. 2025-05-17 02:22:46,626 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  789. 2025-05-17 02:22:47,695 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  790. 2025-05-17 02:22:48,763 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  791. 2025-05-17 02:22:48,820 - sglang - INFO - [2025-05-17 02:22:48 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  792. 2025-05-17 02:22:48,821 - __main__ - INFO - [2025-05-17 02:22:48 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  793. 2025-05-17 02:22:49,856 - __main__ - INFO - sglang server is ready.
  794. 2025-05-17 02:22:49,857 - __main__ - INFO - Queue remaining: 1
  795. 2025-05-17 02:22:49,857 - __main__ - INFO -
  796. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  797. ----------------------------------------------------------------------------------
  798. 2025-05-17 02:22:49,857 - __main__ - INFO -
  799. Worker ID
  800. ---------
  801. 2025-05-17 02:22:49,857 - __main__ - INFO - Worker 0 processing work item a47ce4ecdd6200876f5b8de00bb3ccbac96ba956
  802. 2025-05-17 02:22:49,857 - __main__ - INFO - Created all tasks for a47ce4ecdd6200876f5b8de00bb3ccbac96ba956
  803. 2025-05-17 02:22:49,891 - __main__ - INFO - Got 67 pages to do for olmocr_workspace/job_1747419731/input.pdf in worker 0
  804. 2025-05-17 02:22:50,022 - sglang - INFO - [2025-05-17 02:22:49 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  805. 2025-05-17 02:22:50,023 - __main__ - INFO - [2025-05-17 02:22:49 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  806. 2025-05-17 02:22:50,023 - __main__ - INFO - sglang running req: 0 queue req: 0
  807. 2025-05-17 02:22:51,583 - sglang - INFO - [2025-05-17 02:22:51] The server is fired up and ready to roll!
  808. 2025-05-17 02:22:51,584 - __main__ - INFO - [2025-05-17 02:22:51] The server is fired up and ready to roll!
  809. 2025-05-17 02:22:59,858 - __main__ - INFO - Queue remaining: 0
  810. 2025-05-17 02:22:59,859 - __main__ - INFO -
  811. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  812. ----------------------------------------------------------------------------------
  813. 2025-05-17 02:22:59,859 - __main__ - INFO -
  814. Worker ID | started
  815. ----------+--------
  816. 0 | 67
  817. 2025-05-17 02:23:02,345 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-1
  818. 2025-05-17 02:23:02,372 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-2
  819. 2025-05-17 02:23:02,428 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-3
  820. 2025-05-17 02:23:02,432 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-4
  821. 2025-05-17 02:23:02,482 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-6
  822. 2025-05-17 02:23:02,490 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-5
  823. 2025-05-17 02:23:02,540 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-7
  824. 2025-05-17 02:23:02,553 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-9
  825. 2025-05-17 02:23:02,557 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-8
  826. 2025-05-17 02:23:02,610 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-13
  827. 2025-05-17 02:23:02,616 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-11
  828. 2025-05-17 02:23:02,629 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-14
  829. 2025-05-17 02:23:02,636 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-15
  830. 2025-05-17 02:23:02,645 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-16
  831. 2025-05-17 02:23:02,671 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-10
  832. 2025-05-17 02:23:02,672 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-18
  833. 2025-05-17 02:23:02,675 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-17
  834. 2025-05-17 02:23:02,678 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-19
  835. 2025-05-17 02:23:02,686 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-20
  836. 2025-05-17 02:23:02,700 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-22
  837. 2025-05-17 02:23:02,701 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-21
  838. 2025-05-17 02:23:02,707 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-12
  839. 2025-05-17 02:23:02,720 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-23
  840. 2025-05-17 02:23:02,733 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-24
  841. 2025-05-17 02:23:02,735 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-26
  842. 2025-05-17 02:23:02,744 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-27
  843. 2025-05-17 02:23:02,747 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-28
  844. 2025-05-17 02:23:02,761 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-29
  845. 2025-05-17 02:23:02,765 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-31
  846. 2025-05-17 02:23:02,767 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-30
  847. 2025-05-17 02:23:02,769 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-32
  848. 2025-05-17 02:23:02,796 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-33
  849. 2025-05-17 02:23:02,800 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-34
  850. 2025-05-17 02:23:02,817 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-41
  851. 2025-05-17 02:23:02,824 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-39
  852. 2025-05-17 02:23:02,825 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-25
  853. 2025-05-17 02:23:02,829 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-37
  854. 2025-05-17 02:23:02,855 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-42
  855. 2025-05-17 02:23:02,866 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-44
  856. 2025-05-17 02:23:02,867 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-43
  857. 2025-05-17 02:23:02,882 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-46
  858. 2025-05-17 02:23:02,886 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-52
  859. 2025-05-17 02:23:02,890 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-47
  860. 2025-05-17 02:23:02,891 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-48
  861. 2025-05-17 02:23:02,893 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-51
  862. 2025-05-17 02:23:02,897 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-50
  863. 2025-05-17 02:23:02,903 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-36
  864. 2025-05-17 02:23:02,913 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-35
  865. 2025-05-17 02:23:02,921 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-38
  866. 2025-05-17 02:23:02,923 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-53
  867. 2025-05-17 02:23:02,934 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-55
  868. 2025-05-17 02:23:02,937 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-58
  869. 2025-05-17 02:23:02,938 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-59
  870. 2025-05-17 02:23:02,938 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-66
  871. 2025-05-17 02:23:02,939 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-54
  872. 2025-05-17 02:23:02,939 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-40
  873. 2025-05-17 02:23:02,943 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-56
  874. 2025-05-17 02:23:02,944 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-67
  875. 2025-05-17 02:23:02,954 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-61
  876. 2025-05-17 02:23:02,981 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-57
  877. 2025-05-17 02:23:02,983 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-62
  878. 2025-05-17 02:23:02,984 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-60
  879. 2025-05-17 02:23:02,984 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-45
  880. 2025-05-17 02:23:02,985 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-65
  881. 2025-05-17 02:23:02,988 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-64
  882. 2025-05-17 02:23:02,989 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-63
  883. 2025-05-17 02:23:02,992 - __main__ - INFO - Built page query for olmocr_workspace/job_1747419731/input.pdf-49
  884. 2025-05-17 02:23:09,861 - __main__ - INFO - Queue remaining: 0
  885. 2025-05-17 02:23:09,861 - __main__ - INFO -
  886. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  887. ----------------------------------------------------------------------------------
  888. 2025-05-17 02:23:09,861 - __main__ - INFO -
  889. Worker ID | started
  890. ----------+--------
  891. 0 | 67
  892. 2025-05-17 02:23:14,583 - sglang - INFO - [2025-05-17 02:23:14 TP0] Prefill batch. #new-seq: 1, #new-token: 1171, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  893. 2025-05-17 02:23:14,584 - __main__ - INFO - sglang running req: 0 queue req: 0
  894. 2025-05-17 02:23:18,202 - sglang - INFO - [2025-05-17 02:23:18 TP0] Prefill batch. #new-seq: 7, #new-token: 12863, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.03, #running-req: 1, #queue-req: 59
  895. 2025-05-17 02:23:18,203 - __main__ - INFO - sglang running req: 1 queue req: 59
  896. 2025-05-17 02:23:19,863 - __main__ - INFO - Queue remaining: 0
  897. 2025-05-17 02:23:19,863 - __main__ - INFO -
  898. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  899. ----------------------------------------------------------------------------------
  900. 2025-05-17 02:23:19,863 - __main__ - INFO -
  901. Worker ID | started
  902. ----------+--------
  903. 0 | 67
  904. 2025-05-17 02:23:24,325 - sglang - INFO - [2025-05-17 02:23:24 TP0] Decode batch. #running-req: 8, #token: 13094, token usage: 0.34, gen throughput (token/s): 7.63, #queue-req: 59
  905. 2025-05-17 02:23:24,325 - __main__ - INFO - sglang running req: 8 queue req: 59
  906. 2025-05-17 02:23:24,347 - sglang - INFO - [2025-05-17 02:23:24 TP0] Prefill batch. #new-seq: 2, #new-token: 3810, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.34, #running-req: 7, #queue-req: 57
  907. 2025-05-17 02:23:24,347 - __main__ - INFO - sglang running req: 7 queue req: 57
  908. 2025-05-17 02:23:26,480 - sglang - INFO - [2025-05-17 02:23:26 TP0] Decode batch. #running-req: 9, #token: 17262, token usage: 0.45, gen throughput (token/s): 166.10, #queue-req: 57
  909. 2025-05-17 02:23:26,481 - __main__ - INFO - sglang running req: 9 queue req: 57
  910. 2025-05-17 02:23:26,828 - sglang - INFO - [2025-05-17 02:23:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2193, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.42, #running-req: 8, #queue-req: 56
  911. 2025-05-17 02:23:26,828 - __main__ - INFO - sglang running req: 8 queue req: 56
  912. 2025-05-17 02:23:28,125 - sglang - INFO - [2025-05-17 02:23:28 TP0] Decode batch. #running-req: 9, #token: 18453, token usage: 0.49, gen throughput (token/s): 218.32, #queue-req: 56
  913. 2025-05-17 02:23:28,125 - __main__ - INFO - sglang running req: 9 queue req: 56
  914. 2025-05-17 02:23:29,055 - sglang - INFO - [2025-05-17 02:23:29 TP0] Decode batch. #running-req: 9, #token: 18813, token usage: 0.50, gen throughput (token/s): 386.98, #queue-req: 56
  915. 2025-05-17 02:23:29,055 - __main__ - INFO - sglang running req: 9 queue req: 56
  916. 2025-05-17 02:23:29,865 - __main__ - INFO - Queue remaining: 0
  917. 2025-05-17 02:23:29,865 - __main__ - INFO -
  918. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  919. ----------------------------------------------------------------------------------
  920. sglang_input_tokens 33.93 33.93
  921. sglang_output_tokens 1.69 1.69
  922. 2025-05-17 02:23:29,865 - __main__ - INFO -
  923. Worker ID | finished | started
  924. ----------+----------+--------
  925. 0 | 2 | 67
  926. 2025-05-17 02:23:29,985 - sglang - INFO - [2025-05-17 02:23:29 TP0] Decode batch. #running-req: 9, #token: 19173, token usage: 0.50, gen throughput (token/s): 387.01, #queue-req: 56
  927. 2025-05-17 02:23:29,985 - __main__ - INFO - sglang running req: 9 queue req: 56
  928. 2025-05-17 02:23:30,892 - sglang - INFO - [2025-05-17 02:23:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2008, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.47, #running-req: 8, #queue-req: 55
  929. 2025-05-17 02:23:30,892 - __main__ - INFO - sglang running req: 8 queue req: 55
  930. 2025-05-17 02:23:31,566 - sglang - INFO - [2025-05-17 02:23:31 TP0] Decode batch. #running-req: 9, #token: 19707, token usage: 0.52, gen throughput (token/s): 227.05, #queue-req: 55
  931. 2025-05-17 02:23:31,566 - __main__ - INFO - sglang running req: 9 queue req: 55
  932. 2025-05-17 02:23:32,380 - sglang - INFO - [2025-05-17 02:23:32 TP0] Prefill batch. #new-seq: 2, #new-token: 4339, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.48, #running-req: 8, #queue-req: 53
  933. 2025-05-17 02:23:32,380 - __main__ - INFO - sglang running req: 8 queue req: 53
  934. 2025-05-17 02:23:33,878 - sglang - INFO - [2025-05-17 02:23:33 TP0] Decode batch. #running-req: 10, #token: 22444, token usage: 0.59, gen throughput (token/s): 157.44, #queue-req: 53
  935. 2025-05-17 02:23:33,879 - __main__ - INFO - sglang running req: 10 queue req: 53
  936. 2025-05-17 02:23:34,823 - sglang - INFO - [2025-05-17 02:23:34 TP0] Decode batch. #running-req: 10, #token: 22844, token usage: 0.60, gen throughput (token/s): 423.37, #queue-req: 53
  937. 2025-05-17 02:23:34,823 - __main__ - INFO - sglang running req: 10 queue req: 53
  938. 2025-05-17 02:23:35,201 - sglang - INFO - [2025-05-17 02:23:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2028, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 9, #queue-req: 52
  939. 2025-05-17 02:23:35,201 - __main__ - INFO - sglang running req: 9 queue req: 52
  940. 2025-05-17 02:23:36,266 - sglang - INFO - [2025-05-17 02:23:36 TP0] Prefill batch. #new-seq: 1, #new-token: 1857, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 9, #queue-req: 51
  941. 2025-05-17 02:23:36,266 - __main__ - INFO - sglang running req: 9 queue req: 51
  942. 2025-05-17 02:23:37,087 - sglang - INFO - [2025-05-17 02:23:37 TP0] Decode batch. #running-req: 10, #token: 22318, token usage: 0.59, gen throughput (token/s): 175.82, #queue-req: 51
  943. 2025-05-17 02:23:37,087 - __main__ - INFO - sglang running req: 10 queue req: 51
  944. 2025-05-17 02:23:37,181 - sglang - INFO - [2025-05-17 02:23:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1843, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.53, #running-req: 9, #queue-req: 50
  945. 2025-05-17 02:23:37,181 - __main__ - INFO - sglang running req: 9 queue req: 50
  946. 2025-05-17 02:23:37,889 - sglang - INFO - [2025-05-17 02:23:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1909, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.52, #running-req: 9, #queue-req: 49
  947. 2025-05-17 02:23:37,889 - __main__ - INFO - sglang running req: 9 queue req: 49
  948. 2025-05-17 02:23:39,356 - sglang - INFO - [2025-05-17 02:23:39 TP0] Decode batch. #running-req: 10, #token: 22012, token usage: 0.58, gen throughput (token/s): 175.38, #queue-req: 49
  949. 2025-05-17 02:23:39,356 - __main__ - INFO - sglang running req: 10 queue req: 49
  950. 2025-05-17 02:23:39,866 - __main__ - INFO - Queue remaining: 0
  951. 2025-05-17 02:23:39,866 - __main__ - INFO -
  952. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  953. ----------------------------------------------------------------------------------
  954. sglang_input_tokens 166.83 166.83
  955. sglang_output_tokens 23.70 23.70
  956. 2025-05-17 02:23:39,867 - __main__ - INFO -
  957. Worker ID | finished | started
  958. ----------+----------+--------
  959. 0 | 8 | 67
  960. 2025-05-17 02:23:40,300 - sglang - INFO - [2025-05-17 02:23:40 TP0] Decode batch. #running-req: 10, #token: 22412, token usage: 0.59, gen throughput (token/s): 423.76, #queue-req: 49
  961. 2025-05-17 02:23:40,300 - __main__ - INFO - sglang running req: 10 queue req: 49
  962. 2025-05-17 02:23:41,245 - sglang - INFO - [2025-05-17 02:23:41 TP0] Decode batch. #running-req: 10, #token: 22812, token usage: 0.60, gen throughput (token/s): 423.19, #queue-req: 49
  963. 2025-05-17 02:23:41,245 - __main__ - INFO - sglang running req: 10 queue req: 49
  964. 2025-05-17 02:23:41,388 - sglang - INFO - [2025-05-17 02:23:41 TP0] Prefill batch. #new-seq: 2, #new-token: 4101, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 9, #queue-req: 47
  965. 2025-05-17 02:23:41,388 - __main__ - INFO - sglang running req: 9 queue req: 47
  966. 2025-05-17 02:23:43,418 - sglang - INFO - [2025-05-17 02:23:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2197, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.59, #running-req: 10, #queue-req: 46
  967. 2025-05-17 02:23:43,419 - __main__ - INFO - sglang running req: 10 queue req: 46
  968. 2025-05-17 02:23:44,339 - sglang - INFO - [2025-05-17 02:23:44 TP0] Decode batch. #running-req: 11, #token: 24808, token usage: 0.65, gen throughput (token/s): 139.63, #queue-req: 46
  969. 2025-05-17 02:23:44,339 - __main__ - INFO - sglang running req: 11 queue req: 46
  970. 2025-05-17 02:23:45,222 - sglang - INFO - [2025-05-17 02:23:45 TP0] Prefill batch. #new-seq: 2, #new-token: 3966, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.60, #running-req: 10, #queue-req: 44
  971. 2025-05-17 02:23:45,222 - __main__ - INFO - sglang running req: 10 queue req: 44
  972. 2025-05-17 02:23:46,590 - sglang - INFO - [2025-05-17 02:23:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2057, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 11, #queue-req: 43
  973. 2025-05-17 02:23:46,590 - __main__ - INFO - sglang running req: 11 queue req: 43
  974. 2025-05-17 02:23:47,330 - sglang - INFO - [2025-05-17 02:23:47 TP0] Decode batch. #running-req: 12, #token: 26764, token usage: 0.70, gen throughput (token/s): 147.43, #queue-req: 43
  975. 2025-05-17 02:23:47,331 - __main__ - INFO - sglang running req: 12 queue req: 43
  976. 2025-05-17 02:23:47,669 - sglang - INFO - [2025-05-17 02:23:47 TP0] Prefill batch. #new-seq: 1, #new-token: 1996, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 11, #queue-req: 42
  977. 2025-05-17 02:23:47,669 - __main__ - INFO - sglang running req: 11 queue req: 42
  978. 2025-05-17 02:23:48,979 - sglang - INFO - [2025-05-17 02:23:48 TP0] Decode batch. #running-req: 12, #token: 26496, token usage: 0.70, gen throughput (token/s): 290.61, #queue-req: 42
  979. 2025-05-17 02:23:48,979 - __main__ - INFO - sglang running req: 12 queue req: 42
  980. 2025-05-17 02:23:49,868 - __main__ - INFO - Queue remaining: 0
  981. 2025-05-17 02:23:49,869 - __main__ - INFO -
  982. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  983. ----------------------------------------------------------------------------------
  984. sglang_input_tokens 259.43 259.43
  985. sglang_output_tokens 40.62 40.62
  986. 2025-05-17 02:23:49,869 - __main__ - INFO -
  987. Worker ID | finished | started
  988. ----------+----------+--------
  989. 0 | 13 | 67
  990. 2025-05-17 02:23:49,942 - sglang - INFO - [2025-05-17 02:23:49 TP0] Decode batch. #running-req: 12, #token: 26976, token usage: 0.71, gen throughput (token/s): 498.13, #queue-req: 42
  991. 2025-05-17 02:23:49,942 - __main__ - INFO - sglang running req: 12 queue req: 42
  992. 2025-05-17 02:23:50,812 - sglang - INFO - [2025-05-17 02:23:50 TP0] Prefill batch. #new-seq: 2, #new-token: 4147, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 11, #queue-req: 40
  993. 2025-05-17 02:23:50,812 - __main__ - INFO - sglang running req: 11 queue req: 40
  994. 2025-05-17 02:23:52,309 - sglang - INFO - [2025-05-17 02:23:52 TP0] Decode batch. #running-req: 13, #token: 28833, token usage: 0.76, gen throughput (token/s): 204.08, #queue-req: 40
  995. 2025-05-17 02:23:52,309 - __main__ - INFO - sglang running req: 13 queue req: 40
  996. 2025-05-17 02:23:52,382 - sglang - INFO - [2025-05-17 02:23:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2124, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 39
  997. 2025-05-17 02:23:52,382 - __main__ - INFO - sglang running req: 12 queue req: 39
  998. 2025-05-17 02:23:53,998 - sglang - INFO - [2025-05-17 02:23:53 TP0] Decode batch. #running-req: 13, #token: 29315, token usage: 0.77, gen throughput (token/s): 307.21, #queue-req: 39
  999. 2025-05-17 02:23:53,999 - __main__ - INFO - sglang running req: 13 queue req: 39
  1000. 2025-05-17 02:23:54,072 - sglang - INFO - [2025-05-17 02:23:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2102, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 38
  1001. 2025-05-17 02:23:54,072 - __main__ - INFO - sglang running req: 12 queue req: 38
  1002. 2025-05-17 02:23:55,704 - sglang - INFO - [2025-05-17 02:23:55 TP0] Decode batch. #running-req: 13, #token: 29710, token usage: 0.78, gen throughput (token/s): 304.35, #queue-req: 38
  1003. 2025-05-17 02:23:55,704 - __main__ - INFO - sglang running req: 13 queue req: 38
  1004. 2025-05-17 02:23:55,777 - sglang - INFO - [2025-05-17 02:23:55 TP0] Prefill batch. #new-seq: 1, #new-token: 1910, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 37
  1005. 2025-05-17 02:23:55,777 - __main__ - INFO - sglang running req: 12 queue req: 37
  1006. 2025-05-17 02:23:57,368 - sglang - INFO - [2025-05-17 02:23:57 TP0] Decode batch. #running-req: 13, #token: 29685, token usage: 0.78, gen throughput (token/s): 311.73, #queue-req: 37
  1007. 2025-05-17 02:23:57,369 - __main__ - INFO - sglang running req: 13 queue req: 37
  1008. 2025-05-17 02:23:57,540 - sglang - INFO - [2025-05-17 02:23:57 TP0] Prefill batch. #new-seq: 1, #new-token: 1906, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 36
  1009. 2025-05-17 02:23:57,540 - __main__ - INFO - sglang running req: 12 queue req: 36
  1010. 2025-05-17 02:23:58,652 - sglang - INFO - [2025-05-17 02:23:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2143, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 35
  1011. 2025-05-17 02:23:58,653 - __main__ - INFO - sglang running req: 12 queue req: 35
  1012. 2025-05-17 02:23:59,441 - sglang - INFO - [2025-05-17 02:23:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2210, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 34
  1013. 2025-05-17 02:23:59,442 - __main__ - INFO - sglang running req: 12 queue req: 34
  1014. 2025-05-17 02:23:59,871 - __main__ - INFO - Queue remaining: 0
  1015. 2025-05-17 02:23:59,871 - __main__ - INFO -
  1016. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1017. ----------------------------------------------------------------------------------
  1018. sglang_input_tokens 371.41 371.41
  1019. sglang_output_tokens 62.10 62.10
  1020. 2025-05-17 02:23:59,871 - __main__ - INFO -
  1021. Worker ID | finished | started
  1022. ----------+----------+--------
  1023. 0 | 20 | 67
  1024. 2025-05-17 02:24:00,539 - sglang - INFO - [2025-05-17 02:24:00 TP0] Decode batch. #running-req: 13, #token: 29458, token usage: 0.78, gen throughput (token/s): 163.08, #queue-req: 34
  1025. 2025-05-17 02:24:00,539 - __main__ - INFO - sglang running req: 13 queue req: 34
  1026. 2025-05-17 02:24:00,906 - sglang - INFO - [2025-05-17 02:24:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2010, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 33
  1027. 2025-05-17 02:24:00,907 - __main__ - INFO - sglang running req: 12 queue req: 33
  1028. 2025-05-17 02:24:02,225 - sglang - INFO - [2025-05-17 02:24:02 TP0] Decode batch. #running-req: 13, #token: 29711, token usage: 0.78, gen throughput (token/s): 307.83, #queue-req: 33
  1029. 2025-05-17 02:24:02,225 - __main__ - INFO - sglang running req: 13 queue req: 33
  1030. 2025-05-17 02:24:02,667 - sglang - INFO - [2025-05-17 02:24:02 TP0] Prefill batch. #new-seq: 1, #new-token: 1968, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 32
  1031. 2025-05-17 02:24:02,667 - __main__ - INFO - sglang running req: 12 queue req: 32
  1032. 2025-05-17 02:24:03,915 - sglang - INFO - [2025-05-17 02:24:03 TP0] Decode batch. #running-req: 13, #token: 27024, token usage: 0.71, gen throughput (token/s): 307.12, #queue-req: 32
  1033. 2025-05-17 02:24:03,915 - __main__ - INFO - sglang running req: 13 queue req: 32
  1034. 2025-05-17 02:24:03,939 - sglang - INFO - [2025-05-17 02:24:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1958, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 31
  1035. 2025-05-17 02:24:03,939 - __main__ - INFO - sglang running req: 12 queue req: 31
  1036. 2025-05-17 02:24:04,935 - sglang - INFO - [2025-05-17 02:24:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1902, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 30
  1037. 2025-05-17 02:24:04,936 - __main__ - INFO - sglang running req: 12 queue req: 30
  1038. 2025-05-17 02:24:06,303 - sglang - INFO - [2025-05-17 02:24:06 TP0] Decode batch. #running-req: 13, #token: 28972, token usage: 0.76, gen throughput (token/s): 216.92, #queue-req: 30
  1039. 2025-05-17 02:24:06,303 - __main__ - INFO - sglang running req: 13 queue req: 30
  1040. 2025-05-17 02:24:07,088 - sglang - INFO - [2025-05-17 02:24:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2210, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 29
  1041. 2025-05-17 02:24:07,088 - __main__ - INFO - sglang running req: 12 queue req: 29
  1042. 2025-05-17 02:24:08,064 - sglang - INFO - [2025-05-17 02:24:08 TP0] Decode batch. #running-req: 13, #token: 29012, token usage: 0.76, gen throughput (token/s): 294.63, #queue-req: 29
  1043. 2025-05-17 02:24:08,064 - __main__ - INFO - sglang running req: 13 queue req: 29
  1044. 2025-05-17 02:24:09,054 - sglang - INFO - [2025-05-17 02:24:09 TP0] Decode batch. #running-req: 13, #token: 29532, token usage: 0.78, gen throughput (token/s): 525.44, #queue-req: 29
  1045. 2025-05-17 02:24:09,054 - __main__ - INFO - sglang running req: 13 queue req: 29
  1046. 2025-05-17 02:24:09,872 - __main__ - INFO - Queue remaining: 0
  1047. 2025-05-17 02:24:09,873 - __main__ - INFO -
  1048. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1049. ----------------------------------------------------------------------------------
  1050. sglang_input_tokens 431.29 431.29
  1051. sglang_output_tokens 75.72 75.72
  1052. 2025-05-17 02:24:09,873 - __main__ - INFO -
  1053. Worker ID | finished | started
  1054. ----------+----------+--------
  1055. 0 | 25 | 67
  1056. 2025-05-17 02:24:10,037 - sglang - INFO - [2025-05-17 02:24:10 TP0] Decode batch. #running-req: 13, #token: 30052, token usage: 0.79, gen throughput (token/s): 528.96, #queue-req: 29
  1057. 2025-05-17 02:24:10,037 - __main__ - INFO - sglang running req: 13 queue req: 29
  1058. 2025-05-17 02:24:10,531 - sglang - INFO - [2025-05-17 02:24:10 TP0] Prefill batch. #new-seq: 1, #new-token: 1977, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 28
  1059. 2025-05-17 02:24:10,532 - __main__ - INFO - sglang running req: 12 queue req: 28
  1060. 2025-05-17 02:24:11,732 - sglang - INFO - [2025-05-17 02:24:11 TP0] Decode batch. #running-req: 13, #token: 30070, token usage: 0.79, gen throughput (token/s): 306.20, #queue-req: 28
  1061. 2025-05-17 02:24:11,732 - __main__ - INFO - sglang running req: 13 queue req: 28
  1062. 2025-05-17 02:24:11,831 - sglang - INFO - [2025-05-17 02:24:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1920, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 27
  1063. 2025-05-17 02:24:11,831 - __main__ - INFO - sglang running req: 12 queue req: 27
  1064. 2025-05-17 02:24:12,613 - sglang - INFO - [2025-05-17 02:24:12 TP0] Prefill batch. #new-seq: 1, #new-token: 1953, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 26
  1065. 2025-05-17 02:24:12,613 - __main__ - INFO - sglang running req: 12 queue req: 26
  1066. 2025-05-17 02:24:13,528 - sglang - INFO - [2025-05-17 02:24:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2056, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 25
  1067. 2025-05-17 02:24:13,528 - __main__ - INFO - sglang running req: 12 queue req: 25
  1068. 2025-05-17 02:24:14,834 - sglang - INFO - [2025-05-17 02:24:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2109, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 24
  1069. 2025-05-17 02:24:14,834 - __main__ - INFO - sglang running req: 12 queue req: 24
  1070. 2025-05-17 02:24:15,617 - sglang - INFO - [2025-05-17 02:24:15 TP0] Decode batch. #running-req: 13, #token: 29043, token usage: 0.76, gen throughput (token/s): 132.83, #queue-req: 24
  1071. 2025-05-17 02:24:15,617 - __main__ - INFO - sglang running req: 13 queue req: 24
  1072. 2025-05-17 02:24:15,666 - sglang - INFO - [2025-05-17 02:24:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1907, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 23
  1073. 2025-05-17 02:24:15,667 - __main__ - INFO - sglang running req: 12 queue req: 23
  1074. 2025-05-17 02:24:16,815 - sglang - INFO - [2025-05-17 02:24:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2241, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 22
  1075. 2025-05-17 02:24:16,815 - __main__ - INFO - sglang running req: 12 queue req: 22
  1076. 2025-05-17 02:24:18,108 - sglang - INFO - [2025-05-17 02:24:18 TP0] Decode batch. #running-req: 13, #token: 28773, token usage: 0.76, gen throughput (token/s): 207.93, #queue-req: 22
  1077. 2025-05-17 02:24:18,108 - __main__ - INFO - sglang running req: 13 queue req: 22
  1078. 2025-05-17 02:24:18,404 - sglang - INFO - [2025-05-17 02:24:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2175, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 21
  1079. 2025-05-17 02:24:18,405 - __main__ - INFO - sglang running req: 12 queue req: 21
  1080. 2025-05-17 02:24:19,827 - sglang - INFO - [2025-05-17 02:24:19 TP0] Decode batch. #running-req: 13, #token: 29186, token usage: 0.77, gen throughput (token/s): 301.88, #queue-req: 21
  1081. 2025-05-17 02:24:19,827 - __main__ - INFO - sglang running req: 13 queue req: 21
  1082. 2025-05-17 02:24:19,874 - __main__ - INFO - Queue remaining: 0
  1083. 2025-05-17 02:24:19,874 - __main__ - INFO -
  1084. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1085. ----------------------------------------------------------------------------------
  1086. sglang_input_tokens 528.39 528.39
  1087. sglang_output_tokens 95.05 95.05
  1088. 2025-05-17 02:24:19,874 - __main__ - INFO -
  1089. Worker ID | finished | started
  1090. ----------+----------+--------
  1091. 0 | 33 | 67
  1092. 2025-05-17 02:24:20,470 - sglang - INFO - [2025-05-17 02:24:20 TP0] Prefill batch. #new-seq: 1, #new-token: 1944, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 20
  1093. 2025-05-17 02:24:20,471 - __main__ - INFO - sglang running req: 12 queue req: 20
  1094. 2025-05-17 02:24:21,530 - sglang - INFO - [2025-05-17 02:24:21 TP0] Decode batch. #running-req: 13, #token: 29065, token usage: 0.77, gen throughput (token/s): 304.70, #queue-req: 20
  1095. 2025-05-17 02:24:21,531 - __main__ - INFO - sglang running req: 13 queue req: 20
  1096. 2025-05-17 02:24:22,002 - sglang - INFO - [2025-05-17 02:24:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2014, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 19
  1097. 2025-05-17 02:24:22,002 - __main__ - INFO - sglang running req: 12 queue req: 19
  1098. 2025-05-17 02:24:23,065 - sglang - INFO - [2025-05-17 02:24:23 TP0] Prefill batch. #new-seq: 1, #new-token: 1983, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 18
  1099. 2025-05-17 02:24:23,066 - __main__ - INFO - sglang running req: 12 queue req: 18
  1100. 2025-05-17 02:24:23,954 - sglang - INFO - [2025-05-17 02:24:23 TP0] Decode batch. #running-req: 13, #token: 28964, token usage: 0.76, gen throughput (token/s): 213.75, #queue-req: 18
  1101. 2025-05-17 02:24:23,954 - __main__ - INFO - sglang running req: 13 queue req: 18
  1102. 2025-05-17 02:24:24,946 - sglang - INFO - [2025-05-17 02:24:24 TP0] Decode batch. #running-req: 13, #token: 29484, token usage: 0.78, gen throughput (token/s): 524.27, #queue-req: 18
  1103. 2025-05-17 02:24:24,946 - __main__ - INFO - sglang running req: 13 queue req: 18
  1104. 2025-05-17 02:24:25,933 - sglang - INFO - [2025-05-17 02:24:25 TP0] Decode batch. #running-req: 13, #token: 30004, token usage: 0.79, gen throughput (token/s): 526.64, #queue-req: 18
  1105. 2025-05-17 02:24:25,933 - __main__ - INFO - sglang running req: 13 queue req: 18
  1106. 2025-05-17 02:24:26,680 - sglang - INFO - [2025-05-17 02:24:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2098, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 17
  1107. 2025-05-17 02:24:26,681 - __main__ - INFO - sglang running req: 12 queue req: 17
  1108. 2025-05-17 02:24:27,679 - sglang - INFO - [2025-05-17 02:24:27 TP0] Decode batch. #running-req: 13, #token: 29809, token usage: 0.78, gen throughput (token/s): 297.21, #queue-req: 17
  1109. 2025-05-17 02:24:27,680 - __main__ - INFO - sglang running req: 13 queue req: 17
  1110. 2025-05-17 02:24:27,803 - sglang - INFO - [2025-05-17 02:24:27 TP0] Prefill batch. #new-seq: 1, #new-token: 1136, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 16
  1111. 2025-05-17 02:24:27,804 - __main__ - INFO - sglang running req: 12 queue req: 16
  1112. 2025-05-17 02:24:28,573 - sglang - INFO - [2025-05-17 02:24:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2213, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 15
  1113. 2025-05-17 02:24:28,574 - __main__ - INFO - sglang running req: 12 queue req: 15
  1114. 2025-05-17 02:24:29,472 - sglang - INFO - [2025-05-17 02:24:29 TP0] Prefill batch. #new-seq: 1, #new-token: 1893, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 14
  1115. 2025-05-17 02:24:29,473 - __main__ - INFO - sglang running req: 12 queue req: 14
  1116. 2025-05-17 02:24:29,875 - __main__ - INFO - Queue remaining: 0
  1117. 2025-05-17 02:24:29,876 - __main__ - INFO -
  1118. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1119. ----------------------------------------------------------------------------------
  1120. sglang_input_tokens 595.63 595.63
  1121. sglang_output_tokens 109.08 109.08
  1122. 2025-05-17 02:24:29,876 - __main__ - INFO -
  1123. Worker ID | finished | started
  1124. ----------+----------+--------
  1125. 0 | 40 | 67
  1126. 2025-05-17 02:24:30,432 - sglang - INFO - [2025-05-17 02:24:30 TP0] Prefill batch. #new-seq: 1, #new-token: 1929, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 13
  1127. 2025-05-17 02:24:30,432 - __main__ - INFO - sglang running req: 12 queue req: 13
  1128. 2025-05-17 02:24:31,420 - sglang - INFO - [2025-05-17 02:24:31 TP0] Decode batch. #running-req: 12, #token: 27081, token usage: 0.71, gen throughput (token/s): 137.67, #queue-req: 13
  1129. 2025-05-17 02:24:31,420 - __main__ - INFO - sglang running req: 12 queue req: 13
  1130. 2025-05-17 02:24:31,421 - sglang - INFO - [2025-05-17 02:24:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1893, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 12
  1131. 2025-05-17 02:24:31,421 - __main__ - INFO - sglang running req: 12 queue req: 12
  1132. 2025-05-17 02:24:33,054 - sglang - INFO - [2025-05-17 02:24:33 TP0] Prefill batch. #new-seq: 1, #new-token: 1136, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 11
  1133. 2025-05-17 02:24:33,054 - __main__ - INFO - sglang running req: 12 queue req: 11
  1134. 2025-05-17 02:24:33,658 - sglang - INFO - [2025-05-17 02:24:33 TP0] Decode batch. #running-req: 13, #token: 28368, token usage: 0.75, gen throughput (token/s): 231.94, #queue-req: 11
  1135. 2025-05-17 02:24:33,658 - __main__ - INFO - sglang running req: 13 queue req: 11
  1136. 2025-05-17 02:24:33,979 - sglang - INFO - [2025-05-17 02:24:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2086, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 10
  1137. 2025-05-17 02:24:33,979 - __main__ - INFO - sglang running req: 12 queue req: 10
  1138. 2025-05-17 02:24:35,208 - sglang - INFO - [2025-05-17 02:24:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2010, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 9
  1139. 2025-05-17 02:24:35,208 - __main__ - INFO - sglang running req: 12 queue req: 9
  1140. 2025-05-17 02:24:36,127 - sglang - INFO - [2025-05-17 02:24:36 TP0] Decode batch. #running-req: 13, #token: 29602, token usage: 0.78, gen throughput (token/s): 209.83, #queue-req: 9
  1141. 2025-05-17 02:24:36,127 - __main__ - INFO - sglang running req: 13 queue req: 9
  1142. 2025-05-17 02:24:36,499 - sglang - INFO - [2025-05-17 02:24:36 TP0] Prefill batch. #new-seq: 1, #new-token: 1885, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 8
  1143. 2025-05-17 02:24:36,499 - __main__ - INFO - sglang running req: 12 queue req: 8
  1144. 2025-05-17 02:24:37,835 - sglang - INFO - [2025-05-17 02:24:37 TP0] Decode batch. #running-req: 12, #token: 26663, token usage: 0.70, gen throughput (token/s): 303.22, #queue-req: 8
  1145. 2025-05-17 02:24:37,835 - __main__ - INFO - sglang running req: 12 queue req: 8
  1146. 2025-05-17 02:24:37,835 - sglang - INFO - [2025-05-17 02:24:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1908, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 12, #queue-req: 7
  1147. 2025-05-17 02:24:37,835 - __main__ - INFO - sglang running req: 12 queue req: 7
  1148. 2025-05-17 02:24:39,541 - sglang - INFO - [2025-05-17 02:24:39 TP0] Decode batch. #running-req: 13, #token: 29091, token usage: 0.77, gen throughput (token/s): 304.79, #queue-req: 7
  1149. 2025-05-17 02:24:39,541 - __main__ - INFO - sglang running req: 13 queue req: 7
  1150. 2025-05-17 02:24:39,640 - sglang - INFO - [2025-05-17 02:24:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2123, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 6
  1151. 2025-05-17 02:24:39,641 - __main__ - INFO - sglang running req: 12 queue req: 6
  1152. 2025-05-17 02:24:39,878 - __main__ - INFO - Queue remaining: 0
  1153. 2025-05-17 02:24:39,879 - __main__ - INFO -
  1154. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1155. ----------------------------------------------------------------------------------
  1156. sglang_input_tokens 654.89 654.89
  1157. sglang_output_tokens 118.22 118.22
  1158. 2025-05-17 02:24:39,879 - __main__ - INFO -
  1159. Worker ID | finished | started
  1160. ----------+----------+--------
  1161. 0 | 48 | 67
  1162. 2025-05-17 02:24:41,274 - sglang - INFO - [2025-05-17 02:24:41 TP0] Decode batch. #running-req: 13, #token: 29420, token usage: 0.77, gen throughput (token/s): 299.44, #queue-req: 6
  1163. 2025-05-17 02:24:41,274 - __main__ - INFO - sglang running req: 13 queue req: 6
  1164. 2025-05-17 02:24:42,268 - sglang - INFO - [2025-05-17 02:24:42 TP0] Decode batch. #running-req: 13, #token: 29940, token usage: 0.79, gen throughput (token/s): 523.23, #queue-req: 6
  1165. 2025-05-17 02:24:42,268 - __main__ - INFO - sglang running req: 13 queue req: 6
  1166. 2025-05-17 02:24:43,264 - sglang - INFO - [2025-05-17 02:24:43 TP0] Decode batch. #running-req: 13, #token: 30460, token usage: 0.80, gen throughput (token/s): 522.07, #queue-req: 6
  1167. 2025-05-17 02:24:43,264 - __main__ - INFO - sglang running req: 13 queue req: 6
  1168. 2025-05-17 02:24:43,464 - sglang - INFO - [2025-05-17 02:24:43 TP0] Prefill batch. #new-seq: 1, #new-token: 1915, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 12, #queue-req: 5
  1169. 2025-05-17 02:24:43,464 - __main__ - INFO - sglang running req: 12 queue req: 5
  1170. 2025-05-17 02:24:44,500 - sglang - INFO - [2025-05-17 02:24:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1902, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 4
  1171. 2025-05-17 02:24:44,501 - __main__ - INFO - sglang running req: 12 queue req: 4
  1172. 2025-05-17 02:24:45,398 - sglang - INFO - [2025-05-17 02:24:45 TP0] Prefill batch. #new-seq: 1, #new-token: 1938, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 12, #queue-req: 3
  1173. 2025-05-17 02:24:45,398 - __main__ - INFO - sglang running req: 12 queue req: 3
  1174. 2025-05-17 02:24:46,270 - sglang - INFO - [2025-05-17 02:24:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2166, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 2
  1175. 2025-05-17 02:24:46,270 - __main__ - INFO - sglang running req: 12 queue req: 2
  1176. 2025-05-17 02:24:47,163 - sglang - INFO - [2025-05-17 02:24:47 TP0] Decode batch. #running-req: 13, #token: 28597, token usage: 0.75, gen throughput (token/s): 132.35, #queue-req: 2
  1177. 2025-05-17 02:24:47,163 - __main__ - INFO - sglang running req: 13 queue req: 2
  1178. 2025-05-17 02:24:47,262 - sglang - INFO - [2025-05-17 02:24:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2186, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 12, #queue-req: 1
  1179. 2025-05-17 02:24:47,263 - __main__ - INFO - sglang running req: 12 queue req: 1
  1180. 2025-05-17 02:24:48,964 - sglang - INFO - [2025-05-17 02:24:48 TP0] Decode batch. #running-req: 13, #token: 29041, token usage: 0.76, gen throughput (token/s): 288.14, #queue-req: 1
  1181. 2025-05-17 02:24:48,964 - __main__ - INFO - sglang running req: 13 queue req: 1
  1182. 2025-05-17 02:24:49,014 - sglang - INFO - [2025-05-17 02:24:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2101, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 12, #queue-req: 0
  1183. 2025-05-17 02:24:49,014 - __main__ - INFO - sglang running req: 12 queue req: 0
  1184. 2025-05-17 02:24:49,301 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  1185. 2025-05-17 02:24:49,301 - __main__ - INFO - Worker 1 exiting due to empty queue
  1186. 2025-05-17 02:24:49,301 - __main__ - INFO - Worker 2 exiting due to empty queue
  1187. 2025-05-17 02:24:49,301 - __main__ - INFO - Worker 3 exiting due to empty queue
  1188. 2025-05-17 02:24:49,301 - __main__ - INFO - Worker 4 exiting due to empty queue
  1189. 2025-05-17 02:24:49,302 - __main__ - INFO - Worker 5 exiting due to empty queue
  1190. 2025-05-17 02:24:49,302 - __main__ - INFO - Worker 6 exiting due to empty queue
  1191. 2025-05-17 02:24:49,302 - __main__ - INFO - Worker 7 exiting due to empty queue
  1192. 2025-05-17 02:24:49,880 - __main__ - INFO - Queue remaining: 0
  1193. 2025-05-17 02:24:49,880 - __main__ - INFO -
  1194. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1195. ----------------------------------------------------------------------------------
  1196. sglang_input_tokens 692.31 692.31
  1197. sglang_output_tokens 126.72 126.72
  1198. 2025-05-17 02:24:49,880 - __main__ - INFO -
  1199. Worker ID | finished | started
  1200. ----------+----------+--------
  1201. 0 | 54 | 67
  1202. 2025-05-17 02:24:50,720 - sglang - INFO - [2025-05-17 02:24:50 TP0] Decode batch. #running-req: 13, #token: 29512, token usage: 0.78, gen throughput (token/s): 295.57, #queue-req: 0
  1203. 2025-05-17 02:24:50,720 - __main__ - INFO - sglang running req: 13 queue req: 0
  1204. 2025-05-17 02:24:51,690 - sglang - INFO - [2025-05-17 02:24:51 TP0] Decode batch. #running-req: 9, #token: 20501, token usage: 0.54, gen throughput (token/s): 410.23, #queue-req: 0
  1205. 2025-05-17 02:24:51,690 - __main__ - INFO - sglang running req: 9 queue req: 0
  1206. 2025-05-17 02:24:52,652 - sglang - INFO - [2025-05-17 02:24:52 TP0] Decode batch. #running-req: 9, #token: 20861, token usage: 0.55, gen throughput (token/s): 374.15, #queue-req: 0
  1207. 2025-05-17 02:24:52,653 - __main__ - INFO - sglang running req: 9 queue req: 0
  1208. 2025-05-17 02:24:53,615 - sglang - INFO - [2025-05-17 02:24:53 TP0] Decode batch. #running-req: 9, #token: 21221, token usage: 0.56, gen throughput (token/s): 373.93, #queue-req: 0
  1209. 2025-05-17 02:24:53,615 - __main__ - INFO - sglang running req: 9 queue req: 0
  1210. 2025-05-17 02:24:54,519 - sglang - INFO - [2025-05-17 02:24:54 TP0] Decode batch. #running-req: 5, #token: 11588, token usage: 0.31, gen throughput (token/s): 295.32, #queue-req: 0
  1211. 2025-05-17 02:24:54,519 - __main__ - INFO - sglang running req: 5 queue req: 0
  1212. 2025-05-17 02:24:55,410 - sglang - INFO - [2025-05-17 02:24:55 TP0] Decode batch. #running-req: 5, #token: 11788, token usage: 0.31, gen throughput (token/s): 224.53, #queue-req: 0
  1213. 2025-05-17 02:24:55,410 - __main__ - INFO - sglang running req: 5 queue req: 0
  1214. 2025-05-17 02:24:56,299 - sglang - INFO - [2025-05-17 02:24:56 TP0] Decode batch. #running-req: 5, #token: 11988, token usage: 0.32, gen throughput (token/s): 225.02, #queue-req: 0
  1215. 2025-05-17 02:24:56,299 - __main__ - INFO - sglang running req: 5 queue req: 0
  1216. 2025-05-17 02:24:57,181 - sglang - INFO - [2025-05-17 02:24:57 TP0] Decode batch. #running-req: 4, #token: 9907, token usage: 0.26, gen throughput (token/s): 197.18, #queue-req: 0
  1217. 2025-05-17 02:24:57,181 - __main__ - INFO - sglang running req: 4 queue req: 0
  1218. 2025-05-17 02:24:58,058 - sglang - INFO - [2025-05-17 02:24:58 TP0] Decode batch. #running-req: 4, #token: 10067, token usage: 0.27, gen throughput (token/s): 182.43, #queue-req: 0
  1219. 2025-05-17 02:24:58,058 - __main__ - INFO - sglang running req: 4 queue req: 0
  1220. 2025-05-17 02:24:58,933 - sglang - INFO - [2025-05-17 02:24:58 TP0] Decode batch. #running-req: 1, #token: 2719, token usage: 0.07, gen throughput (token/s): 162.32, #queue-req: 0
  1221. 2025-05-17 02:24:58,933 - __main__ - INFO - sglang running req: 1 queue req: 0
  1222. 2025-05-17 02:24:58,961 - __main__ - INFO - Finished TaskGroup for worker on a47ce4ecdd6200876f5b8de00bb3ccbac96ba956
  1223. 2025-05-17 02:24:58,961 - __main__ - INFO - Got 1 docs for a47ce4ecdd6200876f5b8de00bb3ccbac96ba956
  1224. 2025-05-17 02:24:58,963 - __main__ - INFO - Worker 0 exiting due to empty queue
  1225. 2025-05-17 02:24:58,964 - __main__ - INFO - Work done
  1226. 2025-05-17 02:24:58,964 - __main__ - INFO - Got cancellation request for SGLang server
  1227. 2025-05-17 02:38:13,235 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  1228. 2025-05-17 02:38:13,235 - __main__ - INFO - Loading file at olmocr_workspace/job_1747420686/input.pdf as PDF document
  1229. 2025-05-17 02:38:13,235 - __main__ - INFO - Found 1 total pdf paths to add
  1230. 2025-05-17 02:38:13,241 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  1231. 2025-05-17 02:38:13,477 - __main__ - INFO - Starting pipeline with PID 379816
  1232. 2025-05-17 02:38:13,477 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  1233. 2025-05-17 02:38:19,275 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  1234. 2025-05-17 02:38:20,316 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  1235. 2025-05-17 02:38:21,360 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  1236. 2025-05-17 02:38:22,423 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  1237. 2025-05-17 02:38:23,492 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  1238. 2025-05-17 02:38:24,570 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  1239. 2025-05-17 02:38:25,634 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  1240. 2025-05-17 02:38:25,675 - sglang - INFO - [2025-05-17 02:38:25] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1020889166, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  1241. 2025-05-17 02:38:25,675 - __main__ - INFO - [2025-05-17 02:38:25] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1020889166, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  1242. 2025-05-17 02:38:26,714 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  1243. 2025-05-17 02:38:27,757 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  1244. 2025-05-17 02:38:28,801 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  1245. 2025-05-17 02:38:29,844 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  1246. 2025-05-17 02:38:30,891 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  1247. 2025-05-17 02:38:31,931 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  1248. 2025-05-17 02:38:32,958 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  1249. 2025-05-17 02:38:34,016 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  1250. 2025-05-17 02:38:34,755 - sglang - INFO - [2025-05-17 02:38:34] Use chat template for the OpenAI-compatible API server: qwen2-vl
  1251. 2025-05-17 02:38:34,755 - __main__ - INFO - [2025-05-17 02:38:34] Use chat template for the OpenAI-compatible API server: qwen2-vl
  1252. 2025-05-17 02:38:35,089 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  1253. 2025-05-17 02:38:36,156 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  1254. 2025-05-17 02:38:37,224 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  1255. 2025-05-17 02:38:38,293 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  1256. 2025-05-17 02:38:39,354 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  1257. 2025-05-17 02:38:40,405 - sglang - INFO - [2025-05-17 02:38:40 TP0] Overlap scheduler is disabled for multimodal models.
  1258. 2025-05-17 02:38:40,405 - __main__ - INFO - [2025-05-17 02:38:40 TP0] Overlap scheduler is disabled for multimodal models.
  1259. 2025-05-17 02:38:40,407 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  1260. 2025-05-17 02:38:40,933 - sglang - INFO - [2025-05-17 02:38:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  1261. 2025-05-17 02:38:40,933 - __main__ - INFO - [2025-05-17 02:38:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  1262. 2025-05-17 02:38:40,933 - sglang - INFO - [2025-05-17 02:38:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  1263. 2025-05-17 02:38:40,933 - __main__ - INFO - [2025-05-17 02:38:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  1264. 2025-05-17 02:38:40,933 - sglang - INFO - [2025-05-17 02:38:40 TP0] Init torch distributed begin.
  1265. 2025-05-17 02:38:40,933 - __main__ - INFO - [2025-05-17 02:38:40 TP0] Init torch distributed begin.
  1266. 2025-05-17 02:38:41,480 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  1267. 2025-05-17 02:38:42,548 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  1268. 2025-05-17 02:38:43,616 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  1269. 2025-05-17 02:38:44,683 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  1270. 2025-05-17 02:38:45,756 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  1271. 2025-05-17 02:38:46,297 - sglang - INFO - [2025-05-17 02:38:46 TP0] Load weight begin. avail mem=23.33 GB
  1272. 2025-05-17 02:38:46,298 - __main__ - INFO - [2025-05-17 02:38:46 TP0] Load weight begin. avail mem=23.33 GB
  1273. 2025-05-17 02:38:46,813 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  1274. 2025-05-17 02:38:47,441 - sglang - INFO - [2025-05-17 02:38:47 TP0] Using model weights format ['*.safetensors']
  1275. 2025-05-17 02:38:47,441 - __main__ - INFO - [2025-05-17 02:38:47 TP0] Using model weights format ['*.safetensors']
  1276. 2025-05-17 02:38:47,868 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  1277. 2025-05-17 02:38:47,932 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  1278. 2025-05-17 02:38:47,933 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  1279. 2025-05-17 02:38:48,258 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.08it/s]
  1280. 2025-05-17 02:38:48,258 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.08it/s]
  1281. 2025-05-17 02:38:48,920 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  1282. 2025-05-17 02:38:49,257 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.39it/s]
  1283. 2025-05-17 02:38:49,257 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.39it/s]
  1284. 2025-05-17 02:38:49,974 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  1285. 2025-05-17 02:38:50,250 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.18it/s]
  1286. 2025-05-17 02:38:50,250 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.18it/s]
  1287. 2025-05-17 02:38:51,028 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  1288. 2025-05-17 02:38:51,216 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
  1289. 2025-05-17 02:38:51,217 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
  1290. 2025-05-17 02:38:51,217 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.22it/s]
  1291. 2025-05-17 02:38:51,217 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.22it/s]
  1292. 2025-05-17 02:38:51,217 - sglang - INFO -
  1293. 2025-05-17 02:38:51,217 - __main__ - INFO -
  1294. 2025-05-17 02:38:51,374 - sglang - INFO - [2025-05-17 02:38:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  1295. 2025-05-17 02:38:51,374 - __main__ - INFO - [2025-05-17 02:38:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  1296. 2025-05-17 02:38:51,381 - sglang - INFO - [2025-05-17 02:38:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  1297. 2025-05-17 02:38:51,381 - __main__ - INFO - [2025-05-17 02:38:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  1298. 2025-05-17 02:38:51,381 - sglang - INFO - [2025-05-17 02:38:51 TP0] Memory pool end. avail mem=5.30 GB
  1299. 2025-05-17 02:38:51,381 - __main__ - INFO - [2025-05-17 02:38:51 TP0] Memory pool end. avail mem=5.30 GB
  1300. 2025-05-17 02:38:51,569 - sglang - INFO - [2025-05-17 02:38:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  1301. 2025-05-17 02:38:51,569 - __main__ - INFO - [2025-05-17 02:38:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  1302. 2025-05-17 02:38:52,074 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  1303. 2025-05-17 02:38:53,118 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  1304. 2025-05-17 02:38:53,375 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.05s/it] 50%|█████ | 2/4 [00:01<00:01, 1.73it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.33it/s] 100%|██████████| 4/4 [00:01<00:00, 2.79it/s] 100%|██████████| 4/4 [00:01<00:00, 2.22it/s]
  1305. 2025-05-17 02:38:53,375 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.05s/it] 50%|█████ | 2/4 [00:01<00:01, 1.73it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.33it/s] 100%|██████████| 4/4 [00:01<00:00, 2.79it/s] 100%|██████████| 4/4 [00:01<00:00, 2.22it/s]
  1306. 2025-05-17 02:38:53,375 - sglang - INFO - [2025-05-17 02:38:53 TP0] Capture cuda graph end. Time elapsed: 1.81 s
  1307. 2025-05-17 02:38:53,375 - __main__ - INFO - [2025-05-17 02:38:53 TP0] Capture cuda graph end. Time elapsed: 1.81 s
  1308. 2025-05-17 02:38:54,182 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  1309. 2025-05-17 02:38:55,246 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  1310. 2025-05-17 02:38:55,839 - sglang - INFO - [2025-05-17 02:38:55 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  1311. 2025-05-17 02:38:55,839 - __main__ - INFO - [2025-05-17 02:38:55 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  1312. 2025-05-17 02:38:56,302 - __main__ - INFO - sglang server is ready.
  1313. 2025-05-17 02:38:56,302 - __main__ - INFO - Queue remaining: 1
  1314. 2025-05-17 02:38:56,302 - __main__ - INFO -
  1315. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1316. ----------------------------------------------------------------------------------
  1317. 2025-05-17 02:38:56,303 - __main__ - INFO -
  1318. Worker ID
  1319. ---------
  1320. 2025-05-17 02:38:56,303 - __main__ - INFO - Worker 0 processing work item 5cb3134f25c471b5a78a0f6d882d84ad299e2a6f
  1321. 2025-05-17 02:38:56,303 - __main__ - INFO - Created all tasks for 5cb3134f25c471b5a78a0f6d882d84ad299e2a6f
  1322. 2025-05-17 02:38:56,308 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747420686/input.pdf in worker 0
  1323. 2025-05-17 02:38:56,913 - sglang - INFO - [2025-05-17 02:38:56 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  1324. 2025-05-17 02:38:56,913 - __main__ - INFO - [2025-05-17 02:38:56 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  1325. 2025-05-17 02:38:56,913 - __main__ - INFO - sglang running req: 0 queue req: 0
  1326. 2025-05-17 02:38:57,760 - sglang - INFO - [2025-05-17 02:38:57] The server is fired up and ready to roll!
  1327. 2025-05-17 02:38:57,760 - __main__ - INFO - [2025-05-17 02:38:57] The server is fired up and ready to roll!
  1328. 2025-05-17 02:39:02,529 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-1
  1329. 2025-05-17 02:39:02,566 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-2
  1330. 2025-05-17 02:39:02,578 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-3
  1331. 2025-05-17 02:39:02,594 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-4
  1332. 2025-05-17 02:39:02,612 - __main__ - INFO - Built page query for olmocr_workspace/job_1747420686/input.pdf-5
  1333. 2025-05-17 02:39:06,379 - __main__ - INFO - Queue remaining: 0
  1334. 2025-05-17 02:39:06,379 - __main__ - INFO -
  1335. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1336. ----------------------------------------------------------------------------------
  1337. 2025-05-17 02:39:06,379 - __main__ - INFO -
  1338. Worker ID | started
  1339. ----------+--------
  1340. 0 | 5
  1341. 2025-05-17 02:39:16,381 - __main__ - INFO - Queue remaining: 0
  1342. 2025-05-17 02:39:16,381 - __main__ - INFO -
  1343. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1344. ----------------------------------------------------------------------------------
  1345. 2025-05-17 02:39:16,381 - __main__ - INFO -
  1346. Worker ID | started
  1347. ----------+--------
  1348. 0 | 5
  1349. 2025-05-17 02:39:24,017 - sglang - INFO - [2025-05-17 02:39:24 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  1350. 2025-05-17 02:39:24,017 - __main__ - INFO - sglang running req: 0 queue req: 0
  1351. 2025-05-17 02:39:24,809 - sglang - INFO - [2025-05-17 02:39:24 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  1352. 2025-05-17 02:39:24,809 - __main__ - INFO - sglang running req: 1 queue req: 0
  1353. 2025-05-17 02:39:26,382 - __main__ - INFO - Queue remaining: 0
  1354. 2025-05-17 02:39:26,383 - __main__ - INFO -
  1355. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1356. ----------------------------------------------------------------------------------
  1357. 2025-05-17 02:39:26,383 - __main__ - INFO -
  1358. Worker ID | started
  1359. ----------+--------
  1360. 0 | 5
  1361. 2025-05-17 02:39:28,234 - sglang - INFO - [2025-05-17 02:39:28 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.31, #queue-req: 0
  1362. 2025-05-17 02:39:28,234 - __main__ - INFO - sglang running req: 5 queue req: 0
  1363. 2025-05-17 02:39:28,499 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  1364. 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 1 exiting due to empty queue
  1365. 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 2 exiting due to empty queue
  1366. 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 3 exiting due to empty queue
  1367. 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 4 exiting due to empty queue
  1368. 2025-05-17 02:39:28,499 - __main__ - INFO - Worker 5 exiting due to empty queue
  1369. 2025-05-17 02:39:28,500 - __main__ - INFO - Worker 6 exiting due to empty queue
  1370. 2025-05-17 02:39:28,500 - __main__ - INFO - Worker 7 exiting due to empty queue
  1371. 2025-05-17 02:39:29,089 - sglang - INFO - [2025-05-17 02:39:29 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.75, #queue-req: 0
  1372. 2025-05-17 02:39:29,090 - __main__ - INFO - sglang running req: 5 queue req: 0
  1373. 2025-05-17 02:39:29,945 - sglang - INFO - [2025-05-17 02:39:29 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.90, #queue-req: 0
  1374. 2025-05-17 02:39:29,945 - __main__ - INFO - sglang running req: 5 queue req: 0
  1375. 2025-05-17 02:39:30,800 - sglang - INFO - [2025-05-17 02:39:30 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.78, #queue-req: 0
  1376. 2025-05-17 02:39:30,800 - __main__ - INFO - sglang running req: 5 queue req: 0
  1377. 2025-05-17 02:39:31,656 - sglang - INFO - [2025-05-17 02:39:31 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.67, #queue-req: 0
  1378. 2025-05-17 02:39:31,656 - __main__ - INFO - sglang running req: 5 queue req: 0
  1379. 2025-05-17 02:39:32,513 - sglang - INFO - [2025-05-17 02:39:32 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 233.32, #queue-req: 0
  1380. 2025-05-17 02:39:32,513 - __main__ - INFO - sglang running req: 5 queue req: 0
  1381. 2025-05-17 02:39:33,374 - sglang - INFO - [2025-05-17 02:39:33 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.41, #queue-req: 0
  1382. 2025-05-17 02:39:33,374 - __main__ - INFO - sglang running req: 5 queue req: 0
  1383. 2025-05-17 02:39:34,235 - sglang - INFO - [2025-05-17 02:39:34 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.14, #queue-req: 0
  1384. 2025-05-17 02:39:34,236 - __main__ - INFO - sglang running req: 5 queue req: 0
  1385. 2025-05-17 02:39:35,094 - sglang - INFO - [2025-05-17 02:39:35 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 218.84, #queue-req: 0
  1386. 2025-05-17 02:39:35,094 - __main__ - INFO - sglang running req: 3 queue req: 0
  1387. 2025-05-17 02:39:35,936 - sglang - INFO - [2025-05-17 02:39:35 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.67, #queue-req: 0
  1388. 2025-05-17 02:39:35,936 - __main__ - INFO - sglang running req: 3 queue req: 0
  1389. 2025-05-17 02:39:36,384 - __main__ - INFO - Queue remaining: 0
  1390. 2025-05-17 02:39:36,385 - __main__ - INFO -
  1391. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  1392. ----------------------------------------------------------------------------------
  1393. sglang_input_tokens 93.47 93.47
  1394. sglang_output_tokens 18.16 18.16
  1395. 2025-05-17 02:39:36,385 - __main__ - INFO -
  1396. Worker ID | finished | started
  1397. ----------+----------+--------
  1398. 0 | 4 | 5
  1399. 2025-05-17 02:39:36,767 - sglang - INFO - [2025-05-17 02:39:36 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 79.35, #queue-req: 0
  1400. 2025-05-17 02:39:36,768 - __main__ - INFO - sglang running req: 1 queue req: 0
  1401. 2025-05-17 02:39:37,592 - sglang - INFO - [2025-05-17 02:39:37 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.48, #queue-req: 0
  1402. 2025-05-17 02:39:37,593 - __main__ - INFO - sglang running req: 1 queue req: 0
  1403. 2025-05-17 02:39:38,257 - __main__ - INFO - Finished TaskGroup for worker on 5cb3134f25c471b5a78a0f6d882d84ad299e2a6f
  1404. 2025-05-17 02:39:38,258 - __main__ - INFO - Got 1 docs for 5cb3134f25c471b5a78a0f6d882d84ad299e2a6f
  1405. 2025-05-17 02:39:38,259 - __main__ - INFO - Worker 0 exiting due to empty queue
  1406. 2025-05-17 02:39:38,259 - __main__ - INFO - Work done
  1407. 2025-05-17 02:39:38,259 - __main__ - INFO - Got cancellation request for SGLang server
  1408. 2025-05-17 22:05:39,818 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  1409. 2025-05-17 22:05:39,818 - __main__ - INFO - Loading file at olmocr_workspace/job_1747490733/input.pdf as PDF document
  1410. 2025-05-17 22:05:39,818 - __main__ - INFO - Found 1 total pdf paths to add
  1411. 2025-05-17 22:05:39,822 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  1412. 2025-05-17 22:05:40,045 - __main__ - INFO - Starting pipeline with PID 399029
  1413. 2025-05-17 22:05:40,045 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  1414. 2025-05-17 22:07:49,638 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  1415. 2025-05-17 22:07:50,677 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  1416. 2025-05-17 22:07:51,731 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  1417. 2025-05-17 22:07:52,785 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  1418. 2025-05-17 22:07:53,842 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  1419. 2025-05-17 22:07:54,878 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  1420. 2025-05-17 22:07:55,819 - sglang - INFO - [2025-05-17 22:07:55] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1026987283, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  1421. 2025-05-17 22:07:55,819 - __main__ - INFO - [2025-05-17 22:07:55] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1026987283, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  1422. 2025-05-17 22:07:55,995 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  1423. 2025-05-17 22:07:57,029 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  1424. 2025-05-17 22:07:58,107 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  1425. 2025-05-17 22:07:59,160 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  1426. 2025-05-17 22:08:00,227 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  1427. 2025-05-17 22:08:01,291 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  1428. 2025-05-17 22:08:02,364 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  1429. 2025-05-17 22:08:02,647 - sglang - INFO - [2025-05-17 22:08:02] Use chat template for the OpenAI-compatible API server: qwen2-vl
  1430. 2025-05-17 22:08:02,647 - __main__ - INFO - [2025-05-17 22:08:02] Use chat template for the OpenAI-compatible API server: qwen2-vl
  1431. 2025-05-17 22:08:03,441 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  1432. 2025-05-17 22:08:04,528 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  1433. 2025-05-17 22:08:05,585 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  1434. 2025-05-17 22:08:06,669 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  1435. 2025-05-17 22:08:07,710 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  1436. 2025-05-17 22:08:08,777 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  1437. 2025-05-17 22:08:09,197 - sglang - INFO - [2025-05-17 22:08:09 TP0] Overlap scheduler is disabled for multimodal models.
  1438. 2025-05-17 22:08:09,198 - __main__ - INFO - [2025-05-17 22:08:09 TP0] Overlap scheduler is disabled for multimodal models.
  1439. 2025-05-17 22:08:09,380 - sglang - INFO - [2025-05-17 22:08:09 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  1440. 2025-05-17 22:08:09,380 - __main__ - INFO - [2025-05-17 22:08:09 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  1441. 2025-05-17 22:08:09,380 - sglang - INFO - [2025-05-17 22:08:09 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  1442. 2025-05-17 22:08:09,380 - __main__ - INFO - [2025-05-17 22:08:09 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  1443. 2025-05-17 22:08:09,380 - sglang - INFO - [2025-05-17 22:08:09 TP0] Init torch distributed begin.
  1444. 2025-05-17 22:08:09,380 - __main__ - INFO - [2025-05-17 22:08:09 TP0] Init torch distributed begin.
  1445. 2025-05-17 22:08:09,854 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  1446. 2025-05-17 22:08:10,935 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  1447. 2025-05-17 22:08:11,995 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  1448. 2025-05-17 22:08:13,066 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  1449. 2025-05-17 22:08:14,131 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  1450. 2025-05-17 22:08:14,699 - sglang - INFO - [2025-05-17 22:08:14 TP0] Load weight begin. avail mem=23.33 GB
  1451. 2025-05-17 22:08:14,699 - __main__ - INFO - [2025-05-17 22:08:14 TP0] Load weight begin. avail mem=23.33 GB
  1452. 2025-05-17 22:08:15,208 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  1453. 2025-05-17 22:08:15,384 - sglang - INFO - [2025-05-17 22:08:15 TP0] Scheduler hit an exception: Traceback (most recent call last):
  1454. 2025-05-17 22:08:15,384 - __main__ - INFO - [2025-05-17 22:08:15 TP0] Scheduler hit an exception: Traceback (most recent call last):
  1455. 2025-05-17 22:08:15,384 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  1456. 2025-05-17 22:08:15,384 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  1457. 2025-05-17 22:08:15,384 - sglang - INFO - sock = connection.create_connection(
  1458. 2025-05-17 22:08:15,384 - __main__ - INFO - sock = connection.create_connection(
  1459. 2025-05-17 22:08:15,384 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1460. 2025-05-17 22:08:15,384 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1461. 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  1462. 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  1463. 2025-05-17 22:08:15,385 - sglang - INFO - raise err
  1464. 2025-05-17 22:08:15,385 - __main__ - INFO - raise err
  1465. 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  1466. 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  1467. 2025-05-17 22:08:15,385 - sglang - INFO - sock.connect(sa)
  1468. 2025-05-17 22:08:15,385 - __main__ - INFO - sock.connect(sa)
  1469. 2025-05-17 22:08:15,385 - sglang - INFO - OSError: [Errno 101] Network is unreachable
  1470. 2025-05-17 22:08:15,385 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
  1471. 2025-05-17 22:08:15,385 - sglang - INFO -
  1472. 2025-05-17 22:08:15,385 - __main__ - INFO -
  1473. 2025-05-17 22:08:15,385 - sglang - INFO - The above exception was the direct cause of the following exception:
  1474. 2025-05-17 22:08:15,385 - __main__ - INFO - The above exception was the direct cause of the following exception:
  1475. 2025-05-17 22:08:15,385 - sglang - INFO -
  1476. 2025-05-17 22:08:15,385 - __main__ - INFO -
  1477. 2025-05-17 22:08:15,385 - sglang - INFO - Traceback (most recent call last):
  1478. 2025-05-17 22:08:15,385 - __main__ - INFO - Traceback (most recent call last):
  1479. 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  1480. 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  1481. 2025-05-17 22:08:15,385 - sglang - INFO - response = self._make_request(
  1482. 2025-05-17 22:08:15,385 - __main__ - INFO - response = self._make_request(
  1483. 2025-05-17 22:08:15,385 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  1484. 2025-05-17 22:08:15,385 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  1485. 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  1486. 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  1487. 2025-05-17 22:08:15,385 - sglang - INFO - raise new_e
  1488. 2025-05-17 22:08:15,385 - __main__ - INFO - raise new_e
  1489. 2025-05-17 22:08:15,385 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  1490. 2025-05-17 22:08:15,385 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  1491. 2025-05-17 22:08:15,386 - sglang - INFO - self._validate_conn(conn)
  1492. 2025-05-17 22:08:15,386 - __main__ - INFO - self._validate_conn(conn)
  1493. 2025-05-17 22:08:15,386 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  1494. 2025-05-17 22:08:15,386 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  1495. 2025-05-17 22:08:15,386 - sglang - INFO - conn.connect()
  1496. 2025-05-17 22:08:15,386 - __main__ - INFO - conn.connect()
  1497. 2025-05-17 22:08:15,386 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  1498. 2025-05-17 22:08:15,386 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  1499. 2025-05-17 22:08:15,386 - sglang - INFO - self.sock = sock = self._new_conn()
  1500. 2025-05-17 22:08:15,386 - __main__ - INFO - self.sock = sock = self._new_conn()
  1501. 2025-05-17 22:08:15,386 - sglang - INFO - ^^^^^^^^^^^^^^^^
  1502. 2025-05-17 22:08:15,386 - __main__ - INFO - ^^^^^^^^^^^^^^^^
  1503. 2025-05-17 22:08:15,386 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  1504. 2025-05-17 22:08:15,386 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  1505. 2025-05-17 22:08:15,386 - sglang - INFO - raise NewConnectionError(
  1506. 2025-05-17 22:08:15,386 - __main__ - INFO - raise NewConnectionError(
  1507. 2025-05-17 22:08:15,386 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
  1508. 2025-05-17 22:08:15,386 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
  1509. 2025-05-17 22:08:15,386 - sglang - INFO -
  1510. 2025-05-17 22:08:15,386 - __main__ - INFO -
  1511. 2025-05-17 22:08:15,386 - sglang - INFO - The above exception was the direct cause of the following exception:
  1512. 2025-05-17 22:08:15,386 - __main__ - INFO - The above exception was the direct cause of the following exception:
  1513. 2025-05-17 22:08:15,386 - sglang - INFO -
  1514. 2025-05-17 22:08:15,386 - __main__ - INFO -
  1515. 2025-05-17 22:08:15,386 - sglang - INFO - Traceback (most recent call last):
  1516. 2025-05-17 22:08:15,386 - __main__ - INFO - Traceback (most recent call last):
  1517. 2025-05-17 22:08:15,386 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  1518. 2025-05-17 22:08:15,386 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  1519. 2025-05-17 22:08:15,386 - sglang - INFO - resp = conn.urlopen(
  1520. 2025-05-17 22:08:15,386 - __main__ - INFO - resp = conn.urlopen(
  1521. 2025-05-17 22:08:15,386 - sglang - INFO - ^^^^^^^^^^^^^
  1522. 2025-05-17 22:08:15,386 - __main__ - INFO - ^^^^^^^^^^^^^
  1523. 2025-05-17 22:08:15,387 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  1524. 2025-05-17 22:08:15,387 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  1525. 2025-05-17 22:08:15,387 - sglang - INFO - retries = retries.increment(
  1526. 2025-05-17 22:08:15,387 - __main__ - INFO - retries = retries.increment(
  1527. 2025-05-17 22:08:15,387 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  1528. 2025-05-17 22:08:15,387 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  1529. 2025-05-17 22:08:15,387 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  1530. 2025-05-17 22:08:15,387 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  1531. 2025-05-17 22:08:15,387 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  1532. 2025-05-17 22:08:15,387 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  1533. 2025-05-17 22:08:15,387 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1534. 2025-05-17 22:08:15,387 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1535. 2025-05-17 22:08:15,387 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  1536. 2025-05-17 22:08:15,387 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  1537. 2025-05-17 22:08:15,387 - sglang - INFO -
  1538. 2025-05-17 22:08:15,387 - __main__ - INFO -
  1539. 2025-05-17 22:08:15,387 - sglang - INFO - During handling of the above exception, another exception occurred:
  1540. 2025-05-17 22:08:15,387 - __main__ - INFO - During handling of the above exception, another exception occurred:
  1541. 2025-05-17 22:08:15,387 - sglang - INFO -
  1542. 2025-05-17 22:08:15,387 - __main__ - INFO -
  1543. 2025-05-17 22:08:15,387 - sglang - INFO - Traceback (most recent call last):
  1544. 2025-05-17 22:08:15,387 - __main__ - INFO - Traceback (most recent call last):
  1545. 2025-05-17 22:08:15,387 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  1546. 2025-05-17 22:08:15,387 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  1547. 2025-05-17 22:08:15,387 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  1548. 2025-05-17 22:08:15,387 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  1549. 2025-05-17 22:08:15,387 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1550. 2025-05-17 22:08:15,387 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1551. 2025-05-17 22:08:15,387 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  1552. 2025-05-17 22:08:15,387 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  1553. 2025-05-17 22:08:15,387 - sglang - INFO - self.tp_worker = TpWorkerClass(
  1554. 2025-05-17 22:08:15,387 - __main__ - INFO - self.tp_worker = TpWorkerClass(
  1555. 2025-05-17 22:08:15,388 - sglang - INFO - ^^^^^^^^^^^^^^
  1556. 2025-05-17 22:08:15,388 - __main__ - INFO - ^^^^^^^^^^^^^^
  1557. 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  1558. 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  1559. 2025-05-17 22:08:15,388 - sglang - INFO - self.model_runner = ModelRunner(
  1560. 2025-05-17 22:08:15,388 - __main__ - INFO - self.model_runner = ModelRunner(
  1561. 2025-05-17 22:08:15,388 - sglang - INFO - ^^^^^^^^^^^^
  1562. 2025-05-17 22:08:15,388 - __main__ - INFO - ^^^^^^^^^^^^
  1563. 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  1564. 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  1565. 2025-05-17 22:08:15,388 - sglang - INFO - self.load_model()
  1566. 2025-05-17 22:08:15,388 - __main__ - INFO - self.load_model()
  1567. 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  1568. 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  1569. 2025-05-17 22:08:15,388 - sglang - INFO - self.model = get_model(
  1570. 2025-05-17 22:08:15,388 - __main__ - INFO - self.model = get_model(
  1571. 2025-05-17 22:08:15,388 - sglang - INFO - ^^^^^^^^^^
  1572. 2025-05-17 22:08:15,388 - __main__ - INFO - ^^^^^^^^^^
  1573. 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  1574. 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  1575. 2025-05-17 22:08:15,388 - sglang - INFO - return loader.load_model(
  1576. 2025-05-17 22:08:15,388 - __main__ - INFO - return loader.load_model(
  1577. 2025-05-17 22:08:15,388 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  1578. 2025-05-17 22:08:15,388 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  1579. 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  1580. 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  1581. 2025-05-17 22:08:15,388 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
  1582. 2025-05-17 22:08:15,388 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
  1583. 2025-05-17 22:08:15,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  1584. 2025-05-17 22:08:15,388 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  1585. 2025-05-17 22:08:15,388 - sglang - INFO - for name, loaded_weight in weights:
  1586. 2025-05-17 22:08:15,388 - __main__ - INFO - for name, loaded_weight in weights:
  1587. 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  1588. 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  1589. 2025-05-17 22:08:15,389 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
  1590. 2025-05-17 22:08:15,389 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
  1591. 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1592. 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1593. 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  1594. 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  1595. 2025-05-17 22:08:15,389 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  1596. 2025-05-17 22:08:15,389 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  1597. 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  1598. 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  1599. 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  1600. 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  1601. 2025-05-17 22:08:15,389 - sglang - INFO - hf_folder = download_weights_from_hf(
  1602. 2025-05-17 22:08:15,389 - __main__ - INFO - hf_folder = download_weights_from_hf(
  1603. 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  1604. 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  1605. 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  1606. 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  1607. 2025-05-17 22:08:15,389 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  1608. 2025-05-17 22:08:15,389 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  1609. 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1610. 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1611. 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  1612. 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  1613. 2025-05-17 22:08:15,389 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
  1614. 2025-05-17 22:08:15,389 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
  1615. 2025-05-17 22:08:15,389 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1616. 2025-05-17 22:08:15,389 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1617. 2025-05-17 22:08:15,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  1618. 2025-05-17 22:08:15,389 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  1619. 2025-05-17 22:08:15,389 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  1620. 2025-05-17 22:08:15,390 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  1621. 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1622. 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1623. 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  1624. 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  1625. 2025-05-17 22:08:15,390 - sglang - INFO - self._api.repo_info(
  1626. 2025-05-17 22:08:15,390 - __main__ - INFO - self._api.repo_info(
  1627. 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  1628. 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  1629. 2025-05-17 22:08:15,390 - sglang - INFO - return fn(*args, **kwargs)
  1630. 2025-05-17 22:08:15,390 - __main__ - INFO - return fn(*args, **kwargs)
  1631. 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  1632. 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  1633. 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  1634. 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  1635. 2025-05-17 22:08:15,390 - sglang - INFO - return method(
  1636. 2025-05-17 22:08:15,390 - __main__ - INFO - return method(
  1637. 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^
  1638. 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^
  1639. 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  1640. 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  1641. 2025-05-17 22:08:15,390 - sglang - INFO - return fn(*args, **kwargs)
  1642. 2025-05-17 22:08:15,390 - __main__ - INFO - return fn(*args, **kwargs)
  1643. 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  1644. 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  1645. 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  1646. 2025-05-17 22:08:15,390 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  1647. 2025-05-17 22:08:15,390 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  1648. 2025-05-17 22:08:15,390 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  1649. 2025-05-17 22:08:15,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1650. 2025-05-17 22:08:15,390 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1651. 2025-05-17 22:08:15,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  1652. 2025-05-17 22:08:15,391 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  1653. 2025-05-17 22:08:15,391 - sglang - INFO - return self.request("GET", url, **kwargs)
  1654. 2025-05-17 22:08:15,391 - __main__ - INFO - return self.request("GET", url, **kwargs)
  1655. 2025-05-17 22:08:15,391 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1656. 2025-05-17 22:08:15,391 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1657. 2025-05-17 22:08:15,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  1658. 2025-05-17 22:08:15,391 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  1659. 2025-05-17 22:08:15,391 - sglang - INFO - resp = self.send(prep, **send_kwargs)
  1660. 2025-05-17 22:08:15,391 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
  1661. 2025-05-17 22:08:15,391 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1662. 2025-05-17 22:08:15,391 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1663. 2025-05-17 22:08:15,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  1664. 2025-05-17 22:08:15,391 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  1665. 2025-05-17 22:08:15,391 - sglang - INFO - r = adapter.send(request, **kwargs)
  1666. 2025-05-17 22:08:15,391 - __main__ - INFO - r = adapter.send(request, **kwargs)
  1667. 2025-05-17 22:08:15,391 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1668. 2025-05-17 22:08:15,391 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1669. 2025-05-17 22:08:15,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  1670. 2025-05-17 22:08:15,391 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  1671. 2025-05-17 22:08:15,391 - sglang - INFO - return super().send(request, *args, **kwargs)
  1672. 2025-05-17 22:08:15,391 - __main__ - INFO - return super().send(request, *args, **kwargs)
  1673. 2025-05-17 22:08:15,392 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1674. 2025-05-17 22:08:15,392 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1675. 2025-05-17 22:08:15,392 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  1676. 2025-05-17 22:08:15,392 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  1677. 2025-05-17 22:08:15,392 - sglang - INFO - raise ConnectionError(e, request=request)
  1678. 2025-05-17 22:08:15,392 - __main__ - INFO - raise ConnectionError(e, request=request)
  1679. 2025-05-17 22:08:15,392 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: ae2f99d1-1701-487a-8bb1-775aa4b85868)')
  1680. 2025-05-17 22:08:15,392 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa0306d32d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: ae2f99d1-1701-487a-8bb1-775aa4b85868)')
  1681. 2025-05-17 22:08:15,392 - sglang - INFO -
  1682. 2025-05-17 22:08:15,392 - __main__ - INFO -
  1683. 2025-05-17 22:08:15,393 - sglang - INFO - [2025-05-17 22:08:15] Received sigquit from a child proces. It usually means the child failed.
  1684. 2025-05-17 22:08:15,393 - __main__ - INFO - [2025-05-17 22:08:15] Received sigquit from a child proces. It usually means the child failed.
  1685. 2025-05-17 22:08:15,556 - __main__ - WARNING - SGLang server task ended
  1686. 2025-05-17 22:08:16,284 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  1687. 2025-05-17 22:08:17,351 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  1688. 2025-05-17 22:08:18,418 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  1689. 2025-05-17 22:08:19,486 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  1690. 2025-05-17 22:08:20,554 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  1691. 2025-05-17 22:08:21,617 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  1692. 2025-05-17 22:08:21,653 - sglang - INFO - [2025-05-17 22:08:21] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=245788542, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  1693. 2025-05-17 22:08:21,653 - __main__ - INFO - [2025-05-17 22:08:21] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=245788542, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  1694. 2025-05-17 22:08:22,679 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  1695. 2025-05-17 22:08:23,765 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  1696. 2025-05-17 22:08:24,823 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  1697. 2025-05-17 22:08:25,890 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  1698. 2025-05-17 22:08:27,034 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  1699. 2025-05-17 22:08:28,099 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  1700. 2025-05-17 22:08:28,528 - sglang - INFO - [2025-05-17 22:08:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
  1701. 2025-05-17 22:08:28,528 - __main__ - INFO - [2025-05-17 22:08:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
  1702. 2025-05-17 22:08:29,176 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  1703. 2025-05-17 22:08:30,243 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  1704. 2025-05-17 22:08:31,310 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  1705. 2025-05-17 22:08:32,378 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  1706. 2025-05-17 22:08:33,446 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  1707. 2025-05-17 22:08:34,428 - sglang - INFO - [2025-05-17 22:08:34 TP0] Overlap scheduler is disabled for multimodal models.
  1708. 2025-05-17 22:08:34,428 - __main__ - INFO - [2025-05-17 22:08:34 TP0] Overlap scheduler is disabled for multimodal models.
  1709. 2025-05-17 22:08:34,522 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  1710. 2025-05-17 22:08:34,610 - sglang - INFO - [2025-05-17 22:08:34 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  1711. 2025-05-17 22:08:34,610 - __main__ - INFO - [2025-05-17 22:08:34 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  1712. 2025-05-17 22:08:34,610 - sglang - INFO - [2025-05-17 22:08:34 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  1713. 2025-05-17 22:08:34,610 - __main__ - INFO - [2025-05-17 22:08:34 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  1714. 2025-05-17 22:08:34,610 - sglang - INFO - [2025-05-17 22:08:34 TP0] Init torch distributed begin.
  1715. 2025-05-17 22:08:34,611 - __main__ - INFO - [2025-05-17 22:08:34 TP0] Init torch distributed begin.
  1716. 2025-05-17 22:08:35,599 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  1717. 2025-05-17 22:08:36,667 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  1718. 2025-05-17 22:08:37,722 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  1719. 2025-05-17 22:08:38,789 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  1720. 2025-05-17 22:08:39,857 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  1721. 2025-05-17 22:08:39,992 - sglang - INFO - [2025-05-17 22:08:39 TP0] Load weight begin. avail mem=23.33 GB
  1722. 2025-05-17 22:08:39,992 - __main__ - INFO - [2025-05-17 22:08:39 TP0] Load weight begin. avail mem=23.33 GB
  1723. 2025-05-17 22:08:40,668 - sglang - INFO - [2025-05-17 22:08:40 TP0] Scheduler hit an exception: Traceback (most recent call last):
  1724. 2025-05-17 22:08:40,668 - __main__ - INFO - [2025-05-17 22:08:40 TP0] Scheduler hit an exception: Traceback (most recent call last):
  1725. 2025-05-17 22:08:40,669 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  1726. 2025-05-17 22:08:40,669 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  1727. 2025-05-17 22:08:40,669 - sglang - INFO - sock = connection.create_connection(
  1728. 2025-05-17 22:08:40,669 - __main__ - INFO - sock = connection.create_connection(
  1729. 2025-05-17 22:08:40,669 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1730. 2025-05-17 22:08:40,669 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1731. 2025-05-17 22:08:40,669 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  1732. 2025-05-17 22:08:40,669 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  1733. 2025-05-17 22:08:40,669 - sglang - INFO - raise err
  1734. 2025-05-17 22:08:40,669 - __main__ - INFO - raise err
  1735. 2025-05-17 22:08:40,669 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  1736. 2025-05-17 22:08:40,669 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  1737. 2025-05-17 22:08:40,669 - sglang - INFO - sock.connect(sa)
  1738. 2025-05-17 22:08:40,669 - __main__ - INFO - sock.connect(sa)
  1739. 2025-05-17 22:08:40,670 - sglang - INFO - OSError: [Errno 101] Network is unreachable
  1740. 2025-05-17 22:08:40,670 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
  1741. 2025-05-17 22:08:40,670 - sglang - INFO -
  1742. 2025-05-17 22:08:40,670 - __main__ - INFO -
  1743. 2025-05-17 22:08:40,670 - sglang - INFO - The above exception was the direct cause of the following exception:
  1744. 2025-05-17 22:08:40,670 - __main__ - INFO - The above exception was the direct cause of the following exception:
  1745. 2025-05-17 22:08:40,670 - sglang - INFO -
  1746. 2025-05-17 22:08:40,670 - __main__ - INFO -
  1747. 2025-05-17 22:08:40,670 - sglang - INFO - Traceback (most recent call last):
  1748. 2025-05-17 22:08:40,670 - __main__ - INFO - Traceback (most recent call last):
  1749. 2025-05-17 22:08:40,670 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  1750. 2025-05-17 22:08:40,670 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  1751. 2025-05-17 22:08:40,670 - sglang - INFO - response = self._make_request(
  1752. 2025-05-17 22:08:40,670 - __main__ - INFO - response = self._make_request(
  1753. 2025-05-17 22:08:40,670 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  1754. 2025-05-17 22:08:40,670 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  1755. 2025-05-17 22:08:40,671 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  1756. 2025-05-17 22:08:40,671 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  1757. 2025-05-17 22:08:40,671 - sglang - INFO - raise new_e
  1758. 2025-05-17 22:08:40,671 - __main__ - INFO - raise new_e
  1759. 2025-05-17 22:08:40,671 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  1760. 2025-05-17 22:08:40,671 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  1761. 2025-05-17 22:08:40,671 - sglang - INFO - self._validate_conn(conn)
  1762. 2025-05-17 22:08:40,671 - __main__ - INFO - self._validate_conn(conn)
  1763. 2025-05-17 22:08:40,671 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  1764. 2025-05-17 22:08:40,671 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  1765. 2025-05-17 22:08:40,671 - sglang - INFO - conn.connect()
  1766. 2025-05-17 22:08:40,671 - __main__ - INFO - conn.connect()
  1767. 2025-05-17 22:08:40,671 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  1768. 2025-05-17 22:08:40,671 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  1769. 2025-05-17 22:08:40,671 - sglang - INFO - self.sock = sock = self._new_conn()
  1770. 2025-05-17 22:08:40,671 - __main__ - INFO - self.sock = sock = self._new_conn()
  1771. 2025-05-17 22:08:40,671 - sglang - INFO - ^^^^^^^^^^^^^^^^
  1772. 2025-05-17 22:08:40,672 - __main__ - INFO - ^^^^^^^^^^^^^^^^
  1773. 2025-05-17 22:08:40,672 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  1774. 2025-05-17 22:08:40,672 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  1775. 2025-05-17 22:08:40,672 - sglang - INFO - raise NewConnectionError(
  1776. 2025-05-17 22:08:40,672 - __main__ - INFO - raise NewConnectionError(
  1777. 2025-05-17 22:08:40,672 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable
  1778. 2025-05-17 22:08:40,672 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable
  1779. 2025-05-17 22:08:40,672 - sglang - INFO -
  1780. 2025-05-17 22:08:40,672 - __main__ - INFO -
  1781. 2025-05-17 22:08:40,672 - sglang - INFO - The above exception was the direct cause of the following exception:
  1782. 2025-05-17 22:08:40,672 - __main__ - INFO - The above exception was the direct cause of the following exception:
  1783. 2025-05-17 22:08:40,672 - sglang - INFO -
  1784. 2025-05-17 22:08:40,672 - __main__ - INFO -
  1785. 2025-05-17 22:08:40,672 - sglang - INFO - Traceback (most recent call last):
  1786. 2025-05-17 22:08:40,672 - __main__ - INFO - Traceback (most recent call last):
  1787. 2025-05-17 22:08:40,672 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  1788. 2025-05-17 22:08:40,672 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  1789. 2025-05-17 22:08:40,673 - sglang - INFO - resp = conn.urlopen(
  1790. 2025-05-17 22:08:40,673 - __main__ - INFO - resp = conn.urlopen(
  1791. 2025-05-17 22:08:40,673 - sglang - INFO - ^^^^^^^^^^^^^
  1792. 2025-05-17 22:08:40,673 - __main__ - INFO - ^^^^^^^^^^^^^
  1793. 2025-05-17 22:08:40,673 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  1794. 2025-05-17 22:08:40,673 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  1795. 2025-05-17 22:08:40,673 - sglang - INFO - retries = retries.increment(
  1796. 2025-05-17 22:08:40,673 - __main__ - INFO - retries = retries.increment(
  1797. 2025-05-17 22:08:40,673 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  1798. 2025-05-17 22:08:40,673 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  1799. 2025-05-17 22:08:40,673 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  1800. 2025-05-17 22:08:40,673 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  1801. 2025-05-17 22:08:40,673 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  1802. 2025-05-17 22:08:40,673 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  1803. 2025-05-17 22:08:40,673 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1804. 2025-05-17 22:08:40,673 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1805. 2025-05-17 22:08:40,674 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  1806. 2025-05-17 22:08:40,674 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  1807. 2025-05-17 22:08:40,674 - sglang - INFO -
  1808. 2025-05-17 22:08:40,674 - __main__ - INFO -
  1809. 2025-05-17 22:08:40,674 - sglang - INFO - During handling of the above exception, another exception occurred:
  1810. 2025-05-17 22:08:40,674 - __main__ - INFO - During handling of the above exception, another exception occurred:
  1811. 2025-05-17 22:08:40,674 - sglang - INFO -
  1812. 2025-05-17 22:08:40,674 - __main__ - INFO -
  1813. 2025-05-17 22:08:40,674 - sglang - INFO - Traceback (most recent call last):
  1814. 2025-05-17 22:08:40,674 - __main__ - INFO - Traceback (most recent call last):
  1815. 2025-05-17 22:08:40,674 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  1816. 2025-05-17 22:08:40,674 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  1817. 2025-05-17 22:08:40,674 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  1818. 2025-05-17 22:08:40,674 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  1819. 2025-05-17 22:08:40,674 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1820. 2025-05-17 22:08:40,674 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1821. 2025-05-17 22:08:40,674 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  1822. 2025-05-17 22:08:40,675 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  1823. 2025-05-17 22:08:40,675 - sglang - INFO - self.tp_worker = TpWorkerClass(
  1824. 2025-05-17 22:08:40,675 - __main__ - INFO - self.tp_worker = TpWorkerClass(
  1825. 2025-05-17 22:08:40,675 - sglang - INFO - ^^^^^^^^^^^^^^
  1826. 2025-05-17 22:08:40,675 - __main__ - INFO - ^^^^^^^^^^^^^^
  1827. 2025-05-17 22:08:40,675 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  1828. 2025-05-17 22:08:40,675 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  1829. 2025-05-17 22:08:40,675 - sglang - INFO - self.model_runner = ModelRunner(
  1830. 2025-05-17 22:08:40,675 - __main__ - INFO - self.model_runner = ModelRunner(
  1831. 2025-05-17 22:08:40,675 - sglang - INFO - ^^^^^^^^^^^^
  1832. 2025-05-17 22:08:40,675 - __main__ - INFO - ^^^^^^^^^^^^
  1833. 2025-05-17 22:08:40,675 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  1834. 2025-05-17 22:08:40,675 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  1835. 2025-05-17 22:08:40,675 - sglang - INFO - self.load_model()
  1836. 2025-05-17 22:08:40,675 - __main__ - INFO - self.load_model()
  1837. 2025-05-17 22:08:40,675 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  1838. 2025-05-17 22:08:40,675 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  1839. 2025-05-17 22:08:40,676 - sglang - INFO - self.model = get_model(
  1840. 2025-05-17 22:08:40,676 - __main__ - INFO - self.model = get_model(
  1841. 2025-05-17 22:08:40,676 - sglang - INFO - ^^^^^^^^^^
  1842. 2025-05-17 22:08:40,676 - __main__ - INFO - ^^^^^^^^^^
  1843. 2025-05-17 22:08:40,676 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  1844. 2025-05-17 22:08:40,676 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  1845. 2025-05-17 22:08:40,676 - sglang - INFO - return loader.load_model(
  1846. 2025-05-17 22:08:40,676 - __main__ - INFO - return loader.load_model(
  1847. 2025-05-17 22:08:40,676 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  1848. 2025-05-17 22:08:40,676 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  1849. 2025-05-17 22:08:40,676 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  1850. 2025-05-17 22:08:40,676 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  1851. 2025-05-17 22:08:40,676 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
  1852. 2025-05-17 22:08:40,676 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
  1853. 2025-05-17 22:08:40,676 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  1854. 2025-05-17 22:08:40,676 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  1855. 2025-05-17 22:08:40,677 - sglang - INFO - for name, loaded_weight in weights:
  1856. 2025-05-17 22:08:40,677 - __main__ - INFO - for name, loaded_weight in weights:
  1857. 2025-05-17 22:08:40,677 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  1858. 2025-05-17 22:08:40,677 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  1859. 2025-05-17 22:08:40,677 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
  1860. 2025-05-17 22:08:40,677 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
  1861. 2025-05-17 22:08:40,677 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1862. 2025-05-17 22:08:40,677 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1863. 2025-05-17 22:08:40,677 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  1864. 2025-05-17 22:08:40,677 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  1865. 2025-05-17 22:08:40,677 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  1866. 2025-05-17 22:08:40,677 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  1867. 2025-05-17 22:08:40,677 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  1868. 2025-05-17 22:08:40,677 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  1869. 2025-05-17 22:08:40,677 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  1870. 2025-05-17 22:08:40,677 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  1871. 2025-05-17 22:08:40,677 - sglang - INFO - hf_folder = download_weights_from_hf(
  1872. 2025-05-17 22:08:40,678 - __main__ - INFO - hf_folder = download_weights_from_hf(
  1873. 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  1874. 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  1875. 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  1876. 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  1877. 2025-05-17 22:08:40,678 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  1878. 2025-05-17 22:08:40,678 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  1879. 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1880. 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1881. 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  1882. 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  1883. 2025-05-17 22:08:40,678 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
  1884. 2025-05-17 22:08:40,678 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
  1885. 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1886. 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1887. 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  1888. 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  1889. 2025-05-17 22:08:40,678 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  1890. 2025-05-17 22:08:40,678 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  1891. 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1892. 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1893. 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  1894. 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  1895. 2025-05-17 22:08:40,678 - sglang - INFO - self._api.repo_info(
  1896. 2025-05-17 22:08:40,678 - __main__ - INFO - self._api.repo_info(
  1897. 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  1898. 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  1899. 2025-05-17 22:08:40,678 - sglang - INFO - return fn(*args, **kwargs)
  1900. 2025-05-17 22:08:40,678 - __main__ - INFO - return fn(*args, **kwargs)
  1901. 2025-05-17 22:08:40,678 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  1902. 2025-05-17 22:08:40,678 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  1903. 2025-05-17 22:08:40,678 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  1904. 2025-05-17 22:08:40,678 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  1905. 2025-05-17 22:08:40,678 - sglang - INFO - return method(
  1906. 2025-05-17 22:08:40,678 - __main__ - INFO - return method(
  1907. 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^
  1908. 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^
  1909. 2025-05-17 22:08:40,679 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  1910. 2025-05-17 22:08:40,679 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  1911. 2025-05-17 22:08:40,679 - sglang - INFO - return fn(*args, **kwargs)
  1912. 2025-05-17 22:08:40,679 - __main__ - INFO - return fn(*args, **kwargs)
  1913. 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  1914. 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  1915. 2025-05-17 22:08:40,679 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  1916. 2025-05-17 22:08:40,679 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  1917. 2025-05-17 22:08:40,679 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  1918. 2025-05-17 22:08:40,679 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  1919. 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1920. 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1921. 2025-05-17 22:08:40,679 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  1922. 2025-05-17 22:08:40,679 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  1923. 2025-05-17 22:08:40,679 - sglang - INFO - return self.request("GET", url, **kwargs)
  1924. 2025-05-17 22:08:40,679 - __main__ - INFO - return self.request("GET", url, **kwargs)
  1925. 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1926. 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1927. 2025-05-17 22:08:40,679 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  1928. 2025-05-17 22:08:40,679 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  1929. 2025-05-17 22:08:40,679 - sglang - INFO - resp = self.send(prep, **send_kwargs)
  1930. 2025-05-17 22:08:40,679 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
  1931. 2025-05-17 22:08:40,679 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1932. 2025-05-17 22:08:40,679 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1933. 2025-05-17 22:08:40,680 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  1934. 2025-05-17 22:08:40,680 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  1935. 2025-05-17 22:08:40,680 - sglang - INFO - r = adapter.send(request, **kwargs)
  1936. 2025-05-17 22:08:40,680 - __main__ - INFO - r = adapter.send(request, **kwargs)
  1937. 2025-05-17 22:08:40,680 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1938. 2025-05-17 22:08:40,680 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1939. 2025-05-17 22:08:40,680 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  1940. 2025-05-17 22:08:40,680 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  1941. 2025-05-17 22:08:40,680 - sglang - INFO - return super().send(request, *args, **kwargs)
  1942. 2025-05-17 22:08:40,680 - __main__ - INFO - return super().send(request, *args, **kwargs)
  1943. 2025-05-17 22:08:40,680 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1944. 2025-05-17 22:08:40,680 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1945. 2025-05-17 22:08:40,680 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  1946. 2025-05-17 22:08:40,680 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  1947. 2025-05-17 22:08:40,680 - sglang - INFO - raise ConnectionError(e, request=request)
  1948. 2025-05-17 22:08:40,680 - __main__ - INFO - raise ConnectionError(e, request=request)
  1949. 2025-05-17 22:08:40,680 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 1d8994fa-4819-4677-98a0-06dfaeccb18c)')
  1950. 2025-05-17 22:08:40,680 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa17031b890>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 1d8994fa-4819-4677-98a0-06dfaeccb18c)')
  1951. 2025-05-17 22:08:40,680 - sglang - INFO -
  1952. 2025-05-17 22:08:40,680 - __main__ - INFO -
  1953. 2025-05-17 22:08:40,680 - sglang - INFO - [2025-05-17 22:08:40] Received sigquit from a child proces. It usually means the child failed.
  1954. 2025-05-17 22:08:40,680 - __main__ - INFO - [2025-05-17 22:08:40] Received sigquit from a child proces. It usually means the child failed.
  1955. 2025-05-17 22:08:40,856 - __main__ - WARNING - SGLang server task ended
  1956. 2025-05-17 22:08:40,981 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  1957. 2025-05-17 22:08:42,018 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  1958. 2025-05-17 22:08:43,075 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  1959. 2025-05-17 22:08:44,141 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  1960. 2025-05-17 22:08:45,208 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  1961. 2025-05-17 22:08:46,343 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  1962. 2025-05-17 22:08:47,414 - sglang - INFO - [2025-05-17 22:08:47] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=803034972, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  1963. 2025-05-17 22:08:47,414 - __main__ - INFO - [2025-05-17 22:08:47] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=803034972, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  1964. 2025-05-17 22:08:47,415 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  1965. 2025-05-17 22:08:48,487 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  1966. 2025-05-17 22:08:49,520 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  1967. 2025-05-17 22:08:50,581 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  1968. 2025-05-17 22:08:51,650 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  1969. 2025-05-17 22:08:52,707 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  1970. 2025-05-17 22:08:53,775 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  1971. 2025-05-17 22:08:54,258 - sglang - INFO - [2025-05-17 22:08:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
  1972. 2025-05-17 22:08:54,258 - __main__ - INFO - [2025-05-17 22:08:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
  1973. 2025-05-17 22:08:54,852 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  1974. 2025-05-17 22:08:55,919 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  1975. 2025-05-17 22:08:56,988 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  1976. 2025-05-17 22:08:58,051 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  1977. 2025-05-17 22:08:59,104 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  1978. 2025-05-17 22:08:59,768 - sglang - INFO - [2025-05-17 22:08:59 TP0] Overlap scheduler is disabled for multimodal models.
  1979. 2025-05-17 22:08:59,769 - __main__ - INFO - [2025-05-17 22:08:59 TP0] Overlap scheduler is disabled for multimodal models.
  1980. 2025-05-17 22:08:59,945 - sglang - INFO - [2025-05-17 22:08:59 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  1981. 2025-05-17 22:08:59,945 - __main__ - INFO - [2025-05-17 22:08:59 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  1982. 2025-05-17 22:08:59,945 - sglang - INFO - [2025-05-17 22:08:59 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  1983. 2025-05-17 22:08:59,945 - __main__ - INFO - [2025-05-17 22:08:59 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  1984. 2025-05-17 22:08:59,945 - sglang - INFO - [2025-05-17 22:08:59 TP0] Init torch distributed begin.
  1985. 2025-05-17 22:08:59,946 - __main__ - INFO - [2025-05-17 22:08:59 TP0] Init torch distributed begin.
  1986. 2025-05-17 22:09:00,182 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
  1987. 2025-05-17 22:09:01,248 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
  1988. 2025-05-17 22:09:02,312 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
  1989. 2025-05-17 22:09:03,380 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
  1990. 2025-05-17 22:09:04,448 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
  1991. 2025-05-17 22:09:05,299 - sglang - INFO - [2025-05-17 22:09:05 TP0] Load weight begin. avail mem=23.33 GB
  1992. 2025-05-17 22:09:05,299 - __main__ - INFO - [2025-05-17 22:09:05 TP0] Load weight begin. avail mem=23.33 GB
  1993. 2025-05-17 22:09:05,523 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
  1994. 2025-05-17 22:09:05,902 - sglang - INFO - [2025-05-17 22:09:05 TP0] Scheduler hit an exception: Traceback (most recent call last):
  1995. 2025-05-17 22:09:05,902 - __main__ - INFO - [2025-05-17 22:09:05 TP0] Scheduler hit an exception: Traceback (most recent call last):
  1996. 2025-05-17 22:09:05,903 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  1997. 2025-05-17 22:09:05,903 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  1998. 2025-05-17 22:09:05,903 - sglang - INFO - sock = connection.create_connection(
  1999. 2025-05-17 22:09:05,903 - __main__ - INFO - sock = connection.create_connection(
  2000. 2025-05-17 22:09:05,903 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2001. 2025-05-17 22:09:05,903 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2002. 2025-05-17 22:09:05,903 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  2003. 2025-05-17 22:09:05,903 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  2004. 2025-05-17 22:09:05,903 - sglang - INFO - raise err
  2005. 2025-05-17 22:09:05,903 - __main__ - INFO - raise err
  2006. 2025-05-17 22:09:05,903 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  2007. 2025-05-17 22:09:05,903 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  2008. 2025-05-17 22:09:05,903 - sglang - INFO - sock.connect(sa)
  2009. 2025-05-17 22:09:05,903 - __main__ - INFO - sock.connect(sa)
  2010. 2025-05-17 22:09:05,903 - sglang - INFO - OSError: [Errno 101] Network is unreachable
  2011. 2025-05-17 22:09:05,903 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
  2012. 2025-05-17 22:09:05,903 - sglang - INFO -
  2013. 2025-05-17 22:09:05,903 - __main__ - INFO -
  2014. 2025-05-17 22:09:05,903 - sglang - INFO - The above exception was the direct cause of the following exception:
  2015. 2025-05-17 22:09:05,903 - __main__ - INFO - The above exception was the direct cause of the following exception:
  2016. 2025-05-17 22:09:05,904 - sglang - INFO -
  2017. 2025-05-17 22:09:05,904 - __main__ - INFO -
  2018. 2025-05-17 22:09:05,904 - sglang - INFO - Traceback (most recent call last):
  2019. 2025-05-17 22:09:05,904 - __main__ - INFO - Traceback (most recent call last):
  2020. 2025-05-17 22:09:05,904 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  2021. 2025-05-17 22:09:05,904 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  2022. 2025-05-17 22:09:05,904 - sglang - INFO - response = self._make_request(
  2023. 2025-05-17 22:09:05,904 - __main__ - INFO - response = self._make_request(
  2024. 2025-05-17 22:09:05,904 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  2025. 2025-05-17 22:09:05,904 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  2026. 2025-05-17 22:09:05,904 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  2027. 2025-05-17 22:09:05,904 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  2028. 2025-05-17 22:09:05,904 - sglang - INFO - raise new_e
  2029. 2025-05-17 22:09:05,904 - __main__ - INFO - raise new_e
  2030. 2025-05-17 22:09:05,904 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  2031. 2025-05-17 22:09:05,904 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  2032. 2025-05-17 22:09:05,904 - sglang - INFO - self._validate_conn(conn)
  2033. 2025-05-17 22:09:05,904 - __main__ - INFO - self._validate_conn(conn)
  2034. 2025-05-17 22:09:05,904 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  2035. 2025-05-17 22:09:05,904 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  2036. 2025-05-17 22:09:05,904 - sglang - INFO - conn.connect()
  2037. 2025-05-17 22:09:05,904 - __main__ - INFO - conn.connect()
  2038. 2025-05-17 22:09:05,905 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  2039. 2025-05-17 22:09:05,905 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  2040. 2025-05-17 22:09:05,905 - sglang - INFO - self.sock = sock = self._new_conn()
  2041. 2025-05-17 22:09:05,905 - __main__ - INFO - self.sock = sock = self._new_conn()
  2042. 2025-05-17 22:09:05,905 - sglang - INFO - ^^^^^^^^^^^^^^^^
  2043. 2025-05-17 22:09:05,905 - __main__ - INFO - ^^^^^^^^^^^^^^^^
  2044. 2025-05-17 22:09:05,905 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  2045. 2025-05-17 22:09:05,905 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  2046. 2025-05-17 22:09:05,905 - sglang - INFO - raise NewConnectionError(
  2047. 2025-05-17 22:09:05,905 - __main__ - INFO - raise NewConnectionError(
  2048. 2025-05-17 22:09:05,905 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable
  2049. 2025-05-17 22:09:05,905 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable
  2050. 2025-05-17 22:09:05,905 - sglang - INFO -
  2051. 2025-05-17 22:09:05,905 - __main__ - INFO -
  2052. 2025-05-17 22:09:05,905 - sglang - INFO - The above exception was the direct cause of the following exception:
  2053. 2025-05-17 22:09:05,905 - __main__ - INFO - The above exception was the direct cause of the following exception:
  2054. 2025-05-17 22:09:05,905 - sglang - INFO -
  2055. 2025-05-17 22:09:05,905 - __main__ - INFO -
  2056. 2025-05-17 22:09:05,905 - sglang - INFO - Traceback (most recent call last):
  2057. 2025-05-17 22:09:05,905 - __main__ - INFO - Traceback (most recent call last):
  2058. 2025-05-17 22:09:05,905 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  2059. 2025-05-17 22:09:05,906 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  2060. 2025-05-17 22:09:05,906 - sglang - INFO - resp = conn.urlopen(
  2061. 2025-05-17 22:09:05,906 - __main__ - INFO - resp = conn.urlopen(
  2062. 2025-05-17 22:09:05,906 - sglang - INFO - ^^^^^^^^^^^^^
  2063. 2025-05-17 22:09:05,906 - __main__ - INFO - ^^^^^^^^^^^^^
  2064. 2025-05-17 22:09:05,906 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  2065. 2025-05-17 22:09:05,906 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  2066. 2025-05-17 22:09:05,906 - sglang - INFO - retries = retries.increment(
  2067. 2025-05-17 22:09:05,906 - __main__ - INFO - retries = retries.increment(
  2068. 2025-05-17 22:09:05,906 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  2069. 2025-05-17 22:09:05,906 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  2070. 2025-05-17 22:09:05,906 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  2071. 2025-05-17 22:09:05,906 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  2072. 2025-05-17 22:09:05,906 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  2073. 2025-05-17 22:09:05,906 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  2074. 2025-05-17 22:09:05,906 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2075. 2025-05-17 22:09:05,906 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2076. 2025-05-17 22:09:05,906 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  2077. 2025-05-17 22:09:05,906 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  2078. 2025-05-17 22:09:05,906 - sglang - INFO -
  2079. 2025-05-17 22:09:05,906 - __main__ - INFO -
  2080. 2025-05-17 22:09:05,907 - sglang - INFO - During handling of the above exception, another exception occurred:
  2081. 2025-05-17 22:09:05,907 - __main__ - INFO - During handling of the above exception, another exception occurred:
  2082. 2025-05-17 22:09:05,907 - sglang - INFO -
  2083. 2025-05-17 22:09:05,907 - __main__ - INFO -
  2084. 2025-05-17 22:09:05,907 - sglang - INFO - Traceback (most recent call last):
  2085. 2025-05-17 22:09:05,907 - __main__ - INFO - Traceback (most recent call last):
  2086. 2025-05-17 22:09:05,907 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  2087. 2025-05-17 22:09:05,907 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  2088. 2025-05-17 22:09:05,907 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  2089. 2025-05-17 22:09:05,907 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  2090. 2025-05-17 22:09:05,907 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2091. 2025-05-17 22:09:05,907 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2092. 2025-05-17 22:09:05,907 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  2093. 2025-05-17 22:09:05,907 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  2094. 2025-05-17 22:09:05,907 - sglang - INFO - self.tp_worker = TpWorkerClass(
  2095. 2025-05-17 22:09:05,907 - __main__ - INFO - self.tp_worker = TpWorkerClass(
  2096. 2025-05-17 22:09:05,907 - sglang - INFO - ^^^^^^^^^^^^^^
  2097. 2025-05-17 22:09:05,907 - __main__ - INFO - ^^^^^^^^^^^^^^
  2098. 2025-05-17 22:09:05,907 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  2099. 2025-05-17 22:09:05,907 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  2100. 2025-05-17 22:09:05,907 - sglang - INFO - self.model_runner = ModelRunner(
  2101. 2025-05-17 22:09:05,907 - __main__ - INFO - self.model_runner = ModelRunner(
  2102. 2025-05-17 22:09:05,908 - sglang - INFO - ^^^^^^^^^^^^
  2103. 2025-05-17 22:09:05,908 - __main__ - INFO - ^^^^^^^^^^^^
  2104. 2025-05-17 22:09:05,908 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  2105. 2025-05-17 22:09:05,908 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  2106. 2025-05-17 22:09:05,908 - sglang - INFO - self.load_model()
  2107. 2025-05-17 22:09:05,908 - __main__ - INFO - self.load_model()
  2108. 2025-05-17 22:09:05,908 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  2109. 2025-05-17 22:09:05,908 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  2110. 2025-05-17 22:09:05,908 - sglang - INFO - self.model = get_model(
  2111. 2025-05-17 22:09:05,908 - __main__ - INFO - self.model = get_model(
  2112. 2025-05-17 22:09:05,908 - sglang - INFO - ^^^^^^^^^^
  2113. 2025-05-17 22:09:05,908 - __main__ - INFO - ^^^^^^^^^^
  2114. 2025-05-17 22:09:05,908 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  2115. 2025-05-17 22:09:05,908 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  2116. 2025-05-17 22:09:05,908 - sglang - INFO - return loader.load_model(
  2117. 2025-05-17 22:09:05,908 - __main__ - INFO - return loader.load_model(
  2118. 2025-05-17 22:09:05,908 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  2119. 2025-05-17 22:09:05,908 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  2120. 2025-05-17 22:09:05,908 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  2121. 2025-05-17 22:09:05,908 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  2122. 2025-05-17 22:09:05,908 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
  2123. 2025-05-17 22:09:05,908 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
  2124. 2025-05-17 22:09:05,909 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  2125. 2025-05-17 22:09:05,909 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  2126. 2025-05-17 22:09:05,909 - sglang - INFO - for name, loaded_weight in weights:
  2127. 2025-05-17 22:09:05,909 - __main__ - INFO - for name, loaded_weight in weights:
  2128. 2025-05-17 22:09:05,909 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  2129. 2025-05-17 22:09:05,909 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  2130. 2025-05-17 22:09:05,909 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
  2131. 2025-05-17 22:09:05,909 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
  2132. 2025-05-17 22:09:05,909 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2133. 2025-05-17 22:09:05,909 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2134. 2025-05-17 22:09:05,909 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  2135. 2025-05-17 22:09:05,909 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  2136. 2025-05-17 22:09:05,909 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  2137. 2025-05-17 22:09:05,909 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  2138. 2025-05-17 22:09:05,909 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  2139. 2025-05-17 22:09:05,909 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  2140. 2025-05-17 22:09:05,909 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  2141. 2025-05-17 22:09:05,909 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  2142. 2025-05-17 22:09:05,909 - sglang - INFO - hf_folder = download_weights_from_hf(
  2143. 2025-05-17 22:09:05,909 - __main__ - INFO - hf_folder = download_weights_from_hf(
  2144. 2025-05-17 22:09:05,909 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  2145. 2025-05-17 22:09:05,909 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  2146. 2025-05-17 22:09:05,910 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  2147. 2025-05-17 22:09:05,910 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  2148. 2025-05-17 22:09:05,910 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  2149. 2025-05-17 22:09:05,910 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  2150. 2025-05-17 22:09:05,910 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2151. 2025-05-17 22:09:05,910 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2152. 2025-05-17 22:09:05,910 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  2153. 2025-05-17 22:09:05,910 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  2154. 2025-05-17 22:09:05,910 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
  2155. 2025-05-17 22:09:05,910 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
  2156. 2025-05-17 22:09:05,910 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2157. 2025-05-17 22:09:05,910 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2158. 2025-05-17 22:09:05,910 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  2159. 2025-05-17 22:09:05,910 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  2160. 2025-05-17 22:09:05,910 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  2161. 2025-05-17 22:09:05,910 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  2162. 2025-05-17 22:09:05,910 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2163. 2025-05-17 22:09:05,910 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2164. 2025-05-17 22:09:05,910 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  2165. 2025-05-17 22:09:05,910 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  2166. 2025-05-17 22:09:05,910 - sglang - INFO - self._api.repo_info(
  2167. 2025-05-17 22:09:05,910 - __main__ - INFO - self._api.repo_info(
  2168. 2025-05-17 22:09:05,911 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  2169. 2025-05-17 22:09:05,911 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  2170. 2025-05-17 22:09:05,911 - sglang - INFO - return fn(*args, **kwargs)
  2171. 2025-05-17 22:09:05,911 - __main__ - INFO - return fn(*args, **kwargs)
  2172. 2025-05-17 22:09:05,911 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  2173. 2025-05-17 22:09:05,911 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  2174. 2025-05-17 22:09:05,911 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  2175. 2025-05-17 22:09:05,911 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  2176. 2025-05-17 22:09:05,911 - sglang - INFO - return method(
  2177. 2025-05-17 22:09:05,911 - __main__ - INFO - return method(
  2178. 2025-05-17 22:09:05,911 - sglang - INFO - ^^^^^^^
  2179. 2025-05-17 22:09:05,911 - __main__ - INFO - ^^^^^^^
  2180. 2025-05-17 22:09:05,911 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  2181. 2025-05-17 22:09:05,911 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  2182. 2025-05-17 22:09:05,911 - sglang - INFO - return fn(*args, **kwargs)
  2183. 2025-05-17 22:09:05,911 - __main__ - INFO - return fn(*args, **kwargs)
  2184. 2025-05-17 22:09:05,911 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  2185. 2025-05-17 22:09:05,911 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  2186. 2025-05-17 22:09:05,911 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  2187. 2025-05-17 22:09:05,911 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  2188. 2025-05-17 22:09:05,911 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  2189. 2025-05-17 22:09:05,912 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  2190. 2025-05-17 22:09:05,912 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2191. 2025-05-17 22:09:05,912 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2192. 2025-05-17 22:09:05,912 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  2193. 2025-05-17 22:09:05,912 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  2194. 2025-05-17 22:09:05,912 - sglang - INFO - return self.request("GET", url, **kwargs)
  2195. 2025-05-17 22:09:05,912 - __main__ - INFO - return self.request("GET", url, **kwargs)
  2196. 2025-05-17 22:09:05,912 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2197. 2025-05-17 22:09:05,912 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2198. 2025-05-17 22:09:05,912 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  2199. 2025-05-17 22:09:05,912 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  2200. 2025-05-17 22:09:05,912 - sglang - INFO - resp = self.send(prep, **send_kwargs)
  2201. 2025-05-17 22:09:05,912 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
  2202. 2025-05-17 22:09:05,912 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2203. 2025-05-17 22:09:05,912 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2204. 2025-05-17 22:09:05,912 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  2205. 2025-05-17 22:09:05,912 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  2206. 2025-05-17 22:09:05,912 - sglang - INFO - r = adapter.send(request, **kwargs)
  2207. 2025-05-17 22:09:05,912 - __main__ - INFO - r = adapter.send(request, **kwargs)
  2208. 2025-05-17 22:09:05,912 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2209. 2025-05-17 22:09:05,912 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2210. 2025-05-17 22:09:05,912 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  2211. 2025-05-17 22:09:05,912 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  2212. 2025-05-17 22:09:05,913 - sglang - INFO - return super().send(request, *args, **kwargs)
  2213. 2025-05-17 22:09:05,913 - __main__ - INFO - return super().send(request, *args, **kwargs)
  2214. 2025-05-17 22:09:05,913 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2215. 2025-05-17 22:09:05,913 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  2216. 2025-05-17 22:09:05,913 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  2217. 2025-05-17 22:09:05,913 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  2218. 2025-05-17 22:09:05,913 - sglang - INFO - raise ConnectionError(e, request=request)
  2219. 2025-05-17 22:09:05,913 - __main__ - INFO - raise ConnectionError(e, request=request)
  2220. 2025-05-17 22:09:05,913 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 385785cb-3404-4e73-aefc-9a748405b66f)')
  2221. 2025-05-17 22:09:05,913 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1c84533910>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 385785cb-3404-4e73-aefc-9a748405b66f)')
  2222. 2025-05-17 22:09:05,913 - sglang - INFO -
  2223. 2025-05-17 22:09:05,913 - __main__ - INFO -
  2224. 2025-05-17 22:09:05,913 - sglang - INFO - [2025-05-17 22:09:05] Received sigquit from a child proces. It usually means the child failed.
  2225. 2025-05-17 22:09:05,913 - __main__ - INFO - [2025-05-17 22:09:05] Received sigquit from a child proces. It usually means the child failed.
  2226. 2025-05-17 22:09:06,062 - __main__ - WARNING - SGLang server task ended
  2227. 2025-05-17 22:09:06,600 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
  2228. 2025-05-17 22:09:07,669 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
  2229. 2025-05-17 22:09:08,728 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
  2230. 2025-05-17 22:09:09,792 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
  2231. 2025-05-17 22:09:10,394 - __main__ - INFO - Got cancellation request for SGLang server
  2232. 2025-05-17 22:09:30,402 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2233. 2025-05-17 22:09:30,402 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  2234. 2025-05-17 22:09:30,402 - __main__ - INFO - Found 1 total pdf paths to add
  2235. 2025-05-17 22:09:30,405 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2236. 2025-05-17 22:09:30,623 - __main__ - INFO - Starting pipeline with PID 401355
  2237. 2025-05-17 22:09:30,623 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2238. 2025-05-17 22:09:35,724 - __main__ - INFO - No work to do, exiting
  2239. 2025-05-17 22:10:16,045 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2240. 2025-05-17 22:10:16,045 - __main__ - INFO - Loading file at olmocr_workspace/job_1747491009/input.pdf as PDF document
  2241. 2025-05-17 22:10:16,045 - __main__ - INFO - Found 1 total pdf paths to add
  2242. 2025-05-17 22:10:16,048 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2243. 2025-05-17 22:10:16,256 - __main__ - INFO - Starting pipeline with PID 401510
  2244. 2025-05-17 22:10:16,256 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2245. 2025-05-17 22:10:21,469 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  2246. 2025-05-17 22:10:22,511 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  2247. 2025-05-17 22:10:23,556 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  2248. 2025-05-17 22:10:24,608 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  2249. 2025-05-17 22:10:25,675 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  2250. 2025-05-17 22:10:26,816 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  2251. 2025-05-17 22:10:27,868 - sglang - INFO - [2025-05-17 22:10:27] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=907351504, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2252. 2025-05-17 22:10:27,868 - __main__ - INFO - [2025-05-17 22:10:27] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=907351504, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2253. 2025-05-17 22:10:27,869 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  2254. 2025-05-17 22:10:28,903 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  2255. 2025-05-17 22:10:29,963 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  2256. 2025-05-17 22:10:31,030 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  2257. 2025-05-17 22:10:32,098 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  2258. 2025-05-17 22:10:33,166 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  2259. 2025-05-17 22:10:34,232 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  2260. 2025-05-17 22:10:35,266 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  2261. 2025-05-17 22:10:36,326 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  2262. 2025-05-17 22:10:37,398 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  2263. 2025-05-17 22:10:38,466 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  2264. 2025-05-17 22:10:39,529 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  2265. 2025-05-17 22:10:40,585 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  2266. 2025-05-17 22:10:41,651 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  2267. 2025-05-17 22:10:42,718 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  2268. 2025-05-17 22:10:43,747 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  2269. 2025-05-17 22:10:44,782 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  2270. 2025-05-17 22:10:45,845 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  2271. 2025-05-17 22:10:46,913 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  2272. 2025-05-17 22:10:47,981 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  2273. 2025-05-17 22:10:49,050 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  2274. 2025-05-17 22:10:50,118 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  2275. 2025-05-17 22:10:51,187 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  2276. 2025-05-17 22:10:52,255 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  2277. 2025-05-17 22:10:53,323 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  2278. 2025-05-17 22:10:54,391 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  2279. 2025-05-17 22:10:55,459 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  2280. 2025-05-17 22:10:56,527 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  2281. 2025-05-17 22:10:57,590 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  2282. 2025-05-17 22:10:58,647 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  2283. 2025-05-17 22:10:59,714 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  2284. 2025-05-17 22:11:00,781 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  2285. 2025-05-17 22:11:01,848 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  2286. 2025-05-17 22:11:02,919 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  2287. 2025-05-17 22:11:03,954 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  2288. 2025-05-17 22:11:05,021 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  2289. 2025-05-17 22:11:06,088 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  2290. 2025-05-17 22:11:07,156 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  2291. 2025-05-17 22:11:08,223 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  2292. 2025-05-17 22:11:09,291 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  2293. 2025-05-17 22:11:10,363 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  2294. 2025-05-17 22:11:11,419 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  2295. 2025-05-17 22:11:12,487 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  2296. 2025-05-17 22:11:13,555 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  2297. 2025-05-17 22:11:14,628 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  2298. 2025-05-17 22:11:15,684 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  2299. 2025-05-17 22:11:16,747 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  2300. 2025-05-17 22:11:17,814 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  2301. 2025-05-17 22:11:18,880 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  2302. 2025-05-17 22:11:19,948 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  2303. 2025-05-17 22:11:21,015 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  2304. 2025-05-17 22:11:22,091 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  2305. 2025-05-17 22:11:23,155 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  2306. 2025-05-17 22:11:24,228 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  2307. 2025-05-17 22:11:25,295 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  2308. 2025-05-17 22:11:26,362 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  2309. 2025-05-17 22:11:27,430 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  2310. 2025-05-17 22:11:28,498 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  2311. 2025-05-17 22:11:29,562 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  2312. 2025-05-17 22:11:30,629 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  2313. 2025-05-17 22:11:31,693 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
  2314. 2025-05-17 22:11:32,745 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
  2315. 2025-05-17 22:11:33,811 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
  2316. 2025-05-17 22:11:34,880 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
  2317. 2025-05-17 22:11:35,947 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
  2318. 2025-05-17 22:11:37,014 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
  2319. 2025-05-17 22:11:38,082 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
  2320. 2025-05-17 22:11:39,150 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
  2321. 2025-05-17 22:11:40,218 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
  2322. 2025-05-17 22:11:41,287 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
  2323. 2025-05-17 22:11:42,354 - __main__ - WARNING - Attempt 77: Please wait for sglang server to become ready...
  2324. 2025-05-17 22:11:43,423 - __main__ - WARNING - Attempt 78: Please wait for sglang server to become ready...
  2325. 2025-05-17 22:11:44,495 - __main__ - WARNING - Attempt 79: Please wait for sglang server to become ready...
  2326. 2025-05-17 22:11:45,567 - __main__ - WARNING - Attempt 80: Please wait for sglang server to become ready...
  2327. 2025-05-17 22:11:46,639 - __main__ - WARNING - Attempt 81: Please wait for sglang server to become ready...
  2328. 2025-05-17 22:11:47,707 - __main__ - WARNING - Attempt 82: Please wait for sglang server to become ready...
  2329. 2025-05-17 22:11:48,771 - __main__ - WARNING - Attempt 83: Please wait for sglang server to become ready...
  2330. 2025-05-17 22:11:49,826 - __main__ - WARNING - Attempt 84: Please wait for sglang server to become ready...
  2331. 2025-05-17 22:11:50,893 - __main__ - WARNING - Attempt 85: Please wait for sglang server to become ready...
  2332. 2025-05-17 22:11:51,962 - __main__ - WARNING - Attempt 86: Please wait for sglang server to become ready...
  2333. 2025-05-17 22:11:53,028 - __main__ - WARNING - Attempt 87: Please wait for sglang server to become ready...
  2334. 2025-05-17 22:11:54,096 - __main__ - WARNING - Attempt 88: Please wait for sglang server to become ready...
  2335. 2025-05-17 22:11:55,163 - __main__ - WARNING - Attempt 89: Please wait for sglang server to become ready...
  2336. 2025-05-17 22:11:56,231 - __main__ - WARNING - Attempt 90: Please wait for sglang server to become ready...
  2337. 2025-05-17 22:11:57,304 - __main__ - WARNING - Attempt 91: Please wait for sglang server to become ready...
  2338. 2025-05-17 22:11:58,372 - __main__ - WARNING - Attempt 92: Please wait for sglang server to become ready...
  2339. 2025-05-17 22:11:59,441 - __main__ - WARNING - Attempt 93: Please wait for sglang server to become ready...
  2340. 2025-05-17 22:12:00,509 - __main__ - WARNING - Attempt 94: Please wait for sglang server to become ready...
  2341. 2025-05-17 22:12:01,577 - __main__ - WARNING - Attempt 95: Please wait for sglang server to become ready...
  2342. 2025-05-17 22:12:02,645 - __main__ - WARNING - Attempt 96: Please wait for sglang server to become ready...
  2343. 2025-05-17 22:12:03,713 - __main__ - WARNING - Attempt 97: Please wait for sglang server to become ready...
  2344. 2025-05-17 22:12:04,323 - sglang - INFO - [2025-05-17 22:12:04] Use chat template for the OpenAI-compatible API server: qwen2-vl
  2345. 2025-05-17 22:12:04,323 - __main__ - INFO - [2025-05-17 22:12:04] Use chat template for the OpenAI-compatible API server: qwen2-vl
  2346. 2025-05-17 22:12:04,790 - __main__ - WARNING - Attempt 98: Please wait for sglang server to become ready...
  2347. 2025-05-17 22:12:05,857 - __main__ - WARNING - Attempt 99: Please wait for sglang server to become ready...
  2348. 2025-05-17 22:12:06,909 - __main__ - WARNING - Attempt 100: Please wait for sglang server to become ready...
  2349. 2025-05-17 22:12:07,976 - __main__ - WARNING - Attempt 101: Please wait for sglang server to become ready...
  2350. 2025-05-17 22:12:09,043 - __main__ - WARNING - Attempt 102: Please wait for sglang server to become ready...
  2351. 2025-05-17 22:12:10,111 - __main__ - WARNING - Attempt 103: Please wait for sglang server to become ready...
  2352. 2025-05-17 22:12:10,225 - sglang - INFO - [2025-05-17 22:12:10 TP0] Overlap scheduler is disabled for multimodal models.
  2353. 2025-05-17 22:12:10,225 - __main__ - INFO - [2025-05-17 22:12:10 TP0] Overlap scheduler is disabled for multimodal models.
  2354. 2025-05-17 22:12:11,188 - __main__ - WARNING - Attempt 104: Please wait for sglang server to become ready...
  2355. 2025-05-17 22:12:12,256 - __main__ - WARNING - Attempt 105: Please wait for sglang server to become ready...
  2356. 2025-05-17 22:12:13,325 - __main__ - WARNING - Attempt 106: Please wait for sglang server to become ready...
  2357. 2025-05-17 22:12:14,393 - __main__ - WARNING - Attempt 107: Please wait for sglang server to become ready...
  2358. 2025-05-17 22:12:15,462 - __main__ - WARNING - Attempt 108: Please wait for sglang server to become ready...
  2359. 2025-05-17 22:12:16,531 - __main__ - WARNING - Attempt 109: Please wait for sglang server to become ready...
  2360. 2025-05-17 22:12:17,604 - __main__ - WARNING - Attempt 110: Please wait for sglang server to become ready...
  2361. 2025-05-17 22:12:18,672 - __main__ - WARNING - Attempt 111: Please wait for sglang server to become ready...
  2362. 2025-05-17 22:12:19,740 - __main__ - WARNING - Attempt 112: Please wait for sglang server to become ready...
  2363. 2025-05-17 22:12:20,809 - __main__ - WARNING - Attempt 113: Please wait for sglang server to become ready...
  2364. 2025-05-17 22:12:21,875 - __main__ - WARNING - Attempt 114: Please wait for sglang server to become ready...
  2365. 2025-05-17 22:12:22,936 - __main__ - WARNING - Attempt 115: Please wait for sglang server to become ready...
  2366. 2025-05-17 22:12:23,989 - __main__ - WARNING - Attempt 116: Please wait for sglang server to become ready...
  2367. 2025-05-17 22:12:25,051 - __main__ - WARNING - Attempt 117: Please wait for sglang server to become ready...
  2368. 2025-05-17 22:12:26,120 - __main__ - WARNING - Attempt 118: Please wait for sglang server to become ready...
  2369. 2025-05-17 22:12:27,188 - __main__ - WARNING - Attempt 119: Please wait for sglang server to become ready...
  2370. 2025-05-17 22:12:28,152 - __main__ - INFO - Got cancellation request for SGLang server
  2371. 2025-05-17 22:13:07,270 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2372. 2025-05-17 22:13:07,271 - __main__ - INFO - Loading file at olmocr_workspace/job_1747491180/input.pdf as PDF document
  2373. 2025-05-17 22:13:07,271 - __main__ - INFO - Found 1 total pdf paths to add
  2374. 2025-05-17 22:13:07,273 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2375. 2025-05-17 22:13:07,567 - __main__ - INFO - Starting pipeline with PID 402318
  2376. 2025-05-17 22:13:07,567 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2377. 2025-05-17 22:15:43,322 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2378. 2025-05-17 22:15:43,322 - __main__ - INFO - Loading file at olmocr_workspace/job_1747491337/input.pdf as PDF document
  2379. 2025-05-17 22:15:43,322 - __main__ - INFO - Found 1 total pdf paths to add
  2380. 2025-05-17 22:15:43,324 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2381. 2025-05-17 22:15:43,625 - __main__ - INFO - Starting pipeline with PID 402524
  2382. 2025-05-17 22:15:43,625 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2383. 2025-05-17 22:17:59,962 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  2384. 2025-05-17 22:18:01,003 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  2385. 2025-05-17 22:18:01,262 - __main__ - INFO - Got cancellation request for SGLang server
  2386. 2025-05-17 22:21:02,917 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2387. 2025-05-17 22:21:02,918 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  2388. 2025-05-17 22:21:02,918 - __main__ - INFO - Found 1 total pdf paths to add
  2389. 2025-05-17 22:21:02,920 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2390. 2025-05-17 22:21:03,172 - __main__ - INFO - Starting pipeline with PID 404165
  2391. 2025-05-17 22:21:03,172 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2392. 2025-05-17 22:23:19,332 - __main__ - INFO - No work to do, exiting
  2393. 2025-05-17 22:27:55,694 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2394. 2025-05-17 22:27:55,694 - __main__ - INFO - Loading file at olmocr_workspace/job_1747492069/input.pdf as PDF document
  2395. 2025-05-17 22:27:55,694 - __main__ - INFO - Found 1 total pdf paths to add
  2396. 2025-05-17 22:27:55,696 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2397. 2025-05-17 22:27:55,922 - __main__ - INFO - Starting pipeline with PID 404544
  2398. 2025-05-17 22:27:55,922 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2399. 2025-05-17 22:30:11,099 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  2400. 2025-05-17 22:30:12,145 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  2401. 2025-05-17 22:30:13,201 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  2402. 2025-05-17 22:30:14,262 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  2403. 2025-05-17 22:30:15,329 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  2404. 2025-05-17 22:30:16,395 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  2405. 2025-05-17 22:30:17,281 - sglang - INFO - [2025-05-17 22:30:17] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=403061725, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2406. 2025-05-17 22:30:17,281 - __main__ - INFO - [2025-05-17 22:30:17] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=403061725, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2407. 2025-05-17 22:30:17,553 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  2408. 2025-05-17 22:30:18,621 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  2409. 2025-05-17 22:30:19,665 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  2410. 2025-05-17 22:30:20,711 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  2411. 2025-05-17 22:30:21,758 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  2412. 2025-05-17 22:30:22,798 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  2413. 2025-05-17 22:30:23,842 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  2414. 2025-05-17 22:30:24,878 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  2415. 2025-05-17 22:30:25,941 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  2416. 2025-05-17 22:30:27,008 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  2417. 2025-05-17 22:30:28,077 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  2418. 2025-05-17 22:30:29,145 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  2419. 2025-05-17 22:30:30,209 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  2420. 2025-05-17 22:30:31,261 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  2421. 2025-05-17 22:30:32,328 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  2422. 2025-05-17 22:30:33,391 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  2423. 2025-05-17 22:30:34,459 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  2424. 2025-05-17 22:30:35,526 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  2425. 2025-05-17 22:30:36,595 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  2426. 2025-05-17 22:30:37,663 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  2427. 2025-05-17 22:30:38,736 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  2428. 2025-05-17 22:30:39,803 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  2429. 2025-05-17 22:30:40,873 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  2430. 2025-05-17 22:30:41,941 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  2431. 2025-05-17 22:30:43,011 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  2432. 2025-05-17 22:30:44,084 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  2433. 2025-05-17 22:30:45,152 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  2434. 2025-05-17 22:30:46,217 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  2435. 2025-05-17 22:30:47,277 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  2436. 2025-05-17 22:30:48,330 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  2437. 2025-05-17 22:30:49,393 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  2438. 2025-05-17 22:30:50,460 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  2439. 2025-05-17 22:30:51,527 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  2440. 2025-05-17 22:30:52,600 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  2441. 2025-05-17 22:30:53,668 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  2442. 2025-05-17 22:30:54,735 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  2443. 2025-05-17 22:30:55,804 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  2444. 2025-05-17 22:30:56,872 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  2445. 2025-05-17 22:30:57,940 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  2446. 2025-05-17 22:30:59,004 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  2447. 2025-05-17 22:31:00,072 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  2448. 2025-05-17 22:31:01,140 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  2449. 2025-05-17 22:31:02,208 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  2450. 2025-05-17 22:31:03,275 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  2451. 2025-05-17 22:31:04,339 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  2452. 2025-05-17 22:31:05,392 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  2453. 2025-05-17 22:31:06,455 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  2454. 2025-05-17 22:31:07,527 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  2455. 2025-05-17 22:31:08,595 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  2456. 2025-05-17 22:31:09,667 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  2457. 2025-05-17 22:31:10,735 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  2458. 2025-05-17 22:31:11,808 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  2459. 2025-05-17 22:31:12,879 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  2460. 2025-05-17 22:31:13,952 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  2461. 2025-05-17 22:31:15,020 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  2462. 2025-05-17 22:31:16,088 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  2463. 2025-05-17 22:31:17,156 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  2464. 2025-05-17 22:31:18,224 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  2465. 2025-05-17 22:31:19,292 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  2466. 2025-05-17 22:31:20,356 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  2467. 2025-05-17 22:31:21,416 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
  2468. 2025-05-17 22:31:22,469 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
  2469. 2025-05-17 22:31:23,532 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
  2470. 2025-05-17 22:31:24,599 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
  2471. 2025-05-17 22:31:25,671 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
  2472. 2025-05-17 22:31:26,739 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
  2473. 2025-05-17 22:31:27,807 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
  2474. 2025-05-17 22:31:28,876 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
  2475. 2025-05-17 22:31:29,945 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
  2476. 2025-05-17 22:31:31,013 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
  2477. 2025-05-17 22:31:31,749 - __main__ - INFO - Got cancellation request for SGLang server
  2478. 2025-05-17 22:31:40,102 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2479. 2025-05-17 22:31:40,102 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  2480. 2025-05-17 22:31:40,102 - __main__ - INFO - Found 1 total pdf paths to add
  2481. 2025-05-17 22:31:40,106 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2482. 2025-05-17 22:31:40,324 - __main__ - INFO - Starting pipeline with PID 405667
  2483. 2025-05-17 22:31:40,324 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2484. 2025-05-17 22:34:59,610 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2485. 2025-05-17 22:34:59,610 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  2486. 2025-05-17 22:34:59,610 - __main__ - INFO - Found 1 total pdf paths to add
  2487. 2025-05-17 22:34:59,613 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2488. 2025-05-17 22:34:59,876 - __main__ - INFO - Starting pipeline with PID 405958
  2489. 2025-05-17 22:34:59,876 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2490. 2025-05-17 22:37:14,916 - __main__ - INFO - No work to do, exiting
  2491. 2025-05-17 22:45:58,117 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2492. 2025-05-17 22:45:58,118 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  2493. 2025-05-17 22:45:58,118 - __main__ - INFO - Found 1 total pdf paths to add
  2494. 2025-05-17 22:45:58,121 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2495. 2025-05-17 22:45:58,379 - __main__ - INFO - Starting pipeline with PID 406610
  2496. 2025-05-17 22:45:58,379 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2497. 2025-05-17 22:48:14,372 - __main__ - INFO - No work to do, exiting
  2498. 2025-05-17 22:48:42,758 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2499. 2025-05-17 22:48:42,758 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  2500. 2025-05-17 22:48:42,758 - __main__ - INFO - Found 1 total pdf paths to add
  2501. 2025-05-17 22:48:42,762 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2502. 2025-05-17 22:48:43,037 - __main__ - INFO - Starting pipeline with PID 407353
  2503. 2025-05-17 22:48:43,037 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2504. 2025-05-17 22:51:38,663 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2505. 2025-05-17 22:51:38,663 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  2506. 2025-05-17 22:51:38,663 - __main__ - INFO - Found 1 total pdf paths to add
  2507. 2025-05-17 22:51:38,666 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2508. 2025-05-17 22:51:38,887 - __main__ - INFO - Starting pipeline with PID 407920
  2509. 2025-05-17 22:51:38,887 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2510. 2025-05-17 22:51:44,965 - __main__ - INFO - No work to do, exiting
  2511. 2025-05-17 22:52:53,522 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2512. 2025-05-17 22:52:53,522 - __main__ - INFO - Loading file at olmocr_workspace/job_1747493567/input.pdf as PDF document
  2513. 2025-05-17 22:52:53,522 - __main__ - INFO - Found 1 total pdf paths to add
  2514. 2025-05-17 22:52:53,524 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2515. 2025-05-17 22:52:53,743 - __main__ - INFO - Starting pipeline with PID 408294
  2516. 2025-05-17 22:52:53,743 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2517. 2025-05-17 22:52:59,333 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  2518. 2025-05-17 22:53:00,367 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  2519. 2025-05-17 22:53:01,421 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  2520. 2025-05-17 22:53:02,488 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  2521. 2025-05-17 22:53:03,554 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  2522. 2025-05-17 22:53:04,629 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  2523. 2025-05-17 22:53:05,307 - sglang - INFO - [2025-05-17 22:53:05] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=6928412, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2524. 2025-05-17 22:53:05,307 - __main__ - INFO - [2025-05-17 22:53:05] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=6928412, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2525. 2025-05-17 22:53:05,699 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  2526. 2025-05-17 22:53:06,767 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  2527. 2025-05-17 22:53:07,823 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  2528. 2025-05-17 22:53:08,864 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  2529. 2025-05-17 22:53:09,904 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  2530. 2025-05-17 22:53:10,937 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  2531. 2025-05-17 22:53:11,990 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  2532. 2025-05-17 22:53:13,053 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  2533. 2025-05-17 22:53:14,099 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  2534. 2025-05-17 22:53:14,330 - sglang - INFO - [2025-05-17 22:53:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
  2535. 2025-05-17 22:53:14,330 - __main__ - INFO - [2025-05-17 22:53:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
  2536. 2025-05-17 22:53:15,167 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  2537. 2025-05-17 22:53:16,235 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  2538. 2025-05-17 22:53:17,295 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  2539. 2025-05-17 22:53:18,366 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  2540. 2025-05-17 22:53:19,429 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  2541. 2025-05-17 22:53:20,484 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  2542. 2025-05-17 22:53:20,626 - sglang - INFO - [2025-05-17 22:53:20 TP0] Overlap scheduler is disabled for multimodal models.
  2543. 2025-05-17 22:53:20,626 - __main__ - INFO - [2025-05-17 22:53:20 TP0] Overlap scheduler is disabled for multimodal models.
  2544. 2025-05-17 22:53:21,154 - sglang - INFO - [2025-05-17 22:53:21 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  2545. 2025-05-17 22:53:21,154 - __main__ - INFO - [2025-05-17 22:53:21 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  2546. 2025-05-17 22:53:21,155 - sglang - INFO - [2025-05-17 22:53:21 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  2547. 2025-05-17 22:53:21,155 - __main__ - INFO - [2025-05-17 22:53:21 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  2548. 2025-05-17 22:53:21,155 - sglang - INFO - [2025-05-17 22:53:21 TP0] Init torch distributed begin.
  2549. 2025-05-17 22:53:21,155 - __main__ - INFO - [2025-05-17 22:53:21 TP0] Init torch distributed begin.
  2550. 2025-05-17 22:53:21,562 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  2551. 2025-05-17 22:53:22,629 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  2552. 2025-05-17 22:53:23,706 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  2553. 2025-05-17 22:53:24,766 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  2554. 2025-05-17 22:53:25,840 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  2555. 2025-05-17 22:53:26,579 - sglang - INFO - [2025-05-17 22:53:26 TP0] Load weight begin. avail mem=23.33 GB
  2556. 2025-05-17 22:53:26,580 - __main__ - INFO - [2025-05-17 22:53:26 TP0] Load weight begin. avail mem=23.33 GB
  2557. 2025-05-17 22:53:26,916 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  2558. 2025-05-17 22:53:27,637 - sglang - INFO - [2025-05-17 22:53:27 TP0] Using model weights format ['*.safetensors']
  2559. 2025-05-17 22:53:27,637 - __main__ - INFO - [2025-05-17 22:53:27 TP0] Using model weights format ['*.safetensors']
  2560. 2025-05-17 22:53:27,992 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  2561. 2025-05-17 22:53:28,214 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  2562. 2025-05-17 22:53:28,214 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  2563. 2025-05-17 22:53:28,505 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
  2564. 2025-05-17 22:53:28,506 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
  2565. 2025-05-17 22:53:29,069 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  2566. 2025-05-17 22:53:29,476 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.45it/s]
  2567. 2025-05-17 22:53:29,476 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.45it/s]
  2568. 2025-05-17 22:53:30,140 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  2569. 2025-05-17 22:53:30,484 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.20it/s]
  2570. 2025-05-17 22:53:30,484 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.20it/s]
  2571. 2025-05-17 22:53:31,217 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  2572. 2025-05-17 22:53:31,433 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
  2573. 2025-05-17 22:53:31,434 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
  2574. 2025-05-17 22:53:31,434 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.24it/s]
  2575. 2025-05-17 22:53:31,434 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.24it/s]
  2576. 2025-05-17 22:53:31,434 - sglang - INFO -
  2577. 2025-05-17 22:53:31,434 - __main__ - INFO -
  2578. 2025-05-17 22:53:31,581 - sglang - INFO - [2025-05-17 22:53:31 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  2579. 2025-05-17 22:53:31,582 - __main__ - INFO - [2025-05-17 22:53:31 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  2580. 2025-05-17 22:53:31,589 - sglang - INFO - [2025-05-17 22:53:31 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  2581. 2025-05-17 22:53:31,589 - __main__ - INFO - [2025-05-17 22:53:31 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  2582. 2025-05-17 22:53:31,589 - sglang - INFO - [2025-05-17 22:53:31 TP0] Memory pool end. avail mem=5.30 GB
  2583. 2025-05-17 22:53:31,590 - __main__ - INFO - [2025-05-17 22:53:31 TP0] Memory pool end. avail mem=5.30 GB
  2584. 2025-05-17 22:53:31,754 - sglang - INFO - [2025-05-17 22:53:31 TP0] Capture cuda graph begin. This can take up to several minutes.
  2585. 2025-05-17 22:53:31,755 - __main__ - INFO - [2025-05-17 22:53:31 TP0] Capture cuda graph begin. This can take up to several minutes.
  2586. 2025-05-17 22:53:32,294 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  2587. 2025-05-17 22:53:33,328 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  2588. 2025-05-17 22:53:33,770 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.04s/it] 50%|█████ | 2/4 [00:01<00:01, 1.63it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.07it/s] 100%|██████████| 4/4 [00:02<00:00, 2.36it/s] 100%|██████████| 4/4 [00:02<00:00, 1.99it/s]
  2589. 2025-05-17 22:53:33,771 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.04s/it] 50%|█████ | 2/4 [00:01<00:01, 1.63it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.07it/s] 100%|██████████| 4/4 [00:02<00:00, 2.36it/s] 100%|██████████| 4/4 [00:02<00:00, 1.99it/s]
  2590. 2025-05-17 22:53:33,771 - sglang - INFO - [2025-05-17 22:53:33 TP0] Capture cuda graph end. Time elapsed: 2.02 s
  2591. 2025-05-17 22:53:33,771 - __main__ - INFO - [2025-05-17 22:53:33 TP0] Capture cuda graph end. Time elapsed: 2.02 s
  2592. 2025-05-17 22:53:34,364 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  2593. 2025-05-17 22:53:35,424 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  2594. 2025-05-17 22:53:36,307 - sglang - INFO - [2025-05-17 22:53:36 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  2595. 2025-05-17 22:53:36,308 - __main__ - INFO - [2025-05-17 22:53:36 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  2596. 2025-05-17 22:53:36,506 - __main__ - INFO - sglang server is ready.
  2597. 2025-05-17 22:53:36,506 - __main__ - INFO - Queue remaining: 1
  2598. 2025-05-17 22:53:36,506 - __main__ - INFO -
  2599. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2600. ----------------------------------------------------------------------------------
  2601. 2025-05-17 22:53:36,506 - __main__ - INFO -
  2602. Worker ID
  2603. ---------
  2604. 2025-05-17 22:53:36,506 - __main__ - INFO - Worker 0 processing work item 1689b5b4ef8b4f3a7193fb04a81a958bc3bccb78
  2605. 2025-05-17 22:53:36,507 - __main__ - INFO - Created all tasks for 1689b5b4ef8b4f3a7193fb04a81a958bc3bccb78
  2606. 2025-05-17 22:53:36,509 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747493567/input.pdf in worker 0
  2607. 2025-05-17 22:53:37,399 - sglang - INFO - [2025-05-17 22:53:37 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  2608. 2025-05-17 22:53:37,400 - __main__ - INFO - [2025-05-17 22:53:37 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  2609. 2025-05-17 22:53:37,400 - __main__ - INFO - sglang running req: 0 queue req: 0
  2610. 2025-05-17 22:53:37,970 - sglang - INFO - [2025-05-17 22:53:37] The server is fired up and ready to roll!
  2611. 2025-05-17 22:53:37,970 - __main__ - INFO - [2025-05-17 22:53:37] The server is fired up and ready to roll!
  2612. 2025-05-17 22:53:42,840 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493567/input.pdf-1
  2613. 2025-05-17 22:53:46,579 - __main__ - INFO - Queue remaining: 0
  2614. 2025-05-17 22:53:46,579 - __main__ - INFO -
  2615. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2616. ----------------------------------------------------------------------------------
  2617. 2025-05-17 22:53:46,579 - __main__ - INFO -
  2618. Worker ID | started
  2619. ----------+--------
  2620. 0 | 1
  2621. 2025-05-17 22:53:56,581 - __main__ - INFO - Queue remaining: 0
  2622. 2025-05-17 22:53:56,581 - __main__ - INFO -
  2623. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2624. ----------------------------------------------------------------------------------
  2625. 2025-05-17 22:53:56,582 - __main__ - INFO -
  2626. Worker ID | started
  2627. ----------+--------
  2628. 0 | 1
  2629. 2025-05-17 22:54:03,354 - sglang - INFO - [2025-05-17 22:54:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  2630. 2025-05-17 22:54:03,355 - __main__ - INFO - sglang running req: 0 queue req: 0
  2631. 2025-05-17 22:54:04,789 - sglang - INFO - [2025-05-17 22:54:04 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.40, #queue-req: 0
  2632. 2025-05-17 22:54:04,789 - __main__ - INFO - sglang running req: 1 queue req: 0
  2633. 2025-05-17 22:54:05,619 - sglang - INFO - [2025-05-17 22:54:05 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.18, #queue-req: 0
  2634. 2025-05-17 22:54:05,619 - __main__ - INFO - sglang running req: 1 queue req: 0
  2635. 2025-05-17 22:54:06,442 - sglang - INFO - [2025-05-17 22:54:06 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.58, #queue-req: 0
  2636. 2025-05-17 22:54:06,442 - __main__ - INFO - sglang running req: 1 queue req: 0
  2637. 2025-05-17 22:54:06,583 - __main__ - INFO - Queue remaining: 0
  2638. 2025-05-17 22:54:06,583 - __main__ - INFO -
  2639. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2640. ----------------------------------------------------------------------------------
  2641. 2025-05-17 22:54:06,583 - __main__ - INFO -
  2642. Worker ID | started
  2643. ----------+--------
  2644. 0 | 1
  2645. 2025-05-17 22:54:07,266 - sglang - INFO - [2025-05-17 22:54:07 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.58, #queue-req: 0
  2646. 2025-05-17 22:54:07,266 - __main__ - INFO - sglang running req: 1 queue req: 0
  2647. 2025-05-17 22:54:08,088 - sglang - INFO - [2025-05-17 22:54:08 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.61, #queue-req: 0
  2648. 2025-05-17 22:54:08,089 - __main__ - INFO - sglang running req: 1 queue req: 0
  2649. 2025-05-17 22:54:08,688 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  2650. 2025-05-17 22:54:08,688 - __main__ - INFO - Worker 1 exiting due to empty queue
  2651. 2025-05-17 22:54:08,688 - __main__ - INFO - Worker 2 exiting due to empty queue
  2652. 2025-05-17 22:54:08,688 - __main__ - INFO - Worker 3 exiting due to empty queue
  2653. 2025-05-17 22:54:08,688 - __main__ - INFO - Worker 4 exiting due to empty queue
  2654. 2025-05-17 22:54:08,689 - __main__ - INFO - Worker 5 exiting due to empty queue
  2655. 2025-05-17 22:54:08,689 - __main__ - INFO - Worker 6 exiting due to empty queue
  2656. 2025-05-17 22:54:08,689 - __main__ - INFO - Worker 7 exiting due to empty queue
  2657. 2025-05-17 22:54:08,924 - sglang - INFO - [2025-05-17 22:54:08 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 47.89, #queue-req: 0
  2658. 2025-05-17 22:54:08,924 - __main__ - INFO - sglang running req: 1 queue req: 0
  2659. 2025-05-17 22:54:09,551 - __main__ - INFO - Finished TaskGroup for worker on 1689b5b4ef8b4f3a7193fb04a81a958bc3bccb78
  2660. 2025-05-17 22:54:09,551 - __main__ - INFO - Got 1 docs for 1689b5b4ef8b4f3a7193fb04a81a958bc3bccb78
  2661. 2025-05-17 22:54:09,552 - __main__ - INFO - Worker 0 exiting due to empty queue
  2662. 2025-05-17 22:54:09,553 - __main__ - INFO - Work done
  2663. 2025-05-17 22:54:09,553 - __main__ - INFO - Got cancellation request for SGLang server
  2664. 2025-05-17 22:55:54,079 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2665. 2025-05-17 22:55:54,079 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  2666. 2025-05-17 22:55:54,079 - __main__ - INFO - Found 1 total pdf paths to add
  2667. 2025-05-17 22:55:54,081 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  2668. 2025-05-17 22:55:54,300 - __main__ - INFO - Starting pipeline with PID 410354
  2669. 2025-05-17 22:55:54,300 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2670. 2025-05-17 22:55:54,768 - __main__ - INFO - No work to do, exiting
  2671. 2025-05-17 22:55:55,833 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2672. 2025-05-17 22:55:55,833 - __main__ - INFO - Loading file at olmocr_workspace/job_1747493749/input.pdf as PDF document
  2673. 2025-05-17 22:55:55,833 - __main__ - INFO - Found 1 total pdf paths to add
  2674. 2025-05-17 22:55:55,837 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  2675. 2025-05-17 22:55:56,045 - __main__ - INFO - Starting pipeline with PID 410436
  2676. 2025-05-17 22:55:56,045 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2677. 2025-05-17 22:55:56,664 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  2678. 2025-05-17 22:55:57,702 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  2679. 2025-05-17 22:55:58,747 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  2680. 2025-05-17 22:55:59,810 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  2681. 2025-05-17 22:56:00,876 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  2682. 2025-05-17 22:56:02,020 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  2683. 2025-05-17 22:56:02,743 - sglang - INFO - [2025-05-17 22:56:02] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=902798133, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2684. 2025-05-17 22:56:02,743 - __main__ - INFO - [2025-05-17 22:56:02] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=902798133, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2685. 2025-05-17 22:56:03,096 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  2686. 2025-05-17 22:56:04,164 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  2687. 2025-05-17 22:56:05,207 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  2688. 2025-05-17 22:56:06,252 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  2689. 2025-05-17 22:56:07,297 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  2690. 2025-05-17 22:56:08,328 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  2691. 2025-05-17 22:56:09,378 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  2692. 2025-05-17 22:56:10,443 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  2693. 2025-05-17 22:56:11,510 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  2694. 2025-05-17 22:56:12,059 - sglang - INFO - [2025-05-17 22:56:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
  2695. 2025-05-17 22:56:12,059 - __main__ - INFO - [2025-05-17 22:56:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
  2696. 2025-05-17 22:56:12,508 - sglang - INFO - [2025-05-17 22:56:12 TP0] Overlap scheduler is disabled for multimodal models.
  2697. 2025-05-17 22:56:12,508 - __main__ - INFO - [2025-05-17 22:56:12 TP0] Overlap scheduler is disabled for multimodal models.
  2698. 2025-05-17 22:56:12,585 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  2699. 2025-05-17 22:56:13,003 - sglang - INFO - [2025-05-17 22:56:13 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  2700. 2025-05-17 22:56:13,003 - __main__ - INFO - [2025-05-17 22:56:13 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  2701. 2025-05-17 22:56:13,003 - sglang - INFO - [2025-05-17 22:56:13 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  2702. 2025-05-17 22:56:13,003 - __main__ - INFO - [2025-05-17 22:56:13 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  2703. 2025-05-17 22:56:13,003 - sglang - INFO - [2025-05-17 22:56:13 TP0] Init torch distributed begin.
  2704. 2025-05-17 22:56:13,003 - __main__ - INFO - [2025-05-17 22:56:13 TP0] Init torch distributed begin.
  2705. 2025-05-17 22:56:13,663 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  2706. 2025-05-17 22:56:14,732 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  2707. 2025-05-17 22:56:15,781 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  2708. 2025-05-17 22:56:16,844 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  2709. 2025-05-17 22:56:17,910 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  2710. 2025-05-17 22:56:18,327 - sglang - INFO - [2025-05-17 22:56:18 TP0] Load weight begin. avail mem=23.33 GB
  2711. 2025-05-17 22:56:18,328 - __main__ - INFO - [2025-05-17 22:56:18 TP0] Load weight begin. avail mem=23.33 GB
  2712. 2025-05-17 22:56:18,987 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  2713. 2025-05-17 22:56:19,825 - sglang - INFO - [2025-05-17 22:56:19 TP0] Using model weights format ['*.safetensors']
  2714. 2025-05-17 22:56:19,825 - __main__ - INFO - [2025-05-17 22:56:19 TP0] Using model weights format ['*.safetensors']
  2715. 2025-05-17 22:56:20,063 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  2716. 2025-05-17 22:56:20,343 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  2717. 2025-05-17 22:56:20,344 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  2718. 2025-05-17 22:56:20,620 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.62it/s]
  2719. 2025-05-17 22:56:20,620 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.62it/s]
  2720. 2025-05-17 22:56:21,141 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  2721. 2025-05-17 22:56:21,516 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.56it/s]
  2722. 2025-05-17 22:56:21,516 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.56it/s]
  2723. 2025-05-17 22:56:22,218 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  2724. 2025-05-17 22:56:22,418 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.32it/s]
  2725. 2025-05-17 22:56:22,418 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.32it/s]
  2726. 2025-05-17 22:56:23,296 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  2727. 2025-05-17 22:56:23,310 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.23it/s]
  2728. 2025-05-17 22:56:23,310 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.23it/s]
  2729. 2025-05-17 22:56:23,310 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.35it/s]
  2730. 2025-05-17 22:56:23,310 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.35it/s]
  2731. 2025-05-17 22:56:23,310 - sglang - INFO -
  2732. 2025-05-17 22:56:23,310 - __main__ - INFO -
  2733. 2025-05-17 22:56:23,446 - sglang - INFO - [2025-05-17 22:56:23 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  2734. 2025-05-17 22:56:23,446 - __main__ - INFO - [2025-05-17 22:56:23 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  2735. 2025-05-17 22:56:23,452 - sglang - INFO - [2025-05-17 22:56:23 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  2736. 2025-05-17 22:56:23,452 - __main__ - INFO - [2025-05-17 22:56:23 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  2737. 2025-05-17 22:56:23,452 - sglang - INFO - [2025-05-17 22:56:23 TP0] Memory pool end. avail mem=5.30 GB
  2738. 2025-05-17 22:56:23,452 - __main__ - INFO - [2025-05-17 22:56:23 TP0] Memory pool end. avail mem=5.30 GB
  2739. 2025-05-17 22:56:23,607 - sglang - INFO - [2025-05-17 22:56:23 TP0] Capture cuda graph begin. This can take up to several minutes.
  2740. 2025-05-17 22:56:23,607 - __main__ - INFO - [2025-05-17 22:56:23 TP0] Capture cuda graph begin. This can take up to several minutes.
  2741. 2025-05-17 22:56:24,374 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  2742. 2025-05-17 22:56:25,374 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.80it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.36it/s] 100%|██████████| 4/4 [00:01<00:00, 2.79it/s] 100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
  2743. 2025-05-17 22:56:25,374 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.80it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.36it/s] 100%|██████████| 4/4 [00:01<00:00, 2.79it/s] 100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
  2744. 2025-05-17 22:56:25,448 - sglang - INFO - [2025-05-17 22:56:25 TP0] Capture cuda graph end. Time elapsed: 1.77 s
  2745. 2025-05-17 22:56:25,448 - __main__ - INFO - [2025-05-17 22:56:25 TP0] Capture cuda graph end. Time elapsed: 1.77 s
  2746. 2025-05-17 22:56:25,450 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  2747. 2025-05-17 22:56:26,518 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  2748. 2025-05-17 22:56:27,586 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  2749. 2025-05-17 22:56:28,248 - sglang - INFO - [2025-05-17 22:56:28 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  2750. 2025-05-17 22:56:28,249 - __main__ - INFO - [2025-05-17 22:56:28 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  2751. 2025-05-17 22:56:28,677 - __main__ - INFO - sglang server is ready.
  2752. 2025-05-17 22:56:28,678 - __main__ - INFO - Queue remaining: 1
  2753. 2025-05-17 22:56:28,678 - __main__ - INFO -
  2754. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2755. ----------------------------------------------------------------------------------
  2756. 2025-05-17 22:56:28,678 - __main__ - INFO -
  2757. Worker ID
  2758. ---------
  2759. 2025-05-17 22:56:28,678 - __main__ - INFO - Worker 0 processing work item a118967b13fa84e22675b237c5a5c55c4e2ce2bc
  2760. 2025-05-17 22:56:28,678 - __main__ - INFO - Created all tasks for a118967b13fa84e22675b237c5a5c55c4e2ce2bc
  2761. 2025-05-17 22:56:28,684 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747493749/input.pdf in worker 0
  2762. 2025-05-17 22:56:29,342 - sglang - INFO - [2025-05-17 22:56:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  2763. 2025-05-17 22:56:29,342 - __main__ - INFO - [2025-05-17 22:56:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  2764. 2025-05-17 22:56:29,342 - __main__ - INFO - sglang running req: 0 queue req: 0
  2765. 2025-05-17 22:56:30,276 - sglang - INFO - [2025-05-17 22:56:30] The server is fired up and ready to roll!
  2766. 2025-05-17 22:56:30,276 - __main__ - INFO - [2025-05-17 22:56:30] The server is fired up and ready to roll!
  2767. 2025-05-17 22:56:35,358 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-1
  2768. 2025-05-17 22:56:35,363 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-2
  2769. 2025-05-17 22:56:35,372 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-3
  2770. 2025-05-17 22:56:35,383 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-4
  2771. 2025-05-17 22:56:35,391 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493749/input.pdf-5
  2772. 2025-05-17 22:56:38,680 - __main__ - INFO - Queue remaining: 0
  2773. 2025-05-17 22:56:38,680 - __main__ - INFO -
  2774. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2775. ----------------------------------------------------------------------------------
  2776. 2025-05-17 22:56:38,680 - __main__ - INFO -
  2777. Worker ID | started
  2778. ----------+--------
  2779. 0 | 5
  2780. 2025-05-17 22:56:48,682 - __main__ - INFO - Queue remaining: 0
  2781. 2025-05-17 22:56:48,683 - __main__ - INFO -
  2782. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2783. ----------------------------------------------------------------------------------
  2784. 2025-05-17 22:56:48,683 - __main__ - INFO -
  2785. Worker ID | started
  2786. ----------+--------
  2787. 0 | 5
  2788. 2025-05-17 22:56:55,994 - sglang - INFO - [2025-05-17 22:56:55 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  2789. 2025-05-17 22:56:55,995 - __main__ - INFO - sglang running req: 0 queue req: 0
  2790. 2025-05-17 22:56:56,799 - sglang - INFO - [2025-05-17 22:56:56 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  2791. 2025-05-17 22:56:56,799 - __main__ - INFO - sglang running req: 1 queue req: 0
  2792. 2025-05-17 22:56:58,685 - __main__ - INFO - Queue remaining: 0
  2793. 2025-05-17 22:56:58,685 - __main__ - INFO -
  2794. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2795. ----------------------------------------------------------------------------------
  2796. 2025-05-17 22:56:58,685 - __main__ - INFO -
  2797. Worker ID | started
  2798. ----------+--------
  2799. 0 | 5
  2800. 2025-05-17 22:57:00,260 - sglang - INFO - [2025-05-17 22:57:00 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.37, #queue-req: 0
  2801. 2025-05-17 22:57:00,260 - __main__ - INFO - sglang running req: 5 queue req: 0
  2802. 2025-05-17 22:57:00,988 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  2803. 2025-05-17 22:57:00,988 - __main__ - INFO - Worker 1 exiting due to empty queue
  2804. 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 2 exiting due to empty queue
  2805. 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 3 exiting due to empty queue
  2806. 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 4 exiting due to empty queue
  2807. 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 5 exiting due to empty queue
  2808. 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 6 exiting due to empty queue
  2809. 2025-05-17 22:57:00,989 - __main__ - INFO - Worker 7 exiting due to empty queue
  2810. 2025-05-17 22:57:01,123 - sglang - INFO - [2025-05-17 22:57:01 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 231.83, #queue-req: 0
  2811. 2025-05-17 22:57:01,123 - __main__ - INFO - sglang running req: 5 queue req: 0
  2812. 2025-05-17 22:57:01,981 - sglang - INFO - [2025-05-17 22:57:01 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 232.99, #queue-req: 0
  2813. 2025-05-17 22:57:01,981 - __main__ - INFO - sglang running req: 5 queue req: 0
  2814. 2025-05-17 22:57:02,840 - sglang - INFO - [2025-05-17 22:57:02 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 232.71, #queue-req: 0
  2815. 2025-05-17 22:57:02,841 - __main__ - INFO - sglang running req: 5 queue req: 0
  2816. 2025-05-17 22:57:03,705 - sglang - INFO - [2025-05-17 22:57:03 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 231.30, #queue-req: 0
  2817. 2025-05-17 22:57:03,705 - __main__ - INFO - sglang running req: 5 queue req: 0
  2818. 2025-05-17 22:57:04,570 - sglang - INFO - [2025-05-17 22:57:04 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 231.17, #queue-req: 0
  2819. 2025-05-17 22:57:04,570 - __main__ - INFO - sglang running req: 5 queue req: 0
  2820. 2025-05-17 22:57:05,435 - sglang - INFO - [2025-05-17 22:57:05 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.16, #queue-req: 0
  2821. 2025-05-17 22:57:05,435 - __main__ - INFO - sglang running req: 5 queue req: 0
  2822. 2025-05-17 22:57:06,299 - sglang - INFO - [2025-05-17 22:57:06 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 231.68, #queue-req: 0
  2823. 2025-05-17 22:57:06,299 - __main__ - INFO - sglang running req: 5 queue req: 0
  2824. 2025-05-17 22:57:07,163 - sglang - INFO - [2025-05-17 22:57:07 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 215.18, #queue-req: 0
  2825. 2025-05-17 22:57:07,163 - __main__ - INFO - sglang running req: 3 queue req: 0
  2826. 2025-05-17 22:57:08,012 - sglang - INFO - [2025-05-17 22:57:08 TP0] Decode batch. #running-req: 3, #token: 5146, token usage: 0.14, gen throughput (token/s): 141.30, #queue-req: 0
  2827. 2025-05-17 22:57:08,012 - __main__ - INFO - sglang running req: 3 queue req: 0
  2828. 2025-05-17 22:57:08,687 - __main__ - INFO - Queue remaining: 0
  2829. 2025-05-17 22:57:08,687 - __main__ - INFO -
  2830. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2831. ----------------------------------------------------------------------------------
  2832. sglang_input_tokens 106.67 106.67
  2833. sglang_output_tokens 20.49 20.49
  2834. 2025-05-17 22:57:08,687 - __main__ - INFO -
  2835. Worker ID | finished | started
  2836. ----------+----------+--------
  2837. 0 | 4 | 5
  2838. 2025-05-17 22:57:08,844 - sglang - INFO - [2025-05-17 22:57:08 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 61.35, #queue-req: 0
  2839. 2025-05-17 22:57:08,844 - __main__ - INFO - sglang running req: 1 queue req: 0
  2840. 2025-05-17 22:57:09,672 - sglang - INFO - [2025-05-17 22:57:09 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.30, #queue-req: 0
  2841. 2025-05-17 22:57:09,672 - __main__ - INFO - sglang running req: 1 queue req: 0
  2842. 2025-05-17 22:57:10,463 - __main__ - INFO - Finished TaskGroup for worker on a118967b13fa84e22675b237c5a5c55c4e2ce2bc
  2843. 2025-05-17 22:57:10,463 - __main__ - INFO - Got 1 docs for a118967b13fa84e22675b237c5a5c55c4e2ce2bc
  2844. 2025-05-17 22:57:10,464 - __main__ - INFO - Worker 0 exiting due to empty queue
  2845. 2025-05-17 22:57:10,465 - __main__ - INFO - Work done
  2846. 2025-05-17 22:57:10,465 - __main__ - INFO - Got cancellation request for SGLang server
  2847. 2025-05-17 22:58:44,026 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  2848. 2025-05-17 22:58:44,026 - __main__ - INFO - Loading file at olmocr_workspace/job_1747493917/input.pdf as PDF document
  2849. 2025-05-17 22:58:44,026 - __main__ - INFO - Found 1 total pdf paths to add
  2850. 2025-05-17 22:58:44,032 - __main__ - INFO - Calculated items_per_group: 33 based on average pages per PDF: 15.00
  2851. 2025-05-17 22:58:44,333 - __main__ - INFO - Starting pipeline with PID 412467
  2852. 2025-05-17 22:58:44,333 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  2853. 2025-05-17 22:58:50,937 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  2854. 2025-05-17 22:58:51,978 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  2855. 2025-05-17 22:58:53,023 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  2856. 2025-05-17 22:58:54,089 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  2857. 2025-05-17 22:58:55,158 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  2858. 2025-05-17 22:58:56,228 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  2859. 2025-05-17 22:58:56,597 - sglang - INFO - [2025-05-17 22:58:56] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=807558455, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2860. 2025-05-17 22:58:56,597 - __main__ - INFO - [2025-05-17 22:58:56] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=807558455, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  2861. 2025-05-17 22:58:57,298 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  2862. 2025-05-17 22:58:58,370 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  2863. 2025-05-17 22:58:59,436 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  2864. 2025-05-17 22:59:00,505 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  2865. 2025-05-17 22:59:01,552 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  2866. 2025-05-17 22:59:02,617 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  2867. 2025-05-17 22:59:03,683 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  2868. 2025-05-17 22:59:04,745 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  2869. 2025-05-17 22:59:05,781 - sglang - INFO - [2025-05-17 22:59:05] Use chat template for the OpenAI-compatible API server: qwen2-vl
  2870. 2025-05-17 22:59:05,782 - __main__ - INFO - [2025-05-17 22:59:05] Use chat template for the OpenAI-compatible API server: qwen2-vl
  2871. 2025-05-17 22:59:05,783 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  2872. 2025-05-17 22:59:06,821 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  2873. 2025-05-17 22:59:07,880 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  2874. 2025-05-17 22:59:08,918 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  2875. 2025-05-17 22:59:09,977 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  2876. 2025-05-17 22:59:11,033 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  2877. 2025-05-17 22:59:11,318 - sglang - INFO - [2025-05-17 22:59:11 TP0] Overlap scheduler is disabled for multimodal models.
  2878. 2025-05-17 22:59:11,318 - __main__ - INFO - [2025-05-17 22:59:11 TP0] Overlap scheduler is disabled for multimodal models.
  2879. 2025-05-17 22:59:12,112 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  2880. 2025-05-17 22:59:12,129 - sglang - INFO - [2025-05-17 22:59:12 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  2881. 2025-05-17 22:59:12,130 - __main__ - INFO - [2025-05-17 22:59:12 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  2882. 2025-05-17 22:59:12,130 - sglang - INFO - [2025-05-17 22:59:12 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  2883. 2025-05-17 22:59:12,130 - __main__ - INFO - [2025-05-17 22:59:12 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  2884. 2025-05-17 22:59:12,130 - sglang - INFO - [2025-05-17 22:59:12 TP0] Init torch distributed begin.
  2885. 2025-05-17 22:59:12,130 - __main__ - INFO - [2025-05-17 22:59:12 TP0] Init torch distributed begin.
  2886. 2025-05-17 22:59:13,187 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  2887. 2025-05-17 22:59:14,258 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  2888. 2025-05-17 22:59:15,325 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  2889. 2025-05-17 22:59:16,392 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  2890. 2025-05-17 22:59:17,459 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  2891. 2025-05-17 22:59:17,497 - sglang - INFO - [2025-05-17 22:59:17 TP0] Load weight begin. avail mem=23.33 GB
  2892. 2025-05-17 22:59:17,497 - __main__ - INFO - [2025-05-17 22:59:17 TP0] Load weight begin. avail mem=23.33 GB
  2893. 2025-05-17 22:59:18,534 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  2894. 2025-05-17 22:59:19,026 - sglang - INFO - [2025-05-17 22:59:19 TP0] Using model weights format ['*.safetensors']
  2895. 2025-05-17 22:59:19,026 - __main__ - INFO - [2025-05-17 22:59:19 TP0] Using model weights format ['*.safetensors']
  2896. 2025-05-17 22:59:19,612 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  2897. 2025-05-17 22:59:19,612 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  2898. 2025-05-17 22:59:19,614 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  2899. 2025-05-17 22:59:19,821 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.56it/s]
  2900. 2025-05-17 22:59:19,821 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.56it/s]
  2901. 2025-05-17 22:59:20,693 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  2902. 2025-05-17 22:59:20,764 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
  2903. 2025-05-17 22:59:20,764 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
  2904. 2025-05-17 22:59:21,773 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.26it/s]
  2905. 2025-05-17 22:59:21,773 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.26it/s]
  2906. 2025-05-17 22:59:21,774 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  2907. 2025-05-17 22:59:22,606 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  2908. 2025-05-17 22:59:22,607 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  2909. 2025-05-17 22:59:22,607 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.30it/s]
  2910. 2025-05-17 22:59:22,607 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.30it/s]
  2911. 2025-05-17 22:59:22,607 - sglang - INFO -
  2912. 2025-05-17 22:59:22,607 - __main__ - INFO -
  2913. 2025-05-17 22:59:22,738 - sglang - INFO - [2025-05-17 22:59:22 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  2914. 2025-05-17 22:59:22,738 - __main__ - INFO - [2025-05-17 22:59:22 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  2915. 2025-05-17 22:59:22,744 - sglang - INFO - [2025-05-17 22:59:22 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  2916. 2025-05-17 22:59:22,744 - __main__ - INFO - [2025-05-17 22:59:22 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  2917. 2025-05-17 22:59:22,744 - sglang - INFO - [2025-05-17 22:59:22 TP0] Memory pool end. avail mem=5.30 GB
  2918. 2025-05-17 22:59:22,744 - __main__ - INFO - [2025-05-17 22:59:22 TP0] Memory pool end. avail mem=5.30 GB
  2919. 2025-05-17 22:59:22,854 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  2920. 2025-05-17 22:59:22,898 - sglang - INFO - [2025-05-17 22:59:22 TP0] Capture cuda graph begin. This can take up to several minutes.
  2921. 2025-05-17 22:59:22,898 - __main__ - INFO - [2025-05-17 22:59:22 TP0] Capture cuda graph begin. This can take up to several minutes.
  2922. 2025-05-17 22:59:23,933 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  2923. 2025-05-17 22:59:24,663 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.03it/s] 50%|█████ | 2/4 [00:01<00:01, 1.82it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.38it/s] 100%|██████████| 4/4 [00:01<00:00, 2.77it/s] 100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
  2924. 2025-05-17 22:59:24,663 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.03it/s] 50%|█████ | 2/4 [00:01<00:01, 1.82it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.38it/s] 100%|██████████| 4/4 [00:01<00:00, 2.77it/s] 100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
  2925. 2025-05-17 22:59:24,663 - sglang - INFO - [2025-05-17 22:59:24 TP0] Capture cuda graph end. Time elapsed: 1.76 s
  2926. 2025-05-17 22:59:24,663 - __main__ - INFO - [2025-05-17 22:59:24 TP0] Capture cuda graph end. Time elapsed: 1.76 s
  2927. 2025-05-17 22:59:25,014 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  2928. 2025-05-17 22:59:26,070 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  2929. 2025-05-17 22:59:27,135 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  2930. 2025-05-17 22:59:27,961 - sglang - INFO - [2025-05-17 22:59:27 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  2931. 2025-05-17 22:59:27,961 - __main__ - INFO - [2025-05-17 22:59:27 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  2932. 2025-05-17 22:59:28,225 - __main__ - INFO - sglang server is ready.
  2933. 2025-05-17 22:59:28,226 - __main__ - INFO - Queue remaining: 1
  2934. 2025-05-17 22:59:28,226 - __main__ - INFO -
  2935. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2936. ----------------------------------------------------------------------------------
  2937. 2025-05-17 22:59:28,226 - __main__ - INFO -
  2938. Worker ID
  2939. ---------
  2940. 2025-05-17 22:59:28,226 - __main__ - INFO - Worker 0 processing work item 02907a3ba6226f0399bbf3080296d8a1a280e502
  2941. 2025-05-17 22:59:28,226 - __main__ - INFO - Created all tasks for 02907a3ba6226f0399bbf3080296d8a1a280e502
  2942. 2025-05-17 22:59:28,235 - __main__ - INFO - Got 15 pages to do for olmocr_workspace/job_1747493917/input.pdf in worker 0
  2943. 2025-05-17 22:59:29,054 - sglang - INFO - [2025-05-17 22:59:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  2944. 2025-05-17 22:59:29,054 - __main__ - INFO - [2025-05-17 22:59:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  2945. 2025-05-17 22:59:29,054 - __main__ - INFO - sglang running req: 0 queue req: 0
  2946. 2025-05-17 22:59:30,511 - sglang - INFO - [2025-05-17 22:59:30] The server is fired up and ready to roll!
  2947. 2025-05-17 22:59:30,511 - __main__ - INFO - [2025-05-17 22:59:30] The server is fired up and ready to roll!
  2948. 2025-05-17 22:59:35,607 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-1
  2949. 2025-05-17 22:59:35,625 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-3
  2950. 2025-05-17 22:59:35,631 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-2
  2951. 2025-05-17 22:59:35,647 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-4
  2952. 2025-05-17 22:59:35,653 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-5
  2953. 2025-05-17 22:59:35,664 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-6
  2954. 2025-05-17 22:59:35,670 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-7
  2955. 2025-05-17 22:59:35,674 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-8
  2956. 2025-05-17 22:59:35,680 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-9
  2957. 2025-05-17 22:59:35,688 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-12
  2958. 2025-05-17 22:59:35,689 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-10
  2959. 2025-05-17 22:59:35,695 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-13
  2960. 2025-05-17 22:59:35,696 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-14
  2961. 2025-05-17 22:59:35,698 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-11
  2962. 2025-05-17 22:59:35,704 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-15
  2963. 2025-05-17 22:59:38,226 - __main__ - INFO - Queue remaining: 0
  2964. 2025-05-17 22:59:38,227 - __main__ - INFO -
  2965. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2966. ----------------------------------------------------------------------------------
  2967. 2025-05-17 22:59:38,227 - __main__ - INFO -
  2968. Worker ID | started
  2969. ----------+--------
  2970. 0 | 15
  2971. 2025-05-17 22:59:48,279 - __main__ - INFO - Queue remaining: 0
  2972. 2025-05-17 22:59:48,279 - __main__ - INFO -
  2973. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2974. ----------------------------------------------------------------------------------
  2975. 2025-05-17 22:59:48,279 - __main__ - INFO -
  2976. Worker ID | started
  2977. ----------+--------
  2978. 0 | 15
  2979. 2025-05-17 22:59:57,415 - sglang - INFO - [2025-05-17 22:59:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2470, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  2980. 2025-05-17 22:59:57,415 - __main__ - INFO - sglang running req: 0 queue req: 0
  2981. 2025-05-17 22:59:58,280 - __main__ - INFO - Queue remaining: 0
  2982. 2025-05-17 22:59:58,280 - __main__ - INFO -
  2983. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  2984. ----------------------------------------------------------------------------------
  2985. 2025-05-17 22:59:58,280 - __main__ - INFO -
  2986. Worker ID | started
  2987. ----------+--------
  2988. 0 | 15
  2989. 2025-05-17 22:59:58,497 - sglang - INFO - [2025-05-17 22:59:58 TP0] Prefill batch. #new-seq: 6, #new-token: 13288, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.07, #running-req: 1, #queue-req: 8
  2990. 2025-05-17 22:59:58,498 - __main__ - INFO - sglang running req: 1 queue req: 8
  2991. 2025-05-17 23:00:03,556 - sglang - INFO - [2025-05-17 23:00:03 TP0] Decode batch. #running-req: 7, #token: 15989, token usage: 0.42, gen throughput (token/s): 6.69, #queue-req: 8
  2992. 2025-05-17 23:00:03,556 - __main__ - INFO - sglang running req: 7 queue req: 8
  2993. 2025-05-17 23:00:04,435 - sglang - INFO - [2025-05-17 23:00:04 TP0] Decode batch. #running-req: 7, #token: 16269, token usage: 0.43, gen throughput (token/s): 318.57, #queue-req: 8
  2994. 2025-05-17 23:00:04,435 - __main__ - INFO - sglang running req: 7 queue req: 8
  2995. 2025-05-17 23:00:05,312 - sglang - INFO - [2025-05-17 23:00:05 TP0] Decode batch. #running-req: 7, #token: 16549, token usage: 0.44, gen throughput (token/s): 319.02, #queue-req: 8
  2996. 2025-05-17 23:00:05,313 - __main__ - INFO - sglang running req: 7 queue req: 8
  2997. 2025-05-17 23:00:06,190 - sglang - INFO - [2025-05-17 23:00:06 TP0] Decode batch. #running-req: 7, #token: 16829, token usage: 0.44, gen throughput (token/s): 319.03, #queue-req: 8
  2998. 2025-05-17 23:00:06,190 - __main__ - INFO - sglang running req: 7 queue req: 8
  2999. 2025-05-17 23:00:07,076 - sglang - INFO - [2025-05-17 23:00:07 TP0] Decode batch. #running-req: 7, #token: 17109, token usage: 0.45, gen throughput (token/s): 316.06, #queue-req: 8
  3000. 2025-05-17 23:00:07,076 - __main__ - INFO - sglang running req: 7 queue req: 8
  3001. 2025-05-17 23:00:07,956 - sglang - INFO - [2025-05-17 23:00:07 TP0] Decode batch. #running-req: 7, #token: 17389, token usage: 0.46, gen throughput (token/s): 318.18, #queue-req: 8
  3002. 2025-05-17 23:00:07,956 - __main__ - INFO - sglang running req: 7 queue req: 8
  3003. 2025-05-17 23:00:08,281 - __main__ - INFO - Queue remaining: 0
  3004. 2025-05-17 23:00:08,282 - __main__ - INFO -
  3005. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3006. ----------------------------------------------------------------------------------
  3007. 2025-05-17 23:00:08,282 - __main__ - INFO -
  3008. Worker ID | started
  3009. ----------+--------
  3010. 0 | 15
  3011. 2025-05-17 23:00:08,836 - sglang - INFO - [2025-05-17 23:00:08 TP0] Decode batch. #running-req: 7, #token: 17669, token usage: 0.47, gen throughput (token/s): 318.06, #queue-req: 8
  3012. 2025-05-17 23:00:08,837 - __main__ - INFO - sglang running req: 7 queue req: 8
  3013. 2025-05-17 23:00:09,717 - sglang - INFO - [2025-05-17 23:00:09 TP0] Decode batch. #running-req: 7, #token: 17949, token usage: 0.47, gen throughput (token/s): 317.94, #queue-req: 8
  3014. 2025-05-17 23:00:09,717 - __main__ - INFO - sglang running req: 7 queue req: 8
  3015. 2025-05-17 23:00:10,605 - sglang - INFO - [2025-05-17 23:00:10 TP0] Decode batch. #running-req: 7, #token: 18229, token usage: 0.48, gen throughput (token/s): 315.17, #queue-req: 8
  3016. 2025-05-17 23:00:10,606 - __main__ - INFO - sglang running req: 7 queue req: 8
  3017. 2025-05-17 23:00:11,490 - sglang - INFO - [2025-05-17 23:00:11 TP0] Decode batch. #running-req: 7, #token: 18509, token usage: 0.49, gen throughput (token/s): 316.33, #queue-req: 8
  3018. 2025-05-17 23:00:11,491 - __main__ - INFO - sglang running req: 7 queue req: 8
  3019. 2025-05-17 23:00:12,374 - sglang - INFO - [2025-05-17 23:00:12 TP0] Decode batch. #running-req: 7, #token: 18789, token usage: 0.49, gen throughput (token/s): 317.03, #queue-req: 8
  3020. 2025-05-17 23:00:12,374 - __main__ - INFO - sglang running req: 7 queue req: 8
  3021. 2025-05-17 23:00:13,257 - sglang - INFO - [2025-05-17 23:00:13 TP0] Decode batch. #running-req: 7, #token: 19069, token usage: 0.50, gen throughput (token/s): 316.92, #queue-req: 8
  3022. 2025-05-17 23:00:13,257 - __main__ - INFO - sglang running req: 7 queue req: 8
  3023. 2025-05-17 23:00:13,523 - sglang - INFO - [2025-05-17 23:00:13 TP0] Prefill batch. #new-seq: 3, #new-token: 6276, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.44, #running-req: 6, #queue-req: 5
  3024. 2025-05-17 23:00:13,523 - __main__ - INFO - sglang running req: 6 queue req: 5
  3025. 2025-05-17 23:00:16,343 - sglang - INFO - [2025-05-17 23:00:16 TP0] Decode batch. #running-req: 9, #token: 23262, token usage: 0.61, gen throughput (token/s): 108.57, #queue-req: 5
  3026. 2025-05-17 23:00:16,343 - __main__ - INFO - sglang running req: 9 queue req: 5
  3027. 2025-05-17 23:00:17,294 - sglang - INFO - [2025-05-17 23:00:17 TP0] Decode batch. #running-req: 9, #token: 23622, token usage: 0.62, gen throughput (token/s): 378.50, #queue-req: 5
  3028. 2025-05-17 23:00:17,294 - __main__ - INFO - sglang running req: 9 queue req: 5
  3029. 2025-05-17 23:00:17,460 - sglang - INFO - [2025-05-17 23:00:17 TP0] Prefill batch. #new-seq: 2, #new-token: 3818, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 8, #queue-req: 3
  3030. 2025-05-17 23:00:17,460 - __main__ - INFO - sglang running req: 8 queue req: 3
  3031. 2025-05-17 23:00:18,282 - __main__ - INFO - Queue remaining: 0
  3032. 2025-05-17 23:00:18,283 - __main__ - INFO -
  3033. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3034. ----------------------------------------------------------------------------------
  3035. sglang_input_tokens 46.72 46.72
  3036. sglang_output_tokens 11.09 11.09
  3037. 2025-05-17 23:00:18,283 - __main__ - INFO -
  3038. Worker ID | finished | started
  3039. ----------+----------+--------
  3040. 0 | 2 | 15
  3041. 2025-05-17 23:00:19,133 - sglang - INFO - [2025-05-17 23:00:19 TP0] Prefill batch. #new-seq: 2, #new-token: 3881, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.58, #running-req: 9, #queue-req: 1
  3042. 2025-05-17 23:00:19,133 - __main__ - INFO - sglang running req: 9 queue req: 1
  3043. 2025-05-17 23:00:21,030 - sglang - INFO - [2025-05-17 23:00:21 TP0] Decode batch. #running-req: 11, #token: 23317, token usage: 0.61, gen throughput (token/s): 110.28, #queue-req: 1
  3044. 2025-05-17 23:00:21,030 - __main__ - INFO - sglang running req: 11 queue req: 1
  3045. 2025-05-17 23:00:21,054 - sglang - INFO - [2025-05-17 23:00:21 TP0] Prefill batch. #new-seq: 1, #new-token: 1868, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 10, #queue-req: 0
  3046. 2025-05-17 23:00:21,054 - __main__ - INFO - sglang running req: 10 queue req: 0
  3047. 2025-05-17 23:00:21,215 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  3048. 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 1 exiting due to empty queue
  3049. 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 2 exiting due to empty queue
  3050. 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 3 exiting due to empty queue
  3051. 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 4 exiting due to empty queue
  3052. 2025-05-17 23:00:21,216 - __main__ - INFO - Worker 5 exiting due to empty queue
  3053. 2025-05-17 23:00:21,217 - __main__ - INFO - Worker 6 exiting due to empty queue
  3054. 2025-05-17 23:00:21,217 - __main__ - INFO - Worker 7 exiting due to empty queue
  3055. 2025-05-17 23:00:22,713 - sglang - INFO - [2025-05-17 23:00:22 TP0] Decode batch. #running-req: 10, #token: 24380, token usage: 0.64, gen throughput (token/s): 245.41, #queue-req: 0
  3056. 2025-05-17 23:00:22,713 - __main__ - INFO - sglang running req: 10 queue req: 0
  3057. 2025-05-17 23:00:23,666 - sglang - INFO - [2025-05-17 23:00:23 TP0] Decode batch. #running-req: 9, #token: 23456, token usage: 0.62, gen throughput (token/s): 396.29, #queue-req: 0
  3058. 2025-05-17 23:00:23,667 - __main__ - INFO - sglang running req: 9 queue req: 0
  3059. 2025-05-17 23:00:24,620 - sglang - INFO - [2025-05-17 23:00:24 TP0] Decode batch. #running-req: 9, #token: 23816, token usage: 0.63, gen throughput (token/s): 377.68, #queue-req: 0
  3060. 2025-05-17 23:00:24,620 - __main__ - INFO - sglang running req: 9 queue req: 0
  3061. 2025-05-17 23:00:25,579 - sglang - INFO - [2025-05-17 23:00:25 TP0] Decode batch. #running-req: 9, #token: 24176, token usage: 0.64, gen throughput (token/s): 375.28, #queue-req: 0
  3062. 2025-05-17 23:00:25,579 - __main__ - INFO - sglang running req: 9 queue req: 0
  3063. 2025-05-17 23:00:26,481 - sglang - INFO - [2025-05-17 23:00:26 TP0] Decode batch. #running-req: 7, #token: 19555, token usage: 0.51, gen throughput (token/s): 328.25, #queue-req: 0
  3064. 2025-05-17 23:00:26,481 - __main__ - INFO - sglang running req: 7 queue req: 0
  3065. 2025-05-17 23:00:27,373 - sglang - INFO - [2025-05-17 23:00:27 TP0] Decode batch. #running-req: 6, #token: 16590, token usage: 0.44, gen throughput (token/s): 308.05, #queue-req: 0
  3066. 2025-05-17 23:00:27,374 - __main__ - INFO - sglang running req: 6 queue req: 0
  3067. 2025-05-17 23:00:28,248 - sglang - INFO - [2025-05-17 23:00:28 TP0] Decode batch. #running-req: 5, #token: 13573, token usage: 0.36, gen throughput (token/s): 229.72, #queue-req: 0
  3068. 2025-05-17 23:00:28,249 - __main__ - INFO - sglang running req: 5 queue req: 0
  3069. 2025-05-17 23:00:28,284 - __main__ - INFO - Queue remaining: 0
  3070. 2025-05-17 23:00:28,284 - __main__ - INFO -
  3071. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3072. ----------------------------------------------------------------------------------
  3073. sglang_input_tokens 188.90 188.90
  3074. sglang_output_tokens 48.36 48.36
  3075. 2025-05-17 23:00:28,285 - __main__ - INFO -
  3076. Worker ID | finished | started
  3077. ----------+----------+--------
  3078. 0 | 10 | 15
  3079. 2025-05-17 23:00:29,130 - sglang - INFO - [2025-05-17 23:00:29 TP0] Decode batch. #running-req: 5, #token: 13773, token usage: 0.36, gen throughput (token/s): 226.73, #queue-req: 0
  3080. 2025-05-17 23:00:29,131 - __main__ - INFO - sglang running req: 5 queue req: 0
  3081. 2025-05-17 23:00:30,014 - sglang - INFO - [2025-05-17 23:00:30 TP0] Decode batch. #running-req: 5, #token: 13973, token usage: 0.37, gen throughput (token/s): 226.49, #queue-req: 0
  3082. 2025-05-17 23:00:30,014 - __main__ - INFO - sglang running req: 5 queue req: 0
  3083. 2025-05-17 23:00:30,894 - sglang - INFO - [2025-05-17 23:00:30 TP0] Decode batch. #running-req: 5, #token: 14173, token usage: 0.37, gen throughput (token/s): 227.18, #queue-req: 0
  3084. 2025-05-17 23:00:30,894 - __main__ - INFO - sglang running req: 5 queue req: 0
  3085. 2025-05-17 23:00:31,778 - sglang - INFO - [2025-05-17 23:00:31 TP0] Decode batch. #running-req: 5, #token: 14373, token usage: 0.38, gen throughput (token/s): 226.30, #queue-req: 0
  3086. 2025-05-17 23:00:31,778 - __main__ - INFO - sglang running req: 5 queue req: 0
  3087. 2025-05-17 23:00:32,659 - sglang - INFO - [2025-05-17 23:00:32 TP0] Decode batch. #running-req: 5, #token: 14573, token usage: 0.38, gen throughput (token/s): 226.91, #queue-req: 0
  3088. 2025-05-17 23:00:32,659 - __main__ - INFO - sglang running req: 5 queue req: 0
  3089. 2025-05-17 23:00:33,155 - __main__ - INFO - Got invalid_page rotation for olmocr_workspace/job_1747493917/input.pdf-15 attempt 0, retrying with 90 rotation
  3090. 2025-05-17 23:00:33,155 - __main__ - WARNING - ValueError on attempt 0 for olmocr_workspace/job_1747493917/input.pdf-15: <class 'ValueError'> - invalid_page rotation for olmocr_workspace/job_1747493917/input.pdf-15
  3091. 2025-05-17 23:00:33,430 - __main__ - INFO - Built page query for olmocr_workspace/job_1747493917/input.pdf-15
  3092. 2025-05-17 23:00:33,547 - sglang - INFO - [2025-05-17 23:00:33 TP0] Decode batch. #running-req: 3, #token: 9596, token usage: 0.25, gen throughput (token/s): 199.32, #queue-req: 0
  3093. 2025-05-17 23:00:33,547 - __main__ - INFO - sglang running req: 3 queue req: 0
  3094. 2025-05-17 23:00:33,648 - sglang - INFO - [2025-05-17 23:00:33 TP0] Prefill batch. #new-seq: 1, #new-token: 1868, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.25, #running-req: 3, #queue-req: 0
  3095. 2025-05-17 23:00:33,648 - __main__ - INFO - sglang running req: 3 queue req: 0
  3096. 2025-05-17 23:00:35,157 - sglang - INFO - [2025-05-17 23:00:35 TP0] Decode batch. #running-req: 4, #token: 11620, token usage: 0.31, gen throughput (token/s): 96.88, #queue-req: 0
  3097. 2025-05-17 23:00:35,158 - __main__ - INFO - sglang running req: 4 queue req: 0
  3098. 2025-05-17 23:00:36,027 - sglang - INFO - [2025-05-17 23:00:36 TP0] Decode batch. #running-req: 4, #token: 11780, token usage: 0.31, gen throughput (token/s): 183.98, #queue-req: 0
  3099. 2025-05-17 23:00:36,027 - __main__ - INFO - sglang running req: 4 queue req: 0
  3100. 2025-05-17 23:00:36,903 - sglang - INFO - [2025-05-17 23:00:36 TP0] Decode batch. #running-req: 4, #token: 11940, token usage: 0.31, gen throughput (token/s): 182.74, #queue-req: 0
  3101. 2025-05-17 23:00:36,903 - __main__ - INFO - sglang running req: 4 queue req: 0
  3102. 2025-05-17 23:00:37,775 - sglang - INFO - [2025-05-17 23:00:37 TP0] Decode batch. #running-req: 4, #token: 12100, token usage: 0.32, gen throughput (token/s): 183.27, #queue-req: 0
  3103. 2025-05-17 23:00:37,776 - __main__ - INFO - sglang running req: 4 queue req: 0
  3104. 2025-05-17 23:00:38,285 - __main__ - INFO - Queue remaining: 0
  3105. 2025-05-17 23:00:38,285 - __main__ - INFO -
  3106. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3107. ----------------------------------------------------------------------------------
  3108. sglang_input_tokens 230.12 230.12
  3109. sglang_output_tokens 60.90 60.90
  3110. 2025-05-17 23:00:38,286 - __main__ - INFO -
  3111. Worker ID | finished | started
  3112. ----------+----------+--------
  3113. 0 | 12 | 15
  3114. 2025-05-17 23:00:38,641 - sglang - INFO - [2025-05-17 23:00:38 TP0] Decode batch. #running-req: 3, #token: 8863, token usage: 0.23, gen throughput (token/s): 141.03, #queue-req: 0
  3115. 2025-05-17 23:00:38,641 - __main__ - INFO - sglang running req: 3 queue req: 0
  3116. 2025-05-17 23:00:39,505 - sglang - INFO - [2025-05-17 23:00:39 TP0] Decode batch. #running-req: 3, #token: 8983, token usage: 0.24, gen throughput (token/s): 138.86, #queue-req: 0
  3117. 2025-05-17 23:00:39,505 - __main__ - INFO - sglang running req: 3 queue req: 0
  3118. 2025-05-17 23:00:40,367 - sglang - INFO - [2025-05-17 23:00:40 TP0] Decode batch. #running-req: 2, #token: 5623, token usage: 0.15, gen throughput (token/s): 138.00, #queue-req: 0
  3119. 2025-05-17 23:00:40,367 - __main__ - INFO - sglang running req: 2 queue req: 0
  3120. 2025-05-17 23:00:41,219 - sglang - INFO - [2025-05-17 23:00:41 TP0] Decode batch. #running-req: 1, #token: 2184, token usage: 0.06, gen throughput (token/s): 79.84, #queue-req: 0
  3121. 2025-05-17 23:00:41,219 - __main__ - INFO - sglang running req: 1 queue req: 0
  3122. 2025-05-17 23:00:42,057 - sglang - INFO - [2025-05-17 23:00:42 TP0] Decode batch. #running-req: 1, #token: 2224, token usage: 0.06, gen throughput (token/s): 47.74, #queue-req: 0
  3123. 2025-05-17 23:00:42,057 - __main__ - INFO - sglang running req: 1 queue req: 0
  3124. 2025-05-17 23:00:42,892 - sglang - INFO - [2025-05-17 23:00:42 TP0] Decode batch. #running-req: 1, #token: 2264, token usage: 0.06, gen throughput (token/s): 47.90, #queue-req: 0
  3125. 2025-05-17 23:00:42,892 - __main__ - INFO - sglang running req: 1 queue req: 0
  3126. 2025-05-17 23:00:43,727 - sglang - INFO - [2025-05-17 23:00:43 TP0] Decode batch. #running-req: 1, #token: 2304, token usage: 0.06, gen throughput (token/s): 47.87, #queue-req: 0
  3127. 2025-05-17 23:00:43,728 - __main__ - INFO - sglang running req: 1 queue req: 0
  3128. 2025-05-17 23:00:44,566 - sglang - INFO - [2025-05-17 23:00:44 TP0] Decode batch. #running-req: 1, #token: 2344, token usage: 0.06, gen throughput (token/s): 47.68, #queue-req: 0
  3129. 2025-05-17 23:00:44,566 - __main__ - INFO - sglang running req: 1 queue req: 0
  3130. 2025-05-17 23:00:45,409 - sglang - INFO - [2025-05-17 23:00:45 TP0] Decode batch. #running-req: 1, #token: 2384, token usage: 0.06, gen throughput (token/s): 47.48, #queue-req: 0
  3131. 2025-05-17 23:00:45,409 - __main__ - INFO - sglang running req: 1 queue req: 0
  3132. 2025-05-17 23:00:46,004 - __main__ - INFO - Finished TaskGroup for worker on 02907a3ba6226f0399bbf3080296d8a1a280e502
  3133. 2025-05-17 23:00:46,004 - __main__ - INFO - Got 1 docs for 02907a3ba6226f0399bbf3080296d8a1a280e502
  3134. 2025-05-17 23:00:46,006 - __main__ - INFO - Worker 0 exiting due to empty queue
  3135. 2025-05-17 23:00:46,006 - __main__ - INFO - Work done
  3136. 2025-05-17 23:00:46,007 - __main__ - INFO - Got cancellation request for SGLang server
  3137. 2025-05-17 23:06:23,302 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  3138. 2025-05-17 23:06:23,302 - __main__ - INFO - Loading file at tests/gnarly_pdfs/badlines.pdf as PDF document
  3139. 2025-05-17 23:06:23,302 - __main__ - INFO - Found 1 total pdf paths to add
  3140. 2025-05-17 23:06:23,309 - __main__ - INFO - Calculated items_per_group: 50 based on average pages per PDF: 10.00
  3141. 2025-05-17 23:06:23,548 - __main__ - INFO - Starting pipeline with PID 416546
  3142. 2025-05-17 23:06:23,549 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  3143. 2025-05-17 23:06:29,154 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  3144. 2025-05-17 23:06:30,200 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  3145. 2025-05-17 23:06:31,251 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  3146. 2025-05-17 23:06:32,316 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  3147. 2025-05-17 23:06:33,381 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  3148. 2025-05-17 23:06:34,451 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  3149. 2025-05-17 23:06:35,178 - sglang - INFO - [2025-05-17 23:06:35] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=153903282, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  3150. 2025-05-17 23:06:35,178 - __main__ - INFO - [2025-05-17 23:06:35] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=153903282, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  3151. 2025-05-17 23:06:35,496 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  3152. 2025-05-17 23:06:36,562 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  3153. 2025-05-17 23:06:37,628 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  3154. 2025-05-17 23:06:38,698 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  3155. 2025-05-17 23:06:39,768 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  3156. 2025-05-17 23:06:40,834 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  3157. 2025-05-17 23:06:41,900 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  3158. 2025-05-17 23:06:42,966 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  3159. 2025-05-17 23:06:43,533 - sglang - INFO - [2025-05-17 23:06:43] Use chat template for the OpenAI-compatible API server: qwen2-vl
  3160. 2025-05-17 23:06:43,533 - __main__ - INFO - [2025-05-17 23:06:43] Use chat template for the OpenAI-compatible API server: qwen2-vl
  3161. 2025-05-17 23:06:44,045 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  3162. 2025-05-17 23:06:45,117 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  3163. 2025-05-17 23:06:46,182 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  3164. 2025-05-17 23:06:47,236 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  3165. 2025-05-17 23:06:48,297 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  3166. 2025-05-17 23:06:49,361 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  3167. 2025-05-17 23:06:49,771 - sglang - INFO - [2025-05-17 23:06:49 TP0] Overlap scheduler is disabled for multimodal models.
  3168. 2025-05-17 23:06:49,771 - __main__ - INFO - [2025-05-17 23:06:49 TP0] Overlap scheduler is disabled for multimodal models.
  3169. 2025-05-17 23:06:50,440 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  3170. 2025-05-17 23:06:50,582 - sglang - INFO - [2025-05-17 23:06:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  3171. 2025-05-17 23:06:50,582 - __main__ - INFO - [2025-05-17 23:06:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  3172. 2025-05-17 23:06:50,583 - sglang - INFO - [2025-05-17 23:06:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  3173. 2025-05-17 23:06:50,583 - __main__ - INFO - [2025-05-17 23:06:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  3174. 2025-05-17 23:06:50,583 - sglang - INFO - [2025-05-17 23:06:50 TP0] Init torch distributed begin.
  3175. 2025-05-17 23:06:50,583 - __main__ - INFO - [2025-05-17 23:06:50 TP0] Init torch distributed begin.
  3176. 2025-05-17 23:06:51,520 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  3177. 2025-05-17 23:06:52,595 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  3178. 2025-05-17 23:06:53,661 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  3179. 2025-05-17 23:06:54,727 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  3180. 2025-05-17 23:06:55,780 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  3181. 2025-05-17 23:06:55,881 - sglang - INFO - [2025-05-17 23:06:55 TP0] Load weight begin. avail mem=23.33 GB
  3182. 2025-05-17 23:06:55,881 - __main__ - INFO - [2025-05-17 23:06:55 TP0] Load weight begin. avail mem=23.33 GB
  3183. 2025-05-17 23:06:56,854 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  3184. 2025-05-17 23:06:56,906 - sglang - INFO - [2025-05-17 23:06:56 TP0] Using model weights format ['*.safetensors']
  3185. 2025-05-17 23:06:56,906 - __main__ - INFO - [2025-05-17 23:06:56 TP0] Using model weights format ['*.safetensors']
  3186. 2025-05-17 23:06:57,540 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  3187. 2025-05-17 23:06:57,540 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  3188. 2025-05-17 23:06:57,816 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.63it/s]
  3189. 2025-05-17 23:06:57,816 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.63it/s]
  3190. 2025-05-17 23:06:57,933 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  3191. 2025-05-17 23:06:58,742 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
  3192. 2025-05-17 23:06:58,742 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
  3193. 2025-05-17 23:06:59,013 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  3194. 2025-05-17 23:06:59,678 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.27it/s]
  3195. 2025-05-17 23:06:59,678 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.27it/s]
  3196. 2025-05-17 23:07:00,093 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  3197. 2025-05-17 23:07:00,590 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  3198. 2025-05-17 23:07:00,590 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  3199. 2025-05-17 23:07:00,590 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
  3200. 2025-05-17 23:07:00,590 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
  3201. 2025-05-17 23:07:00,590 - sglang - INFO -
  3202. 2025-05-17 23:07:00,590 - __main__ - INFO -
  3203. 2025-05-17 23:07:00,736 - sglang - INFO - [2025-05-17 23:07:00 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  3204. 2025-05-17 23:07:00,737 - __main__ - INFO - [2025-05-17 23:07:00 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  3205. 2025-05-17 23:07:00,743 - sglang - INFO - [2025-05-17 23:07:00 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  3206. 2025-05-17 23:07:00,743 - __main__ - INFO - [2025-05-17 23:07:00 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  3207. 2025-05-17 23:07:00,743 - sglang - INFO - [2025-05-17 23:07:00 TP0] Memory pool end. avail mem=5.30 GB
  3208. 2025-05-17 23:07:00,743 - __main__ - INFO - [2025-05-17 23:07:00 TP0] Memory pool end. avail mem=5.30 GB
  3209. 2025-05-17 23:07:00,920 - sglang - INFO - [2025-05-17 23:07:00 TP0] Capture cuda graph begin. This can take up to several minutes.
  3210. 2025-05-17 23:07:00,920 - __main__ - INFO - [2025-05-17 23:07:00 TP0] Capture cuda graph begin. This can take up to several minutes.
  3211. 2025-05-17 23:07:01,172 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  3212. 2025-05-17 23:07:02,252 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  3213. 2025-05-17 23:07:02,669 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.04it/s] 50%|█████ | 2/4 [00:01<00:01, 1.84it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.40it/s] 100%|██████████| 4/4 [00:01<00:00, 2.80it/s] 100%|██████████| 4/4 [00:01<00:00, 2.29it/s]
  3214. 2025-05-17 23:07:02,669 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.04it/s] 50%|█████ | 2/4 [00:01<00:01, 1.84it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.40it/s] 100%|██████████| 4/4 [00:01<00:00, 2.80it/s] 100%|██████████| 4/4 [00:01<00:00, 2.29it/s]
  3215. 2025-05-17 23:07:02,670 - sglang - INFO - [2025-05-17 23:07:02 TP0] Capture cuda graph end. Time elapsed: 1.75 s
  3216. 2025-05-17 23:07:02,670 - __main__ - INFO - [2025-05-17 23:07:02 TP0] Capture cuda graph end. Time elapsed: 1.75 s
  3217. 2025-05-17 23:07:03,288 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  3218. 2025-05-17 23:07:04,325 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  3219. 2025-05-17 23:07:05,386 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  3220. 2025-05-17 23:07:05,965 - sglang - INFO - [2025-05-17 23:07:05 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  3221. 2025-05-17 23:07:05,965 - __main__ - INFO - [2025-05-17 23:07:05 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  3222. 2025-05-17 23:07:06,481 - __main__ - INFO - sglang server is ready.
  3223. 2025-05-17 23:07:06,481 - __main__ - INFO - Queue remaining: 1
  3224. 2025-05-17 23:07:06,481 - __main__ - INFO -
  3225. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3226. ----------------------------------------------------------------------------------
  3227. 2025-05-17 23:07:06,481 - __main__ - INFO -
  3228. Worker ID
  3229. ---------
  3230. 2025-05-17 23:07:06,482 - __main__ - INFO - Worker 0 processing work item 9135f55c864185c3e61b48277b842dd16a718eb8
  3231. 2025-05-17 23:07:06,482 - __main__ - INFO - Created all tasks for 9135f55c864185c3e61b48277b842dd16a718eb8
  3232. 2025-05-17 23:07:06,494 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/badlines.pdf in worker 0
  3233. 2025-05-17 23:07:07,040 - sglang - INFO - [2025-05-17 23:07:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3234. 2025-05-17 23:07:07,040 - __main__ - INFO - [2025-05-17 23:07:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3235. 2025-05-17 23:07:07,040 - __main__ - INFO - sglang running req: 0 queue req: 0
  3236. 2025-05-17 23:07:08,155 - sglang - INFO - [2025-05-17 23:07:08] The server is fired up and ready to roll!
  3237. 2025-05-17 23:07:08,155 - __main__ - INFO - [2025-05-17 23:07:08] The server is fired up and ready to roll!
  3238. 2025-05-17 23:07:13,262 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-1
  3239. 2025-05-17 23:07:13,278 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-2
  3240. 2025-05-17 23:07:13,344 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-4
  3241. 2025-05-17 23:07:13,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-3
  3242. 2025-05-17 23:07:13,373 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-5
  3243. 2025-05-17 23:07:13,381 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-6
  3244. 2025-05-17 23:07:13,392 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-7
  3245. 2025-05-17 23:07:13,413 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-10
  3246. 2025-05-17 23:07:13,414 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-9
  3247. 2025-05-17 23:07:13,423 - __main__ - INFO - Built page query for tests/gnarly_pdfs/badlines.pdf-8
  3248. 2025-05-17 23:07:16,482 - __main__ - INFO - Queue remaining: 0
  3249. 2025-05-17 23:07:16,483 - __main__ - INFO -
  3250. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3251. ----------------------------------------------------------------------------------
  3252. 2025-05-17 23:07:16,483 - __main__ - INFO -
  3253. Worker ID | started
  3254. ----------+--------
  3255. 0 | 10
  3256. 2025-05-17 23:07:26,485 - __main__ - INFO - Queue remaining: 0
  3257. 2025-05-17 23:07:26,486 - __main__ - INFO -
  3258. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3259. ----------------------------------------------------------------------------------
  3260. 2025-05-17 23:07:26,486 - __main__ - INFO -
  3261. Worker ID | started
  3262. ----------+--------
  3263. 0 | 10
  3264. 2025-05-17 23:07:35,638 - sglang - INFO - [2025-05-17 23:07:35 TP0] Prefill batch. #new-seq: 1, #new-token: 3115, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3265. 2025-05-17 23:07:35,638 - __main__ - INFO - sglang running req: 0 queue req: 0
  3266. 2025-05-17 23:07:36,487 - __main__ - INFO - Queue remaining: 0
  3267. 2025-05-17 23:07:36,487 - __main__ - INFO -
  3268. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3269. ----------------------------------------------------------------------------------
  3270. 2025-05-17 23:07:36,488 - __main__ - INFO -
  3271. Worker ID | started
  3272. ----------+--------
  3273. 0 | 10
  3274. 2025-05-17 23:07:36,779 - sglang - INFO - [2025-05-17 23:07:36 TP0] Prefill batch. #new-seq: 3, #new-token: 12922, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.08, #running-req: 1, #queue-req: 6
  3275. 2025-05-17 23:07:36,779 - __main__ - INFO - sglang running req: 1 queue req: 6
  3276. 2025-05-17 23:07:40,138 - sglang - INFO - [2025-05-17 23:07:40 TP0] Prefill batch. #new-seq: 1, #new-token: 3999, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.42, #running-req: 4, #queue-req: 5
  3277. 2025-05-17 23:07:40,138 - __main__ - INFO - sglang running req: 4 queue req: 5
  3278. 2025-05-17 23:07:42,131 - sglang - INFO - [2025-05-17 23:07:42 TP0] Decode batch. #running-req: 5, #token: 20201, token usage: 0.53, gen throughput (token/s): 4.76, #queue-req: 5
  3279. 2025-05-17 23:07:42,131 - __main__ - INFO - sglang running req: 5 queue req: 5
  3280. 2025-05-17 23:07:43,021 - sglang - INFO - [2025-05-17 23:07:43 TP0] Decode batch. #running-req: 5, #token: 20401, token usage: 0.54, gen throughput (token/s): 224.78, #queue-req: 5
  3281. 2025-05-17 23:07:43,021 - __main__ - INFO - sglang running req: 5 queue req: 5
  3282. 2025-05-17 23:07:43,905 - sglang - INFO - [2025-05-17 23:07:43 TP0] Decode batch. #running-req: 5, #token: 20601, token usage: 0.54, gen throughput (token/s): 226.09, #queue-req: 5
  3283. 2025-05-17 23:07:43,906 - __main__ - INFO - sglang running req: 5 queue req: 5
  3284. 2025-05-17 23:07:44,792 - sglang - INFO - [2025-05-17 23:07:44 TP0] Decode batch. #running-req: 5, #token: 20801, token usage: 0.55, gen throughput (token/s): 225.65, #queue-req: 5
  3285. 2025-05-17 23:07:44,792 - __main__ - INFO - sglang running req: 5 queue req: 5
  3286. 2025-05-17 23:07:45,678 - sglang - INFO - [2025-05-17 23:07:45 TP0] Decode batch. #running-req: 5, #token: 21001, token usage: 0.55, gen throughput (token/s): 225.76, #queue-req: 5
  3287. 2025-05-17 23:07:45,678 - __main__ - INFO - sglang running req: 5 queue req: 5
  3288. 2025-05-17 23:07:46,488 - __main__ - INFO - Queue remaining: 0
  3289. 2025-05-17 23:07:46,488 - __main__ - INFO -
  3290. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3291. ----------------------------------------------------------------------------------
  3292. 2025-05-17 23:07:46,488 - __main__ - INFO -
  3293. Worker ID | started
  3294. ----------+--------
  3295. 0 | 10
  3296. 2025-05-17 23:07:46,564 - sglang - INFO - [2025-05-17 23:07:46 TP0] Decode batch. #running-req: 5, #token: 21201, token usage: 0.56, gen throughput (token/s): 225.46, #queue-req: 5
  3297. 2025-05-17 23:07:46,565 - __main__ - INFO - sglang running req: 5 queue req: 5
  3298. 2025-05-17 23:07:47,452 - sglang - INFO - [2025-05-17 23:07:47 TP0] Decode batch. #running-req: 5, #token: 21401, token usage: 0.56, gen throughput (token/s): 225.28, #queue-req: 5
  3299. 2025-05-17 23:07:47,452 - __main__ - INFO - sglang running req: 5 queue req: 5
  3300. 2025-05-17 23:07:48,343 - sglang - INFO - [2025-05-17 23:07:48 TP0] Decode batch. #running-req: 5, #token: 21601, token usage: 0.57, gen throughput (token/s): 224.53, #queue-req: 5
  3301. 2025-05-17 23:07:48,343 - __main__ - INFO - sglang running req: 5 queue req: 5
  3302. 2025-05-17 23:07:49,240 - sglang - INFO - [2025-05-17 23:07:49 TP0] Decode batch. #running-req: 5, #token: 21801, token usage: 0.57, gen throughput (token/s): 222.99, #queue-req: 5
  3303. 2025-05-17 23:07:49,240 - __main__ - INFO - sglang running req: 5 queue req: 5
  3304. 2025-05-17 23:07:50,130 - sglang - INFO - [2025-05-17 23:07:50 TP0] Decode batch. #running-req: 5, #token: 22001, token usage: 0.58, gen throughput (token/s): 224.64, #queue-req: 5
  3305. 2025-05-17 23:07:50,130 - __main__ - INFO - sglang running req: 5 queue req: 5
  3306. 2025-05-17 23:07:51,021 - sglang - INFO - [2025-05-17 23:07:51 TP0] Decode batch. #running-req: 5, #token: 22201, token usage: 0.58, gen throughput (token/s): 224.44, #queue-req: 5
  3307. 2025-05-17 23:07:51,022 - __main__ - INFO - sglang running req: 5 queue req: 5
  3308. 2025-05-17 23:07:51,912 - sglang - INFO - [2025-05-17 23:07:51 TP0] Decode batch. #running-req: 5, #token: 22401, token usage: 0.59, gen throughput (token/s): 224.45, #queue-req: 5
  3309. 2025-05-17 23:07:51,913 - __main__ - INFO - sglang running req: 5 queue req: 5
  3310. 2025-05-17 23:07:52,813 - sglang - INFO - [2025-05-17 23:07:52 TP0] Decode batch. #running-req: 5, #token: 22601, token usage: 0.59, gen throughput (token/s): 222.20, #queue-req: 5
  3311. 2025-05-17 23:07:52,813 - __main__ - INFO - sglang running req: 5 queue req: 5
  3312. 2025-05-17 23:07:53,709 - sglang - INFO - [2025-05-17 23:07:53 TP0] Decode batch. #running-req: 5, #token: 22801, token usage: 0.60, gen throughput (token/s): 223.16, #queue-req: 5
  3313. 2025-05-17 23:07:53,709 - __main__ - INFO - sglang running req: 5 queue req: 5
  3314. 2025-05-17 23:07:54,605 - sglang - INFO - [2025-05-17 23:07:54 TP0] Decode batch. #running-req: 5, #token: 23001, token usage: 0.61, gen throughput (token/s): 223.07, #queue-req: 5
  3315. 2025-05-17 23:07:54,606 - __main__ - INFO - sglang running req: 5 queue req: 5
  3316. 2025-05-17 23:07:55,500 - sglang - INFO - [2025-05-17 23:07:55 TP0] Decode batch. #running-req: 5, #token: 23201, token usage: 0.61, gen throughput (token/s): 223.62, #queue-req: 5
  3317. 2025-05-17 23:07:55,500 - __main__ - INFO - sglang running req: 5 queue req: 5
  3318. 2025-05-17 23:07:56,397 - sglang - INFO - [2025-05-17 23:07:56 TP0] Decode batch. #running-req: 5, #token: 23401, token usage: 0.62, gen throughput (token/s): 222.92, #queue-req: 5
  3319. 2025-05-17 23:07:56,397 - __main__ - INFO - sglang running req: 5 queue req: 5
  3320. 2025-05-17 23:07:56,490 - __main__ - INFO - Queue remaining: 0
  3321. 2025-05-17 23:07:56,490 - __main__ - INFO -
  3322. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3323. ----------------------------------------------------------------------------------
  3324. 2025-05-17 23:07:56,491 - __main__ - INFO -
  3325. Worker ID | started
  3326. ----------+--------
  3327. 0 | 10
  3328. 2025-05-17 23:07:57,300 - sglang - INFO - [2025-05-17 23:07:57 TP0] Decode batch. #running-req: 5, #token: 23601, token usage: 0.62, gen throughput (token/s): 221.44, #queue-req: 5
  3329. 2025-05-17 23:07:57,301 - __main__ - INFO - sglang running req: 5 queue req: 5
  3330. 2025-05-17 23:07:58,201 - sglang - INFO - [2025-05-17 23:07:58 TP0] Decode batch. #running-req: 5, #token: 23801, token usage: 0.63, gen throughput (token/s): 221.99, #queue-req: 5
  3331. 2025-05-17 23:07:58,201 - __main__ - INFO - sglang running req: 5 queue req: 5
  3332. 2025-05-17 23:07:59,102 - sglang - INFO - [2025-05-17 23:07:59 TP0] Decode batch. #running-req: 5, #token: 24001, token usage: 0.63, gen throughput (token/s): 222.08, #queue-req: 5
  3333. 2025-05-17 23:07:59,102 - __main__ - INFO - sglang running req: 5 queue req: 5
  3334. 2025-05-17 23:07:59,666 - sglang - INFO - [2025-05-17 23:07:59 TP0] Prefill batch. #new-seq: 2, #new-token: 7388, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.53, #running-req: 4, #queue-req: 3
  3335. 2025-05-17 23:07:59,666 - __main__ - INFO - sglang running req: 4 queue req: 3
  3336. 2025-05-17 23:08:02,168 - sglang - INFO - [2025-05-17 23:08:02 TP0] Decode batch. #running-req: 6, #token: 27671, token usage: 0.73, gen throughput (token/s): 69.79, #queue-req: 3
  3337. 2025-05-17 23:08:02,169 - __main__ - INFO - sglang running req: 6 queue req: 3
  3338. 2025-05-17 23:08:03,081 - sglang - INFO - [2025-05-17 23:08:03 TP0] Decode batch. #running-req: 6, #token: 27911, token usage: 0.73, gen throughput (token/s): 262.85, #queue-req: 3
  3339. 2025-05-17 23:08:03,082 - __main__ - INFO - sglang running req: 6 queue req: 3
  3340. 2025-05-17 23:08:03,997 - sglang - INFO - [2025-05-17 23:08:03 TP0] Decode batch. #running-req: 6, #token: 28151, token usage: 0.74, gen throughput (token/s): 262.18, #queue-req: 3
  3341. 2025-05-17 23:08:03,997 - __main__ - INFO - sglang running req: 6 queue req: 3
  3342. 2025-05-17 23:08:04,917 - sglang - INFO - [2025-05-17 23:08:04 TP0] Decode batch. #running-req: 6, #token: 28391, token usage: 0.75, gen throughput (token/s): 260.73, #queue-req: 3
  3343. 2025-05-17 23:08:04,917 - __main__ - INFO - sglang running req: 6 queue req: 3
  3344. 2025-05-17 23:08:05,835 - sglang - INFO - [2025-05-17 23:08:05 TP0] Decode batch. #running-req: 6, #token: 28631, token usage: 0.75, gen throughput (token/s): 261.54, #queue-req: 3
  3345. 2025-05-17 23:08:05,835 - __main__ - INFO - sglang running req: 6 queue req: 3
  3346. 2025-05-17 23:08:06,491 - __main__ - INFO - Queue remaining: 0
  3347. 2025-05-17 23:08:06,492 - __main__ - INFO -
  3348. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3349. ----------------------------------------------------------------------------------
  3350. sglang_input_tokens 30.18 30.18
  3351. sglang_output_tokens 7.93 7.93
  3352. 2025-05-17 23:08:06,492 - __main__ - INFO -
  3353. Worker ID | finished | started
  3354. ----------+----------+--------
  3355. 0 | 1 | 10
  3356. 2025-05-17 23:08:06,753 - sglang - INFO - [2025-05-17 23:08:06 TP0] Decode batch. #running-req: 6, #token: 28871, token usage: 0.76, gen throughput (token/s): 261.22, #queue-req: 3
  3357. 2025-05-17 23:08:06,754 - __main__ - INFO - sglang running req: 6 queue req: 3
  3358. 2025-05-17 23:08:07,672 - sglang - INFO - [2025-05-17 23:08:07 TP0] Decode batch. #running-req: 6, #token: 29111, token usage: 0.77, gen throughput (token/s): 261.24, #queue-req: 3
  3359. 2025-05-17 23:08:07,672 - __main__ - INFO - sglang running req: 6 queue req: 3
  3360. 2025-05-17 23:08:08,600 - sglang - INFO - [2025-05-17 23:08:08 TP0] Decode batch. #running-req: 6, #token: 29351, token usage: 0.77, gen throughput (token/s): 258.69, #queue-req: 3
  3361. 2025-05-17 23:08:08,600 - __main__ - INFO - sglang running req: 6 queue req: 3
  3362. 2025-05-17 23:08:09,522 - sglang - INFO - [2025-05-17 23:08:09 TP0] Decode batch. #running-req: 6, #token: 29591, token usage: 0.78, gen throughput (token/s): 260.24, #queue-req: 3
  3363. 2025-05-17 23:08:09,523 - __main__ - INFO - sglang running req: 6 queue req: 3
  3364. 2025-05-17 23:08:10,078 - sglang - INFO - [2025-05-17 23:08:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 5, #queue-req: 2
  3365. 2025-05-17 23:08:10,079 - __main__ - INFO - sglang running req: 5 queue req: 2
  3366. 2025-05-17 23:08:11,267 - sglang - INFO - [2025-05-17 23:08:11 TP0] Prefill batch. #new-seq: 2, #new-token: 8214, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.58, #running-req: 5, #queue-req: 0
  3367. 2025-05-17 23:08:11,267 - __main__ - INFO - sglang running req: 5 queue req: 0
  3368. 2025-05-17 23:08:11,437 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  3369. 2025-05-17 23:08:11,437 - __main__ - INFO - Worker 1 exiting due to empty queue
  3370. 2025-05-17 23:08:11,437 - __main__ - INFO - Worker 2 exiting due to empty queue
  3371. 2025-05-17 23:08:11,437 - __main__ - INFO - Worker 3 exiting due to empty queue
  3372. 2025-05-17 23:08:11,438 - __main__ - INFO - Worker 4 exiting due to empty queue
  3373. 2025-05-17 23:08:11,438 - __main__ - INFO - Worker 5 exiting due to empty queue
  3374. 2025-05-17 23:08:11,438 - __main__ - INFO - Worker 6 exiting due to empty queue
  3375. 2025-05-17 23:08:11,438 - __main__ - INFO - Worker 7 exiting due to empty queue
  3376. 2025-05-17 23:08:13,811 - sglang - INFO - [2025-05-17 23:08:13 TP0] Decode batch. #running-req: 7, #token: 30217, token usage: 0.80, gen throughput (token/s): 56.66, #queue-req: 0
  3377. 2025-05-17 23:08:13,811 - __main__ - INFO - sglang running req: 7 queue req: 0
  3378. 2025-05-17 23:08:14,740 - sglang - INFO - [2025-05-17 23:08:14 TP0] Decode batch. #running-req: 7, #token: 30497, token usage: 0.80, gen throughput (token/s): 301.38, #queue-req: 0
  3379. 2025-05-17 23:08:14,740 - __main__ - INFO - sglang running req: 7 queue req: 0
  3380. 2025-05-17 23:08:15,674 - sglang - INFO - [2025-05-17 23:08:15 TP0] Decode batch. #running-req: 7, #token: 30777, token usage: 0.81, gen throughput (token/s): 299.88, #queue-req: 0
  3381. 2025-05-17 23:08:15,674 - __main__ - INFO - sglang running req: 7 queue req: 0
  3382. 2025-05-17 23:08:16,492 - __main__ - INFO - Queue remaining: 0
  3383. 2025-05-17 23:08:16,493 - __main__ - INFO -
  3384. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3385. ----------------------------------------------------------------------------------
  3386. sglang_input_tokens 101.61 101.61
  3387. sglang_output_tokens 28.12 28.12
  3388. 2025-05-17 23:08:16,493 - __main__ - INFO -
  3389. Worker ID | finished | started
  3390. ----------+----------+--------
  3391. 0 | 3 | 10
  3392. 2025-05-17 23:08:16,605 - sglang - INFO - [2025-05-17 23:08:16 TP0] Decode batch. #running-req: 7, #token: 31057, token usage: 0.82, gen throughput (token/s): 300.57, #queue-req: 0
  3393. 2025-05-17 23:08:16,605 - __main__ - INFO - sglang running req: 7 queue req: 0
  3394. 2025-05-17 23:08:17,535 - sglang - INFO - [2025-05-17 23:08:17 TP0] Decode batch. #running-req: 7, #token: 31337, token usage: 0.82, gen throughput (token/s): 301.25, #queue-req: 0
  3395. 2025-05-17 23:08:17,535 - __main__ - INFO - sglang running req: 7 queue req: 0
  3396. 2025-05-17 23:08:18,459 - sglang - INFO - [2025-05-17 23:08:18 TP0] Decode batch. #running-req: 6, #token: 26755, token usage: 0.70, gen throughput (token/s): 262.99, #queue-req: 0
  3397. 2025-05-17 23:08:18,459 - __main__ - INFO - sglang running req: 6 queue req: 0
  3398. 2025-05-17 23:08:19,386 - sglang - INFO - [2025-05-17 23:08:19 TP0] Decode batch. #running-req: 6, #token: 26995, token usage: 0.71, gen throughput (token/s): 258.64, #queue-req: 0
  3399. 2025-05-17 23:08:19,387 - __main__ - INFO - sglang running req: 6 queue req: 0
  3400. 2025-05-17 23:08:20,307 - sglang - INFO - [2025-05-17 23:08:20 TP0] Decode batch. #running-req: 5, #token: 20697, token usage: 0.54, gen throughput (token/s): 250.92, #queue-req: 0
  3401. 2025-05-17 23:08:20,307 - __main__ - INFO - sglang running req: 5 queue req: 0
  3402. 2025-05-17 23:08:21,214 - sglang - INFO - [2025-05-17 23:08:21 TP0] Decode batch. #running-req: 5, #token: 20897, token usage: 0.55, gen throughput (token/s): 220.58, #queue-req: 0
  3403. 2025-05-17 23:08:21,214 - __main__ - INFO - sglang running req: 5 queue req: 0
  3404. 2025-05-17 23:08:22,119 - sglang - INFO - [2025-05-17 23:08:22 TP0] Decode batch. #running-req: 5, #token: 21097, token usage: 0.56, gen throughput (token/s): 220.91, #queue-req: 0
  3405. 2025-05-17 23:08:22,120 - __main__ - INFO - sglang running req: 5 queue req: 0
  3406. 2025-05-17 23:08:23,038 - sglang - INFO - [2025-05-17 23:08:23 TP0] Decode batch. #running-req: 5, #token: 21297, token usage: 0.56, gen throughput (token/s): 217.73, #queue-req: 0
  3407. 2025-05-17 23:08:23,038 - __main__ - INFO - sglang running req: 5 queue req: 0
  3408. 2025-05-17 23:08:23,954 - sglang - INFO - [2025-05-17 23:08:23 TP0] Decode batch. #running-req: 5, #token: 21497, token usage: 0.57, gen throughput (token/s): 218.32, #queue-req: 0
  3409. 2025-05-17 23:08:23,954 - __main__ - INFO - sglang running req: 5 queue req: 0
  3410. 2025-05-17 23:08:24,869 - sglang - INFO - [2025-05-17 23:08:24 TP0] Decode batch. #running-req: 5, #token: 21697, token usage: 0.57, gen throughput (token/s): 218.43, #queue-req: 0
  3411. 2025-05-17 23:08:24,870 - __main__ - INFO - sglang running req: 5 queue req: 0
  3412. 2025-05-17 23:08:25,785 - sglang - INFO - [2025-05-17 23:08:25 TP0] Decode batch. #running-req: 5, #token: 21897, token usage: 0.58, gen throughput (token/s): 218.38, #queue-req: 0
  3413. 2025-05-17 23:08:25,786 - __main__ - INFO - sglang running req: 5 queue req: 0
  3414. 2025-05-17 23:08:26,494 - __main__ - INFO - Queue remaining: 0
  3415. 2025-05-17 23:08:26,494 - __main__ - INFO -
  3416. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3417. ----------------------------------------------------------------------------------
  3418. sglang_input_tokens 162.63 162.63
  3419. sglang_output_tokens 48.74 48.74
  3420. 2025-05-17 23:08:26,494 - __main__ - INFO -
  3421. Worker ID | finished | started
  3422. ----------+----------+--------
  3423. 0 | 5 | 10
  3424. 2025-05-17 23:08:26,705 - sglang - INFO - [2025-05-17 23:08:26 TP0] Decode batch. #running-req: 5, #token: 22097, token usage: 0.58, gen throughput (token/s): 217.36, #queue-req: 0
  3425. 2025-05-17 23:08:26,706 - __main__ - INFO - sglang running req: 5 queue req: 0
  3426. 2025-05-17 23:08:27,633 - sglang - INFO - [2025-05-17 23:08:27 TP0] Decode batch. #running-req: 5, #token: 22297, token usage: 0.59, gen throughput (token/s): 215.52, #queue-req: 0
  3427. 2025-05-17 23:08:27,634 - __main__ - INFO - sglang running req: 5 queue req: 0
  3428. 2025-05-17 23:08:28,553 - sglang - INFO - [2025-05-17 23:08:28 TP0] Decode batch. #running-req: 5, #token: 22497, token usage: 0.59, gen throughput (token/s): 217.45, #queue-req: 0
  3429. 2025-05-17 23:08:28,554 - __main__ - INFO - sglang running req: 5 queue req: 0
  3430. 2025-05-17 23:08:29,469 - sglang - INFO - [2025-05-17 23:08:29 TP0] Decode batch. #running-req: 5, #token: 22697, token usage: 0.60, gen throughput (token/s): 218.41, #queue-req: 0
  3431. 2025-05-17 23:08:29,469 - __main__ - INFO - sglang running req: 5 queue req: 0
  3432. 2025-05-17 23:08:30,389 - sglang - INFO - [2025-05-17 23:08:30 TP0] Decode batch. #running-req: 5, #token: 22897, token usage: 0.60, gen throughput (token/s): 217.36, #queue-req: 0
  3433. 2025-05-17 23:08:30,389 - __main__ - INFO - sglang running req: 5 queue req: 0
  3434. 2025-05-17 23:08:31,296 - sglang - INFO - [2025-05-17 23:08:31 TP0] Decode batch. #running-req: 4, #token: 18516, token usage: 0.49, gen throughput (token/s): 189.67, #queue-req: 0
  3435. 2025-05-17 23:08:31,296 - __main__ - INFO - sglang running req: 4 queue req: 0
  3436. 2025-05-17 23:08:32,196 - sglang - INFO - [2025-05-17 23:08:32 TP0] Decode batch. #running-req: 4, #token: 18676, token usage: 0.49, gen throughput (token/s): 177.81, #queue-req: 0
  3437. 2025-05-17 23:08:32,196 - __main__ - INFO - sglang running req: 4 queue req: 0
  3438. 2025-05-17 23:08:33,094 - sglang - INFO - [2025-05-17 23:08:33 TP0] Decode batch. #running-req: 4, #token: 18836, token usage: 0.50, gen throughput (token/s): 178.20, #queue-req: 0
  3439. 2025-05-17 23:08:33,094 - __main__ - INFO - sglang running req: 4 queue req: 0
  3440. 2025-05-17 23:08:33,995 - sglang - INFO - [2025-05-17 23:08:33 TP0] Decode batch. #running-req: 4, #token: 18996, token usage: 0.50, gen throughput (token/s): 177.43, #queue-req: 0
  3441. 2025-05-17 23:08:33,995 - __main__ - INFO - sglang running req: 4 queue req: 0
  3442. 2025-05-17 23:08:34,904 - sglang - INFO - [2025-05-17 23:08:34 TP0] Decode batch. #running-req: 4, #token: 19156, token usage: 0.50, gen throughput (token/s): 176.02, #queue-req: 0
  3443. 2025-05-17 23:08:34,905 - __main__ - INFO - sglang running req: 4 queue req: 0
  3444. 2025-05-17 23:08:35,808 - sglang - INFO - [2025-05-17 23:08:35 TP0] Decode batch. #running-req: 4, #token: 19316, token usage: 0.51, gen throughput (token/s): 176.97, #queue-req: 0
  3445. 2025-05-17 23:08:35,809 - __main__ - INFO - sglang running req: 4 queue req: 0
  3446. 2025-05-17 23:08:36,495 - __main__ - INFO - Queue remaining: 0
  3447. 2025-05-17 23:08:36,495 - __main__ - INFO -
  3448. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3449. ----------------------------------------------------------------------------------
  3450. sglang_input_tokens 206.00 206.00
  3451. sglang_output_tokens 60.74 60.74
  3452. 2025-05-17 23:08:36,495 - __main__ - INFO -
  3453. Worker ID | finished | started
  3454. ----------+----------+--------
  3455. 0 | 7 | 10
  3456. 2025-05-17 23:08:36,702 - sglang - INFO - [2025-05-17 23:08:36 TP0] Decode batch. #running-req: 3, #token: 14513, token usage: 0.38, gen throughput (token/s): 147.71, #queue-req: 0
  3457. 2025-05-17 23:08:36,702 - __main__ - INFO - sglang running req: 3 queue req: 0
  3458. 2025-05-17 23:08:37,581 - sglang - INFO - [2025-05-17 23:08:37 TP0] Decode batch. #running-req: 2, #token: 10658, token usage: 0.28, gen throughput (token/s): 102.41, #queue-req: 0
  3459. 2025-05-17 23:08:37,581 - __main__ - INFO - sglang running req: 2 queue req: 0
  3460. 2025-05-17 23:08:38,453 - sglang - INFO - [2025-05-17 23:08:38 TP0] Decode batch. #running-req: 1, #token: 5341, token usage: 0.14, gen throughput (token/s): 79.15, #queue-req: 0
  3461. 2025-05-17 23:08:38,453 - __main__ - INFO - sglang running req: 1 queue req: 0
  3462. 2025-05-17 23:08:39,310 - sglang - INFO - [2025-05-17 23:08:39 TP0] Decode batch. #running-req: 1, #token: 5381, token usage: 0.14, gen throughput (token/s): 46.64, #queue-req: 0
  3463. 2025-05-17 23:08:39,310 - __main__ - INFO - sglang running req: 1 queue req: 0
  3464. 2025-05-17 23:08:40,163 - sglang - INFO - [2025-05-17 23:08:40 TP0] Decode batch. #running-req: 1, #token: 5421, token usage: 0.14, gen throughput (token/s): 46.89, #queue-req: 0
  3465. 2025-05-17 23:08:40,164 - __main__ - INFO - sglang running req: 1 queue req: 0
  3466. 2025-05-17 23:08:41,016 - sglang - INFO - [2025-05-17 23:08:41 TP0] Decode batch. #running-req: 1, #token: 5461, token usage: 0.14, gen throughput (token/s): 46.90, #queue-req: 0
  3467. 2025-05-17 23:08:41,017 - __main__ - INFO - sglang running req: 1 queue req: 0
  3468. 2025-05-17 23:08:41,871 - sglang - INFO - [2025-05-17 23:08:41 TP0] Decode batch. #running-req: 1, #token: 5501, token usage: 0.14, gen throughput (token/s): 46.77, #queue-req: 0
  3469. 2025-05-17 23:08:41,872 - __main__ - INFO - sglang running req: 1 queue req: 0
  3470. 2025-05-17 23:08:42,728 - sglang - INFO - [2025-05-17 23:08:42 TP0] Decode batch. #running-req: 1, #token: 5541, token usage: 0.15, gen throughput (token/s): 46.68, #queue-req: 0
  3471. 2025-05-17 23:08:42,728 - __main__ - INFO - sglang running req: 1 queue req: 0
  3472. 2025-05-17 23:08:43,584 - sglang - INFO - [2025-05-17 23:08:43 TP0] Decode batch. #running-req: 1, #token: 5581, token usage: 0.15, gen throughput (token/s): 46.74, #queue-req: 0
  3473. 2025-05-17 23:08:43,584 - __main__ - INFO - sglang running req: 1 queue req: 0
  3474. 2025-05-17 23:08:44,439 - sglang - INFO - [2025-05-17 23:08:44 TP0] Decode batch. #running-req: 1, #token: 5621, token usage: 0.15, gen throughput (token/s): 46.81, #queue-req: 0
  3475. 2025-05-17 23:08:44,439 - __main__ - INFO - sglang running req: 1 queue req: 0
  3476. 2025-05-17 23:08:45,301 - sglang - INFO - [2025-05-17 23:08:45 TP0] Decode batch. #running-req: 1, #token: 5661, token usage: 0.15, gen throughput (token/s): 46.36, #queue-req: 0
  3477. 2025-05-17 23:08:45,302 - __main__ - INFO - sglang running req: 1 queue req: 0
  3478. 2025-05-17 23:08:46,161 - sglang - INFO - [2025-05-17 23:08:46 TP0] Decode batch. #running-req: 1, #token: 5701, token usage: 0.15, gen throughput (token/s): 46.56, #queue-req: 0
  3479. 2025-05-17 23:08:46,161 - __main__ - INFO - sglang running req: 1 queue req: 0
  3480. 2025-05-17 23:08:46,497 - __main__ - INFO - Queue remaining: 0
  3481. 2025-05-17 23:08:46,497 - __main__ - INFO -
  3482. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3483. ----------------------------------------------------------------------------------
  3484. sglang_input_tokens 239.52 239.52
  3485. sglang_output_tokens 73.76 73.76
  3486. 2025-05-17 23:08:46,497 - __main__ - INFO -
  3487. Worker ID | finished | started
  3488. ----------+----------+--------
  3489. 0 | 9 | 10
  3490. 2025-05-17 23:08:47,017 - sglang - INFO - [2025-05-17 23:08:47 TP0] Decode batch. #running-req: 1, #token: 5741, token usage: 0.15, gen throughput (token/s): 46.69, #queue-req: 0
  3491. 2025-05-17 23:08:47,017 - __main__ - INFO - sglang running req: 1 queue req: 0
  3492. 2025-05-17 23:08:47,874 - sglang - INFO - [2025-05-17 23:08:47 TP0] Decode batch. #running-req: 1, #token: 5781, token usage: 0.15, gen throughput (token/s): 46.65, #queue-req: 0
  3493. 2025-05-17 23:08:47,875 - __main__ - INFO - sglang running req: 1 queue req: 0
  3494. 2025-05-17 23:08:48,734 - sglang - INFO - [2025-05-17 23:08:48 TP0] Decode batch. #running-req: 1, #token: 5821, token usage: 0.15, gen throughput (token/s): 46.56, #queue-req: 0
  3495. 2025-05-17 23:08:48,734 - __main__ - INFO - sglang running req: 1 queue req: 0
  3496. 2025-05-17 23:08:49,598 - sglang - INFO - [2025-05-17 23:08:49 TP0] Decode batch. #running-req: 1, #token: 5861, token usage: 0.15, gen throughput (token/s): 46.29, #queue-req: 0
  3497. 2025-05-17 23:08:49,598 - __main__ - INFO - sglang running req: 1 queue req: 0
  3498. 2025-05-17 23:08:50,456 - sglang - INFO - [2025-05-17 23:08:50 TP0] Decode batch. #running-req: 1, #token: 5901, token usage: 0.16, gen throughput (token/s): 46.63, #queue-req: 0
  3499. 2025-05-17 23:08:50,456 - __main__ - INFO - sglang running req: 1 queue req: 0
  3500. 2025-05-17 23:08:51,313 - sglang - INFO - [2025-05-17 23:08:51 TP0] Decode batch. #running-req: 1, #token: 5941, token usage: 0.16, gen throughput (token/s): 46.65, #queue-req: 0
  3501. 2025-05-17 23:08:51,313 - __main__ - INFO - sglang running req: 1 queue req: 0
  3502. 2025-05-17 23:08:52,176 - sglang - INFO - [2025-05-17 23:08:52 TP0] Decode batch. #running-req: 1, #token: 5981, token usage: 0.16, gen throughput (token/s): 46.38, #queue-req: 0
  3503. 2025-05-17 23:08:52,176 - __main__ - INFO - sglang running req: 1 queue req: 0
  3504. 2025-05-17 23:08:53,038 - sglang - INFO - [2025-05-17 23:08:53 TP0] Decode batch. #running-req: 1, #token: 6021, token usage: 0.16, gen throughput (token/s): 46.39, #queue-req: 0
  3505. 2025-05-17 23:08:53,038 - __main__ - INFO - sglang running req: 1 queue req: 0
  3506. 2025-05-17 23:08:53,902 - sglang - INFO - [2025-05-17 23:08:53 TP0] Decode batch. #running-req: 1, #token: 6061, token usage: 0.16, gen throughput (token/s): 46.29, #queue-req: 0
  3507. 2025-05-17 23:08:53,902 - __main__ - INFO - sglang running req: 1 queue req: 0
  3508. 2025-05-17 23:08:54,763 - sglang - INFO - [2025-05-17 23:08:54 TP0] Decode batch. #running-req: 1, #token: 6101, token usage: 0.16, gen throughput (token/s): 46.45, #queue-req: 0
  3509. 2025-05-17 23:08:54,763 - __main__ - INFO - sglang running req: 1 queue req: 0
  3510. 2025-05-17 23:08:55,624 - sglang - INFO - [2025-05-17 23:08:55 TP0] Decode batch. #running-req: 1, #token: 6141, token usage: 0.16, gen throughput (token/s): 46.46, #queue-req: 0
  3511. 2025-05-17 23:08:55,624 - __main__ - INFO - sglang running req: 1 queue req: 0
  3512. 2025-05-17 23:08:56,487 - sglang - INFO - [2025-05-17 23:08:56 TP0] Decode batch. #running-req: 1, #token: 6181, token usage: 0.16, gen throughput (token/s): 46.36, #queue-req: 0
  3513. 2025-05-17 23:08:56,487 - __main__ - INFO - sglang running req: 1 queue req: 0
  3514. 2025-05-17 23:08:56,498 - __main__ - INFO - Queue remaining: 0
  3515. 2025-05-17 23:08:56,498 - __main__ - INFO -
  3516. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3517. ----------------------------------------------------------------------------------
  3518. sglang_input_tokens 223.89 223.89
  3519. sglang_output_tokens 68.95 68.95
  3520. 2025-05-17 23:08:56,498 - __main__ - INFO -
  3521. Worker ID | finished | started
  3522. ----------+----------+--------
  3523. 0 | 9 | 10
  3524. 2025-05-17 23:08:57,350 - sglang - INFO - [2025-05-17 23:08:57 TP0] Decode batch. #running-req: 1, #token: 6221, token usage: 0.16, gen throughput (token/s): 46.34, #queue-req: 0
  3525. 2025-05-17 23:08:57,350 - __main__ - INFO - sglang running req: 1 queue req: 0
  3526. 2025-05-17 23:08:58,212 - sglang - INFO - [2025-05-17 23:08:58 TP0] Decode batch. #running-req: 1, #token: 6261, token usage: 0.16, gen throughput (token/s): 46.40, #queue-req: 0
  3527. 2025-05-17 23:08:58,212 - __main__ - INFO - sglang running req: 1 queue req: 0
  3528. 2025-05-17 23:08:59,076 - sglang - INFO - [2025-05-17 23:08:59 TP0] Decode batch. #running-req: 1, #token: 6301, token usage: 0.17, gen throughput (token/s): 46.30, #queue-req: 0
  3529. 2025-05-17 23:08:59,076 - __main__ - INFO - sglang running req: 1 queue req: 0
  3530. 2025-05-17 23:08:59,937 - sglang - INFO - [2025-05-17 23:08:59 TP0] Decode batch. #running-req: 1, #token: 6341, token usage: 0.17, gen throughput (token/s): 46.46, #queue-req: 0
  3531. 2025-05-17 23:08:59,937 - __main__ - INFO - sglang running req: 1 queue req: 0
  3532. 2025-05-17 23:09:00,801 - sglang - INFO - [2025-05-17 23:09:00 TP0] Decode batch. #running-req: 1, #token: 6381, token usage: 0.17, gen throughput (token/s): 46.26, #queue-req: 0
  3533. 2025-05-17 23:09:00,802 - __main__ - INFO - sglang running req: 1 queue req: 0
  3534. 2025-05-17 23:09:01,665 - sglang - INFO - [2025-05-17 23:09:01 TP0] Decode batch. #running-req: 1, #token: 6421, token usage: 0.17, gen throughput (token/s): 46.32, #queue-req: 0
  3535. 2025-05-17 23:09:01,665 - __main__ - INFO - sglang running req: 1 queue req: 0
  3536. 2025-05-17 23:09:02,528 - sglang - INFO - [2025-05-17 23:09:02 TP0] Decode batch. #running-req: 1, #token: 6461, token usage: 0.17, gen throughput (token/s): 46.33, #queue-req: 0
  3537. 2025-05-17 23:09:02,528 - __main__ - INFO - sglang running req: 1 queue req: 0
  3538. 2025-05-17 23:09:03,390 - sglang - INFO - [2025-05-17 23:09:03 TP0] Decode batch. #running-req: 1, #token: 6501, token usage: 0.17, gen throughput (token/s): 46.42, #queue-req: 0
  3539. 2025-05-17 23:09:03,390 - __main__ - INFO - sglang running req: 1 queue req: 0
  3540. 2025-05-17 23:09:04,254 - sglang - INFO - [2025-05-17 23:09:04 TP0] Decode batch. #running-req: 1, #token: 6541, token usage: 0.17, gen throughput (token/s): 46.32, #queue-req: 0
  3541. 2025-05-17 23:09:04,254 - __main__ - INFO - sglang running req: 1 queue req: 0
  3542. 2025-05-17 23:09:04,284 - __main__ - INFO - Finished TaskGroup for worker on 9135f55c864185c3e61b48277b842dd16a718eb8
  3543. 2025-05-17 23:09:04,284 - __main__ - INFO - Got 1 docs for 9135f55c864185c3e61b48277b842dd16a718eb8
  3544. 2025-05-17 23:09:04,286 - __main__ - INFO - Worker 0 exiting due to empty queue
  3545. 2025-05-17 23:09:04,286 - __main__ - INFO - Work done
  3546. 2025-05-17 23:09:04,287 - __main__ - INFO - Got cancellation request for SGLang server
  3547. 2025-05-17 23:17:22,986 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  3548. 2025-05-17 23:17:22,986 - __main__ - INFO - Loading file at tests/gnarly_pdfs/delivery.pdf as PDF document
  3549. 2025-05-17 23:17:22,986 - __main__ - INFO - Found 1 total pdf paths to add
  3550. 2025-05-17 23:17:22,990 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  3551. 2025-05-17 23:17:23,256 - __main__ - INFO - Starting pipeline with PID 438495
  3552. 2025-05-17 23:17:23,256 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  3553. 2025-05-17 23:17:25,309 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  3554. 2025-05-17 23:17:26,355 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  3555. 2025-05-17 23:17:27,398 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  3556. 2025-05-17 23:17:28,431 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  3557. 2025-05-17 23:17:29,466 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  3558. 2025-05-17 23:17:30,512 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  3559. 2025-05-17 23:17:31,180 - sglang - INFO - [2025-05-17 23:17:31] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1021188320, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  3560. 2025-05-17 23:17:31,180 - __main__ - INFO - [2025-05-17 23:17:31] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1021188320, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  3561. 2025-05-17 23:17:31,559 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  3562. 2025-05-17 23:17:32,605 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  3563. 2025-05-17 23:17:33,651 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  3564. 2025-05-17 23:17:34,698 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  3565. 2025-05-17 23:17:35,746 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  3566. 2025-05-17 23:17:36,787 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  3567. 2025-05-17 23:17:37,845 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  3568. 2025-05-17 23:17:38,916 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  3569. 2025-05-17 23:17:39,980 - sglang - INFO - [2025-05-17 23:17:39] Use chat template for the OpenAI-compatible API server: qwen2-vl
  3570. 2025-05-17 23:17:39,980 - __main__ - INFO - [2025-05-17 23:17:39] Use chat template for the OpenAI-compatible API server: qwen2-vl
  3571. 2025-05-17 23:17:39,982 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  3572. 2025-05-17 23:17:41,048 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  3573. 2025-05-17 23:17:42,118 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  3574. 2025-05-17 23:17:43,188 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  3575. 2025-05-17 23:17:44,258 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  3576. 2025-05-17 23:17:45,324 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  3577. 2025-05-17 23:17:46,388 - sglang - INFO - [2025-05-17 23:17:46 TP0] Overlap scheduler is disabled for multimodal models.
  3578. 2025-05-17 23:17:46,389 - __main__ - INFO - [2025-05-17 23:17:46 TP0] Overlap scheduler is disabled for multimodal models.
  3579. 2025-05-17 23:17:46,390 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  3580. 2025-05-17 23:17:46,921 - sglang - INFO - [2025-05-17 23:17:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  3581. 2025-05-17 23:17:46,921 - __main__ - INFO - [2025-05-17 23:17:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  3582. 2025-05-17 23:17:46,921 - sglang - INFO - [2025-05-17 23:17:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  3583. 2025-05-17 23:17:46,922 - __main__ - INFO - [2025-05-17 23:17:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  3584. 2025-05-17 23:17:46,922 - sglang - INFO - [2025-05-17 23:17:46 TP0] Init torch distributed begin.
  3585. 2025-05-17 23:17:46,922 - __main__ - INFO - [2025-05-17 23:17:46 TP0] Init torch distributed begin.
  3586. 2025-05-17 23:17:47,469 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  3587. 2025-05-17 23:17:48,527 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  3588. 2025-05-17 23:17:49,593 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  3589. 2025-05-17 23:17:50,662 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  3590. 2025-05-17 23:17:51,732 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  3591. 2025-05-17 23:17:52,271 - sglang - INFO - [2025-05-17 23:17:52 TP0] Load weight begin. avail mem=23.33 GB
  3592. 2025-05-17 23:17:52,271 - __main__ - INFO - [2025-05-17 23:17:52 TP0] Load weight begin. avail mem=23.33 GB
  3593. 2025-05-17 23:17:52,811 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  3594. 2025-05-17 23:17:53,877 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  3595. 2025-05-17 23:17:53,922 - sglang - INFO - [2025-05-17 23:17:53 TP0] Using model weights format ['*.safetensors']
  3596. 2025-05-17 23:17:53,922 - __main__ - INFO - [2025-05-17 23:17:53 TP0] Using model weights format ['*.safetensors']
  3597. 2025-05-17 23:17:54,483 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  3598. 2025-05-17 23:17:54,483 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  3599. 2025-05-17 23:17:54,759 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.63it/s]
  3600. 2025-05-17 23:17:54,759 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.63it/s]
  3601. 2025-05-17 23:17:54,958 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  3602. 2025-05-17 23:17:55,683 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
  3603. 2025-05-17 23:17:55,683 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
  3604. 2025-05-17 23:17:56,038 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  3605. 2025-05-17 23:17:56,614 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
  3606. 2025-05-17 23:17:56,614 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
  3607. 2025-05-17 23:17:57,117 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  3608. 2025-05-17 23:17:57,520 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  3609. 2025-05-17 23:17:57,520 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  3610. 2025-05-17 23:17:57,520 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
  3611. 2025-05-17 23:17:57,520 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
  3612. 2025-05-17 23:17:57,520 - sglang - INFO -
  3613. 2025-05-17 23:17:57,520 - __main__ - INFO -
  3614. 2025-05-17 23:17:57,667 - sglang - INFO - [2025-05-17 23:17:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  3615. 2025-05-17 23:17:57,667 - __main__ - INFO - [2025-05-17 23:17:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  3616. 2025-05-17 23:17:57,702 - sglang - INFO - [2025-05-17 23:17:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  3617. 2025-05-17 23:17:57,702 - __main__ - INFO - [2025-05-17 23:17:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  3618. 2025-05-17 23:17:57,702 - sglang - INFO - [2025-05-17 23:17:57 TP0] Memory pool end. avail mem=5.30 GB
  3619. 2025-05-17 23:17:57,702 - __main__ - INFO - [2025-05-17 23:17:57 TP0] Memory pool end. avail mem=5.30 GB
  3620. 2025-05-17 23:17:57,869 - sglang - INFO - [2025-05-17 23:17:57 TP0] Capture cuda graph begin. This can take up to several minutes.
  3621. 2025-05-17 23:17:57,869 - __main__ - INFO - [2025-05-17 23:17:57 TP0] Capture cuda graph begin. This can take up to several minutes.
  3622. 2025-05-17 23:17:58,196 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  3623. 2025-05-17 23:17:59,276 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  3624. 2025-05-17 23:17:59,675 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.02s/it] 50%|█████ | 2/4 [00:01<00:01, 1.76it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.32it/s] 100%|██████████| 4/4 [00:01<00:00, 2.74it/s] 100%|██████████| 4/4 [00:01<00:00, 2.22it/s]
  3625. 2025-05-17 23:17:59,675 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.02s/it] 50%|█████ | 2/4 [00:01<00:01, 1.76it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.32it/s] 100%|██████████| 4/4 [00:01<00:00, 2.74it/s] 100%|██████████| 4/4 [00:01<00:00, 2.22it/s]
  3626. 2025-05-17 23:17:59,675 - sglang - INFO - [2025-05-17 23:17:59 TP0] Capture cuda graph end. Time elapsed: 1.81 s
  3627. 2025-05-17 23:17:59,675 - __main__ - INFO - [2025-05-17 23:17:59 TP0] Capture cuda graph end. Time elapsed: 1.81 s
  3628. 2025-05-17 23:18:00,356 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  3629. 2025-05-17 23:18:01,426 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  3630. 2025-05-17 23:18:02,493 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  3631. 2025-05-17 23:18:02,593 - sglang - INFO - [2025-05-17 23:18:02 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  3632. 2025-05-17 23:18:02,593 - __main__ - INFO - [2025-05-17 23:18:02 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  3633. 2025-05-17 23:18:03,575 - __main__ - INFO - sglang server is ready.
  3634. 2025-05-17 23:18:03,575 - __main__ - INFO - Queue remaining: 1
  3635. 2025-05-17 23:18:03,575 - __main__ - INFO -
  3636. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3637. ----------------------------------------------------------------------------------
  3638. 2025-05-17 23:18:03,576 - __main__ - INFO -
  3639. Worker ID
  3640. ---------
  3641. 2025-05-17 23:18:03,576 - __main__ - INFO - Worker 0 processing work item 9cfcb1fe084c9aada33c41b2707ba7baa495d3ef
  3642. 2025-05-17 23:18:03,576 - __main__ - INFO - Created all tasks for 9cfcb1fe084c9aada33c41b2707ba7baa495d3ef
  3643. 2025-05-17 23:18:03,582 - __main__ - INFO - Got 5 pages to do for tests/gnarly_pdfs/delivery.pdf in worker 0
  3644. 2025-05-17 23:18:03,674 - sglang - INFO - [2025-05-17 23:18:03 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3645. 2025-05-17 23:18:03,674 - __main__ - INFO - [2025-05-17 23:18:03 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3646. 2025-05-17 23:18:03,674 - __main__ - INFO - sglang running req: 0 queue req: 0
  3647. 2025-05-17 23:18:04,263 - sglang - INFO - [2025-05-17 23:18:04] The server is fired up and ready to roll!
  3648. 2025-05-17 23:18:04,263 - __main__ - INFO - [2025-05-17 23:18:04] The server is fired up and ready to roll!
  3649. 2025-05-17 23:18:09,894 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-1
  3650. 2025-05-17 23:18:09,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-2
  3651. 2025-05-17 23:18:09,973 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-3
  3652. 2025-05-17 23:18:10,002 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-4
  3653. 2025-05-17 23:18:10,037 - __main__ - INFO - Built page query for tests/gnarly_pdfs/delivery.pdf-5
  3654. 2025-05-17 23:18:13,579 - __main__ - INFO - Queue remaining: 0
  3655. 2025-05-17 23:18:13,579 - __main__ - INFO -
  3656. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3657. ----------------------------------------------------------------------------------
  3658. 2025-05-17 23:18:13,579 - __main__ - INFO -
  3659. Worker ID | started
  3660. ----------+--------
  3661. 0 | 5
  3662. 2025-05-17 23:18:23,581 - __main__ - INFO - Queue remaining: 0
  3663. 2025-05-17 23:18:23,581 - __main__ - INFO -
  3664. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3665. ----------------------------------------------------------------------------------
  3666. 2025-05-17 23:18:23,581 - __main__ - INFO -
  3667. Worker ID | started
  3668. ----------+--------
  3669. 0 | 5
  3670. 2025-05-17 23:18:31,593 - sglang - INFO - [2025-05-17 23:18:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3671. 2025-05-17 23:18:31,593 - __main__ - INFO - sglang running req: 0 queue req: 0
  3672. 2025-05-17 23:18:32,439 - sglang - INFO - [2025-05-17 23:18:32 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  3673. 2025-05-17 23:18:32,440 - __main__ - INFO - sglang running req: 1 queue req: 0
  3674. 2025-05-17 23:18:33,583 - __main__ - INFO - Queue remaining: 0
  3675. 2025-05-17 23:18:33,584 - __main__ - INFO -
  3676. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3677. ----------------------------------------------------------------------------------
  3678. 2025-05-17 23:18:33,584 - __main__ - INFO -
  3679. Worker ID | started
  3680. ----------+--------
  3681. 0 | 5
  3682. 2025-05-17 23:18:34,690 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  3683. 2025-05-17 23:18:34,691 - __main__ - INFO - Worker 1 exiting due to empty queue
  3684. 2025-05-17 23:18:34,691 - __main__ - INFO - Worker 2 exiting due to empty queue
  3685. 2025-05-17 23:18:34,691 - __main__ - INFO - Worker 3 exiting due to empty queue
  3686. 2025-05-17 23:18:34,692 - __main__ - INFO - Worker 4 exiting due to empty queue
  3687. 2025-05-17 23:18:34,692 - __main__ - INFO - Worker 5 exiting due to empty queue
  3688. 2025-05-17 23:18:34,692 - __main__ - INFO - Worker 6 exiting due to empty queue
  3689. 2025-05-17 23:18:34,692 - __main__ - INFO - Worker 7 exiting due to empty queue
  3690. 2025-05-17 23:18:35,880 - sglang - INFO - [2025-05-17 23:18:35 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.17, #queue-req: 0
  3691. 2025-05-17 23:18:35,881 - __main__ - INFO - sglang running req: 5 queue req: 0
  3692. 2025-05-17 23:18:36,743 - sglang - INFO - [2025-05-17 23:18:36 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 231.77, #queue-req: 0
  3693. 2025-05-17 23:18:36,743 - __main__ - INFO - sglang running req: 5 queue req: 0
  3694. 2025-05-17 23:18:37,600 - sglang - INFO - [2025-05-17 23:18:37 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.38, #queue-req: 0
  3695. 2025-05-17 23:18:37,600 - __main__ - INFO - sglang running req: 5 queue req: 0
  3696. 2025-05-17 23:18:38,457 - sglang - INFO - [2025-05-17 23:18:38 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.51, #queue-req: 0
  3697. 2025-05-17 23:18:38,457 - __main__ - INFO - sglang running req: 5 queue req: 0
  3698. 2025-05-17 23:18:39,317 - sglang - INFO - [2025-05-17 23:18:39 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.53, #queue-req: 0
  3699. 2025-05-17 23:18:39,317 - __main__ - INFO - sglang running req: 5 queue req: 0
  3700. 2025-05-17 23:18:40,184 - sglang - INFO - [2025-05-17 23:18:40 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 230.54, #queue-req: 0
  3701. 2025-05-17 23:18:40,185 - __main__ - INFO - sglang running req: 5 queue req: 0
  3702. 2025-05-17 23:18:41,047 - sglang - INFO - [2025-05-17 23:18:41 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.74, #queue-req: 0
  3703. 2025-05-17 23:18:41,048 - __main__ - INFO - sglang running req: 5 queue req: 0
  3704. 2025-05-17 23:18:41,911 - sglang - INFO - [2025-05-17 23:18:41 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 231.58, #queue-req: 0
  3705. 2025-05-17 23:18:41,911 - __main__ - INFO - sglang running req: 5 queue req: 0
  3706. 2025-05-17 23:18:42,772 - sglang - INFO - [2025-05-17 23:18:42 TP0] Decode batch. #running-req: 4, #token: 7360, token usage: 0.19, gen throughput (token/s): 218.45, #queue-req: 0
  3707. 2025-05-17 23:18:42,772 - __main__ - INFO - sglang running req: 4 queue req: 0
  3708. 2025-05-17 23:18:43,585 - __main__ - INFO - Queue remaining: 0
  3709. 2025-05-17 23:18:43,586 - __main__ - INFO -
  3710. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3711. ----------------------------------------------------------------------------------
  3712. sglang_input_tokens 49.92 49.92
  3713. sglang_output_tokens 8.63 8.63
  3714. 2025-05-17 23:18:43,586 - __main__ - INFO -
  3715. Worker ID | finished | started
  3716. ----------+----------+--------
  3717. 0 | 2 | 5
  3718. 2025-05-17 23:18:43,622 - sglang - INFO - [2025-05-17 23:18:43 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 141.16, #queue-req: 0
  3719. 2025-05-17 23:18:43,622 - __main__ - INFO - sglang running req: 3 queue req: 0
  3720. 2025-05-17 23:18:44,460 - sglang - INFO - [2025-05-17 23:18:44 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 82.31, #queue-req: 0
  3721. 2025-05-17 23:18:44,460 - __main__ - INFO - sglang running req: 1 queue req: 0
  3722. 2025-05-17 23:18:45,287 - sglang - INFO - [2025-05-17 23:18:45 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.39, #queue-req: 0
  3723. 2025-05-17 23:18:45,287 - __main__ - INFO - sglang running req: 1 queue req: 0
  3724. 2025-05-17 23:18:45,952 - __main__ - INFO - Finished TaskGroup for worker on 9cfcb1fe084c9aada33c41b2707ba7baa495d3ef
  3725. 2025-05-17 23:18:45,952 - __main__ - INFO - Got 1 docs for 9cfcb1fe084c9aada33c41b2707ba7baa495d3ef
  3726. 2025-05-17 23:18:45,954 - __main__ - INFO - Worker 0 exiting due to empty queue
  3727. 2025-05-17 23:18:45,954 - __main__ - INFO - Work done
  3728. 2025-05-17 23:18:45,954 - __main__ - INFO - Got cancellation request for SGLang server
  3729. 2025-05-17 23:21:20,288 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  3730. 2025-05-17 23:21:20,288 - __main__ - INFO - Loading file at olmocr_workspace/job_1747495273/input.pdf as PDF document
  3731. 2025-05-17 23:21:20,288 - __main__ - INFO - Found 1 total pdf paths to add
  3732. 2025-05-17 23:21:20,292 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  3733. 2025-05-17 23:21:20,495 - __main__ - INFO - Starting pipeline with PID 447964
  3734. 2025-05-17 23:21:20,496 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  3735. 2025-05-17 23:21:26,128 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  3736. 2025-05-17 23:21:27,177 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  3737. 2025-05-17 23:21:28,223 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  3738. 2025-05-17 23:21:29,266 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  3739. 2025-05-17 23:21:30,311 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  3740. 2025-05-17 23:21:31,351 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  3741. 2025-05-17 23:21:31,508 - sglang - INFO - [2025-05-17 23:21:31] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=842359968, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  3742. 2025-05-17 23:21:31,508 - __main__ - INFO - [2025-05-17 23:21:31] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=842359968, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  3743. 2025-05-17 23:21:32,420 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  3744. 2025-05-17 23:21:33,486 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  3745. 2025-05-17 23:21:34,552 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  3746. 2025-05-17 23:21:35,619 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  3747. 2025-05-17 23:21:36,689 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  3748. 2025-05-17 23:21:37,760 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  3749. 2025-05-17 23:21:38,829 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  3750. 2025-05-17 23:21:39,898 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  3751. 2025-05-17 23:21:40,727 - sglang - INFO - [2025-05-17 23:21:40 TP0] Overlap scheduler is disabled for multimodal models.
  3752. 2025-05-17 23:21:40,727 - __main__ - INFO - [2025-05-17 23:21:40 TP0] Overlap scheduler is disabled for multimodal models.
  3753. 2025-05-17 23:21:40,975 - sglang - INFO - [2025-05-17 23:21:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
  3754. 2025-05-17 23:21:40,976 - __main__ - INFO - [2025-05-17 23:21:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
  3755. 2025-05-17 23:21:40,977 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  3756. 2025-05-17 23:21:41,212 - sglang - INFO - [2025-05-17 23:21:41 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  3757. 2025-05-17 23:21:41,213 - __main__ - INFO - [2025-05-17 23:21:41 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  3758. 2025-05-17 23:21:41,213 - sglang - INFO - [2025-05-17 23:21:41 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  3759. 2025-05-17 23:21:41,213 - __main__ - INFO - [2025-05-17 23:21:41 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  3760. 2025-05-17 23:21:41,213 - sglang - INFO - [2025-05-17 23:21:41 TP0] Init torch distributed begin.
  3761. 2025-05-17 23:21:41,213 - __main__ - INFO - [2025-05-17 23:21:41 TP0] Init torch distributed begin.
  3762. 2025-05-17 23:21:42,056 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  3763. 2025-05-17 23:21:43,121 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  3764. 2025-05-17 23:21:44,184 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  3765. 2025-05-17 23:21:45,238 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  3766. 2025-05-17 23:21:46,302 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  3767. 2025-05-17 23:21:46,538 - sglang - INFO - [2025-05-17 23:21:46 TP0] Load weight begin. avail mem=23.33 GB
  3768. 2025-05-17 23:21:46,538 - __main__ - INFO - [2025-05-17 23:21:46 TP0] Load weight begin. avail mem=23.33 GB
  3769. 2025-05-17 23:21:47,381 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  3770. 2025-05-17 23:21:47,564 - sglang - INFO - [2025-05-17 23:21:47 TP0] Using model weights format ['*.safetensors']
  3771. 2025-05-17 23:21:47,565 - __main__ - INFO - [2025-05-17 23:21:47 TP0] Using model weights format ['*.safetensors']
  3772. 2025-05-17 23:21:48,460 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  3773. 2025-05-17 23:21:48,525 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  3774. 2025-05-17 23:21:48,525 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  3775. 2025-05-17 23:21:48,812 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.48it/s]
  3776. 2025-05-17 23:21:48,813 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.48it/s]
  3777. 2025-05-17 23:21:49,539 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  3778. 2025-05-17 23:21:49,732 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
  3779. 2025-05-17 23:21:49,732 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
  3780. 2025-05-17 23:21:50,620 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  3781. 2025-05-17 23:21:50,652 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
  3782. 2025-05-17 23:21:50,652 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
  3783. 2025-05-17 23:21:51,550 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
  3784. 2025-05-17 23:21:51,550 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
  3785. 2025-05-17 23:21:51,550 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
  3786. 2025-05-17 23:21:51,550 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
  3787. 2025-05-17 23:21:51,550 - sglang - INFO -
  3788. 2025-05-17 23:21:51,550 - __main__ - INFO -
  3789. 2025-05-17 23:21:51,696 - sglang - INFO - [2025-05-17 23:21:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  3790. 2025-05-17 23:21:51,696 - __main__ - INFO - [2025-05-17 23:21:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  3791. 2025-05-17 23:21:51,696 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  3792. 2025-05-17 23:21:51,704 - sglang - INFO - [2025-05-17 23:21:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  3793. 2025-05-17 23:21:51,704 - __main__ - INFO - [2025-05-17 23:21:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  3794. 2025-05-17 23:21:51,704 - sglang - INFO - [2025-05-17 23:21:51 TP0] Memory pool end. avail mem=5.30 GB
  3795. 2025-05-17 23:21:51,704 - __main__ - INFO - [2025-05-17 23:21:51 TP0] Memory pool end. avail mem=5.30 GB
  3796. 2025-05-17 23:21:51,879 - sglang - INFO - [2025-05-17 23:21:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  3797. 2025-05-17 23:21:51,879 - __main__ - INFO - [2025-05-17 23:21:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  3798. 2025-05-17 23:21:52,776 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  3799. 2025-05-17 23:21:53,582 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.04it/s] 50%|█████ | 2/4 [00:01<00:01, 1.85it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.47it/s] 100%|██████████| 4/4 [00:01<00:00, 2.92it/s] 100%|██████████| 4/4 [00:01<00:00, 2.36it/s]
  3800. 2025-05-17 23:21:53,582 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.04it/s] 50%|█████ | 2/4 [00:01<00:01, 1.85it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.47it/s] 100%|██████████| 4/4 [00:01<00:00, 2.92it/s] 100%|██████████| 4/4 [00:01<00:00, 2.36it/s]
  3801. 2025-05-17 23:21:53,583 - sglang - INFO - [2025-05-17 23:21:53 TP0] Capture cuda graph end. Time elapsed: 1.70 s
  3802. 2025-05-17 23:21:53,583 - __main__ - INFO - [2025-05-17 23:21:53 TP0] Capture cuda graph end. Time elapsed: 1.70 s
  3803. 2025-05-17 23:21:53,856 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  3804. 2025-05-17 23:21:54,926 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  3805. 2025-05-17 23:21:55,998 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  3806. 2025-05-17 23:21:56,147 - sglang - INFO - [2025-05-17 23:21:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  3807. 2025-05-17 23:21:56,147 - __main__ - INFO - [2025-05-17 23:21:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  3808. 2025-05-17 23:21:57,090 - __main__ - INFO - sglang server is ready.
  3809. 2025-05-17 23:21:57,091 - __main__ - INFO - Queue remaining: 1
  3810. 2025-05-17 23:21:57,091 - __main__ - INFO -
  3811. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3812. ----------------------------------------------------------------------------------
  3813. 2025-05-17 23:21:57,091 - __main__ - INFO -
  3814. Worker ID
  3815. ---------
  3816. 2025-05-17 23:21:57,091 - __main__ - INFO - Worker 0 processing work item 88731f2783fb8112f0205c218828d88dc213896f
  3817. 2025-05-17 23:21:57,091 - __main__ - INFO - Created all tasks for 88731f2783fb8112f0205c218828d88dc213896f
  3818. 2025-05-17 23:21:57,098 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747495273/input.pdf in worker 0
  3819. 2025-05-17 23:21:57,223 - sglang - INFO - [2025-05-17 23:21:57 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3820. 2025-05-17 23:21:57,223 - __main__ - INFO - [2025-05-17 23:21:57 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3821. 2025-05-17 23:21:57,223 - __main__ - INFO - sglang running req: 0 queue req: 0
  3822. 2025-05-17 23:21:57,775 - sglang - INFO - [2025-05-17 23:21:57] The server is fired up and ready to roll!
  3823. 2025-05-17 23:21:57,775 - __main__ - INFO - [2025-05-17 23:21:57] The server is fired up and ready to roll!
  3824. 2025-05-17 23:22:03,589 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-1
  3825. 2025-05-17 23:22:03,604 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-2
  3826. 2025-05-17 23:22:03,616 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-3
  3827. 2025-05-17 23:22:03,623 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-4
  3828. 2025-05-17 23:22:03,644 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495273/input.pdf-5
  3829. 2025-05-17 23:22:07,179 - __main__ - INFO - Queue remaining: 0
  3830. 2025-05-17 23:22:07,179 - __main__ - INFO -
  3831. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3832. ----------------------------------------------------------------------------------
  3833. 2025-05-17 23:22:07,179 - __main__ - INFO -
  3834. Worker ID | started
  3835. ----------+--------
  3836. 0 | 5
  3837. 2025-05-17 23:22:17,180 - __main__ - INFO - Queue remaining: 0
  3838. 2025-05-17 23:22:17,181 - __main__ - INFO -
  3839. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3840. ----------------------------------------------------------------------------------
  3841. 2025-05-17 23:22:17,181 - __main__ - INFO -
  3842. Worker ID | started
  3843. ----------+--------
  3844. 0 | 5
  3845. 2025-05-17 23:22:24,459 - sglang - INFO - [2025-05-17 23:22:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2017, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3846. 2025-05-17 23:22:24,459 - __main__ - INFO - sglang running req: 0 queue req: 0
  3847. 2025-05-17 23:22:25,256 - sglang - INFO - [2025-05-17 23:22:25 TP0] Prefill batch. #new-seq: 4, #new-token: 8308, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  3848. 2025-05-17 23:22:25,257 - __main__ - INFO - sglang running req: 1 queue req: 0
  3849. 2025-05-17 23:22:27,182 - __main__ - INFO - Queue remaining: 0
  3850. 2025-05-17 23:22:27,183 - __main__ - INFO -
  3851. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3852. ----------------------------------------------------------------------------------
  3853. 2025-05-17 23:22:27,183 - __main__ - INFO -
  3854. Worker ID | started
  3855. ----------+--------
  3856. 0 | 5
  3857. 2025-05-17 23:22:28,396 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  3858. 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 1 exiting due to empty queue
  3859. 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 2 exiting due to empty queue
  3860. 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 3 exiting due to empty queue
  3861. 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 4 exiting due to empty queue
  3862. 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 5 exiting due to empty queue
  3863. 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 6 exiting due to empty queue
  3864. 2025-05-17 23:22:28,396 - __main__ - INFO - Worker 7 exiting due to empty queue
  3865. 2025-05-17 23:22:28,693 - sglang - INFO - [2025-05-17 23:22:28 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.28, #queue-req: 0
  3866. 2025-05-17 23:22:28,694 - __main__ - INFO - sglang running req: 5 queue req: 0
  3867. 2025-05-17 23:22:29,550 - sglang - INFO - [2025-05-17 23:22:29 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.36, #queue-req: 0
  3868. 2025-05-17 23:22:29,551 - __main__ - INFO - sglang running req: 5 queue req: 0
  3869. 2025-05-17 23:22:30,406 - sglang - INFO - [2025-05-17 23:22:30 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.74, #queue-req: 0
  3870. 2025-05-17 23:22:30,406 - __main__ - INFO - sglang running req: 5 queue req: 0
  3871. 2025-05-17 23:22:31,261 - sglang - INFO - [2025-05-17 23:22:31 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.82, #queue-req: 0
  3872. 2025-05-17 23:22:31,261 - __main__ - INFO - sglang running req: 5 queue req: 0
  3873. 2025-05-17 23:22:32,117 - sglang - INFO - [2025-05-17 23:22:32 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.68, #queue-req: 0
  3874. 2025-05-17 23:22:32,118 - __main__ - INFO - sglang running req: 5 queue req: 0
  3875. 2025-05-17 23:22:32,976 - sglang - INFO - [2025-05-17 23:22:32 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 233.04, #queue-req: 0
  3876. 2025-05-17 23:22:32,976 - __main__ - INFO - sglang running req: 5 queue req: 0
  3877. 2025-05-17 23:22:33,836 - sglang - INFO - [2025-05-17 23:22:33 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.38, #queue-req: 0
  3878. 2025-05-17 23:22:33,836 - __main__ - INFO - sglang running req: 5 queue req: 0
  3879. 2025-05-17 23:22:34,698 - sglang - INFO - [2025-05-17 23:22:34 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.19, #queue-req: 0
  3880. 2025-05-17 23:22:34,698 - __main__ - INFO - sglang running req: 5 queue req: 0
  3881. 2025-05-17 23:22:35,556 - sglang - INFO - [2025-05-17 23:22:35 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.71, #queue-req: 0
  3882. 2025-05-17 23:22:35,556 - __main__ - INFO - sglang running req: 3 queue req: 0
  3883. 2025-05-17 23:22:36,397 - sglang - INFO - [2025-05-17 23:22:36 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.61, #queue-req: 0
  3884. 2025-05-17 23:22:36,398 - __main__ - INFO - sglang running req: 3 queue req: 0
  3885. 2025-05-17 23:22:37,184 - __main__ - INFO - Queue remaining: 0
  3886. 2025-05-17 23:22:37,184 - __main__ - INFO -
  3887. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3888. ----------------------------------------------------------------------------------
  3889. sglang_input_tokens 101.07 101.07
  3890. sglang_output_tokens 19.66 19.66
  3891. 2025-05-17 23:22:37,185 - __main__ - INFO -
  3892. Worker ID | finished | started
  3893. ----------+----------+--------
  3894. 0 | 4 | 5
  3895. 2025-05-17 23:22:37,230 - sglang - INFO - [2025-05-17 23:22:37 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 84.06, #queue-req: 0
  3896. 2025-05-17 23:22:37,230 - __main__ - INFO - sglang running req: 1 queue req: 0
  3897. 2025-05-17 23:22:38,054 - sglang - INFO - [2025-05-17 23:22:38 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.52, #queue-req: 0
  3898. 2025-05-17 23:22:38,055 - __main__ - INFO - sglang running req: 1 queue req: 0
  3899. 2025-05-17 23:22:38,843 - __main__ - INFO - Finished TaskGroup for worker on 88731f2783fb8112f0205c218828d88dc213896f
  3900. 2025-05-17 23:22:38,843 - __main__ - INFO - Got 1 docs for 88731f2783fb8112f0205c218828d88dc213896f
  3901. 2025-05-17 23:22:38,844 - __main__ - INFO - Worker 0 exiting due to empty queue
  3902. 2025-05-17 23:22:38,845 - __main__ - INFO - Work done
  3903. 2025-05-17 23:22:38,845 - __main__ - INFO - Got cancellation request for SGLang server
  3904. 2025-05-17 23:27:55,455 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  3905. 2025-05-17 23:27:55,455 - __main__ - INFO - Loading file at olmocr_workspace/job_1747495669/input.pdf as PDF document
  3906. 2025-05-17 23:27:55,455 - __main__ - INFO - Found 1 total pdf paths to add
  3907. 2025-05-17 23:27:55,457 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  3908. 2025-05-17 23:27:55,725 - __main__ - INFO - Starting pipeline with PID 450922
  3909. 2025-05-17 23:27:55,725 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  3910. 2025-05-17 23:28:01,352 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  3911. 2025-05-17 23:28:02,398 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  3912. 2025-05-17 23:28:03,447 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  3913. 2025-05-17 23:28:04,514 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  3914. 2025-05-17 23:28:05,549 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  3915. 2025-05-17 23:28:06,599 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  3916. 2025-05-17 23:28:07,200 - sglang - INFO - [2025-05-17 23:28:07] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=817117709, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  3917. 2025-05-17 23:28:07,201 - __main__ - INFO - [2025-05-17 23:28:07] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=817117709, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  3918. 2025-05-17 23:28:07,673 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  3919. 2025-05-17 23:28:08,739 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  3920. 2025-05-17 23:28:09,806 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  3921. 2025-05-17 23:28:10,874 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  3922. 2025-05-17 23:28:11,941 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  3923. 2025-05-17 23:28:13,012 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  3924. 2025-05-17 23:28:14,075 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  3925. 2025-05-17 23:28:15,145 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  3926. 2025-05-17 23:28:16,212 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  3927. 2025-05-17 23:28:16,415 - sglang - INFO - [2025-05-17 23:28:16 TP0] Overlap scheduler is disabled for multimodal models.
  3928. 2025-05-17 23:28:16,416 - __main__ - INFO - [2025-05-17 23:28:16 TP0] Overlap scheduler is disabled for multimodal models.
  3929. 2025-05-17 23:28:16,419 - sglang - INFO - [2025-05-17 23:28:16] Use chat template for the OpenAI-compatible API server: qwen2-vl
  3930. 2025-05-17 23:28:16,420 - __main__ - INFO - [2025-05-17 23:28:16] Use chat template for the OpenAI-compatible API server: qwen2-vl
  3931. 2025-05-17 23:28:16,894 - sglang - INFO - [2025-05-17 23:28:16 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  3932. 2025-05-17 23:28:16,894 - __main__ - INFO - [2025-05-17 23:28:16 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  3933. 2025-05-17 23:28:16,894 - sglang - INFO - [2025-05-17 23:28:16 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  3934. 2025-05-17 23:28:16,894 - __main__ - INFO - [2025-05-17 23:28:16 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  3935. 2025-05-17 23:28:16,895 - sglang - INFO - [2025-05-17 23:28:16 TP0] Init torch distributed begin.
  3936. 2025-05-17 23:28:16,895 - __main__ - INFO - [2025-05-17 23:28:16 TP0] Init torch distributed begin.
  3937. 2025-05-17 23:28:17,287 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  3938. 2025-05-17 23:28:18,354 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  3939. 2025-05-17 23:28:19,418 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  3940. 2025-05-17 23:28:20,473 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  3941. 2025-05-17 23:28:21,540 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  3942. 2025-05-17 23:28:22,219 - sglang - INFO - [2025-05-17 23:28:22 TP0] Load weight begin. avail mem=23.33 GB
  3943. 2025-05-17 23:28:22,219 - __main__ - INFO - [2025-05-17 23:28:22 TP0] Load weight begin. avail mem=23.33 GB
  3944. 2025-05-17 23:28:22,618 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  3945. 2025-05-17 23:28:23,283 - sglang - INFO - [2025-05-17 23:28:23 TP0] Using model weights format ['*.safetensors']
  3946. 2025-05-17 23:28:23,283 - __main__ - INFO - [2025-05-17 23:28:23 TP0] Using model weights format ['*.safetensors']
  3947. 2025-05-17 23:28:23,694 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  3948. 2025-05-17 23:28:23,782 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  3949. 2025-05-17 23:28:23,782 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  3950. 2025-05-17 23:28:24,076 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.40it/s]
  3951. 2025-05-17 23:28:24,076 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.40it/s]
  3952. 2025-05-17 23:28:24,763 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  3953. 2025-05-17 23:28:25,059 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.43it/s]
  3954. 2025-05-17 23:28:25,060 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.43it/s]
  3955. 2025-05-17 23:28:25,840 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  3956. 2025-05-17 23:28:26,064 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.19it/s]
  3957. 2025-05-17 23:28:26,065 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.19it/s]
  3958. 2025-05-17 23:28:26,937 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  3959. 2025-05-17 23:28:27,042 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
  3960. 2025-05-17 23:28:27,042 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
  3961. 2025-05-17 23:28:27,042 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.23it/s]
  3962. 2025-05-17 23:28:27,042 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.23it/s]
  3963. 2025-05-17 23:28:27,042 - sglang - INFO -
  3964. 2025-05-17 23:28:27,042 - __main__ - INFO -
  3965. 2025-05-17 23:28:27,192 - sglang - INFO - [2025-05-17 23:28:27 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  3966. 2025-05-17 23:28:27,193 - __main__ - INFO - [2025-05-17 23:28:27 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  3967. 2025-05-17 23:28:27,199 - sglang - INFO - [2025-05-17 23:28:27 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  3968. 2025-05-17 23:28:27,200 - __main__ - INFO - [2025-05-17 23:28:27 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  3969. 2025-05-17 23:28:27,200 - sglang - INFO - [2025-05-17 23:28:27 TP0] Memory pool end. avail mem=5.30 GB
  3970. 2025-05-17 23:28:27,200 - __main__ - INFO - [2025-05-17 23:28:27 TP0] Memory pool end. avail mem=5.30 GB
  3971. 2025-05-17 23:28:27,367 - sglang - INFO - [2025-05-17 23:28:27 TP0] Capture cuda graph begin. This can take up to several minutes.
  3972. 2025-05-17 23:28:27,368 - __main__ - INFO - [2025-05-17 23:28:27 TP0] Capture cuda graph begin. This can take up to several minutes.
  3973. 2025-05-17 23:28:28,015 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  3974. 2025-05-17 23:28:29,091 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  3975. 2025-05-17 23:28:29,145 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.77it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.33it/s] 100%|██████████| 4/4 [00:01<00:00, 2.78it/s] 100%|██████████| 4/4 [00:01<00:00, 2.25it/s]
  3976. 2025-05-17 23:28:29,145 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.77it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.33it/s] 100%|██████████| 4/4 [00:01<00:00, 2.78it/s] 100%|██████████| 4/4 [00:01<00:00, 2.25it/s]
  3977. 2025-05-17 23:28:29,145 - sglang - INFO - [2025-05-17 23:28:29 TP0] Capture cuda graph end. Time elapsed: 1.78 s
  3978. 2025-05-17 23:28:29,145 - __main__ - INFO - [2025-05-17 23:28:29 TP0] Capture cuda graph end. Time elapsed: 1.78 s
  3979. 2025-05-17 23:28:30,167 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  3980. 2025-05-17 23:28:31,239 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  3981. 2025-05-17 23:28:31,599 - sglang - INFO - [2025-05-17 23:28:31 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  3982. 2025-05-17 23:28:31,599 - __main__ - INFO - [2025-05-17 23:28:31 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  3983. 2025-05-17 23:28:32,331 - __main__ - INFO - sglang server is ready.
  3984. 2025-05-17 23:28:32,331 - __main__ - INFO - Queue remaining: 1
  3985. 2025-05-17 23:28:32,331 - __main__ - INFO -
  3986. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  3987. ----------------------------------------------------------------------------------
  3988. 2025-05-17 23:28:32,331 - __main__ - INFO -
  3989. Worker ID
  3990. ---------
  3991. 2025-05-17 23:28:32,332 - __main__ - INFO - Worker 0 processing work item 53fa84f8de7c7f6853d89db3cf39d246b300e93f
  3992. 2025-05-17 23:28:32,332 - __main__ - INFO - Created all tasks for 53fa84f8de7c7f6853d89db3cf39d246b300e93f
  3993. 2025-05-17 23:28:32,334 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747495669/input.pdf in worker 0
  3994. 2025-05-17 23:28:32,676 - sglang - INFO - [2025-05-17 23:28:32 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3995. 2025-05-17 23:28:32,677 - __main__ - INFO - [2025-05-17 23:28:32 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  3996. 2025-05-17 23:28:32,677 - __main__ - INFO - sglang running req: 0 queue req: 0
  3997. 2025-05-17 23:28:33,303 - sglang - INFO - [2025-05-17 23:28:33] The server is fired up and ready to roll!
  3998. 2025-05-17 23:28:33,303 - __main__ - INFO - [2025-05-17 23:28:33] The server is fired up and ready to roll!
  3999. 2025-05-17 23:28:38,667 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495669/input.pdf-1
  4000. 2025-05-17 23:28:42,379 - __main__ - INFO - Queue remaining: 0
  4001. 2025-05-17 23:28:42,379 - __main__ - INFO -
  4002. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4003. ----------------------------------------------------------------------------------
  4004. 2025-05-17 23:28:42,379 - __main__ - INFO -
  4005. Worker ID | started
  4006. ----------+--------
  4007. 0 | 1
  4008. 2025-05-17 23:28:52,381 - __main__ - INFO - Queue remaining: 0
  4009. 2025-05-17 23:28:52,381 - __main__ - INFO -
  4010. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4011. ----------------------------------------------------------------------------------
  4012. 2025-05-17 23:28:52,381 - __main__ - INFO -
  4013. Worker ID | started
  4014. ----------+--------
  4015. 0 | 1
  4016. 2025-05-17 23:28:59,979 - sglang - INFO - [2025-05-17 23:28:59 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4017. 2025-05-17 23:28:59,979 - __main__ - INFO - sglang running req: 0 queue req: 0
  4018. 2025-05-17 23:29:01,385 - sglang - INFO - [2025-05-17 23:29:01 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.34, #queue-req: 0
  4019. 2025-05-17 23:29:01,385 - __main__ - INFO - sglang running req: 1 queue req: 0
  4020. 2025-05-17 23:29:02,203 - sglang - INFO - [2025-05-17 23:29:02 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.87, #queue-req: 0
  4021. 2025-05-17 23:29:02,204 - __main__ - INFO - sglang running req: 1 queue req: 0
  4022. 2025-05-17 23:29:02,382 - __main__ - INFO - Queue remaining: 0
  4023. 2025-05-17 23:29:02,382 - __main__ - INFO -
  4024. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4025. ----------------------------------------------------------------------------------
  4026. 2025-05-17 23:29:02,383 - __main__ - INFO -
  4027. Worker ID | started
  4028. ----------+--------
  4029. 0 | 1
  4030. 2025-05-17 23:29:03,021 - sglang - INFO - [2025-05-17 23:29:03 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.90, #queue-req: 0
  4031. 2025-05-17 23:29:03,022 - __main__ - INFO - sglang running req: 1 queue req: 0
  4032. 2025-05-17 23:29:03,588 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  4033. 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 1 exiting due to empty queue
  4034. 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 2 exiting due to empty queue
  4035. 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 3 exiting due to empty queue
  4036. 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 4 exiting due to empty queue
  4037. 2025-05-17 23:29:03,589 - __main__ - INFO - Worker 5 exiting due to empty queue
  4038. 2025-05-17 23:29:03,590 - __main__ - INFO - Worker 6 exiting due to empty queue
  4039. 2025-05-17 23:29:03,590 - __main__ - INFO - Worker 7 exiting due to empty queue
  4040. 2025-05-17 23:29:03,839 - sglang - INFO - [2025-05-17 23:29:03 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.93, #queue-req: 0
  4041. 2025-05-17 23:29:03,839 - __main__ - INFO - sglang running req: 1 queue req: 0
  4042. 2025-05-17 23:29:04,658 - sglang - INFO - [2025-05-17 23:29:04 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.85, #queue-req: 0
  4043. 2025-05-17 23:29:04,658 - __main__ - INFO - sglang running req: 1 queue req: 0
  4044. 2025-05-17 23:29:05,477 - sglang - INFO - [2025-05-17 23:29:05 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.80, #queue-req: 0
  4045. 2025-05-17 23:29:05,478 - __main__ - INFO - sglang running req: 1 queue req: 0
  4046. 2025-05-17 23:29:06,297 - sglang - INFO - [2025-05-17 23:29:06 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.77, #queue-req: 0
  4047. 2025-05-17 23:29:06,298 - __main__ - INFO - sglang running req: 1 queue req: 0
  4048. 2025-05-17 23:29:06,346 - __main__ - INFO - Finished TaskGroup for worker on 53fa84f8de7c7f6853d89db3cf39d246b300e93f
  4049. 2025-05-17 23:29:06,346 - __main__ - INFO - Got 1 docs for 53fa84f8de7c7f6853d89db3cf39d246b300e93f
  4050. 2025-05-17 23:29:06,347 - __main__ - INFO - Worker 0 exiting due to empty queue
  4051. 2025-05-17 23:29:06,348 - __main__ - INFO - Work done
  4052. 2025-05-17 23:29:06,348 - __main__ - INFO - Got cancellation request for SGLang server
  4053. 2025-05-17 23:29:16,250 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  4054. 2025-05-17 23:29:16,250 - __main__ - INFO - Loading file at olmocr_workspace/job_1747495750/input.pdf as PDF document
  4055. 2025-05-17 23:29:16,250 - __main__ - INFO - Found 1 total pdf paths to add
  4056. 2025-05-17 23:29:16,254 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  4057. 2025-05-17 23:29:16,522 - __main__ - INFO - Starting pipeline with PID 452130
  4058. 2025-05-17 23:29:16,522 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  4059. 2025-05-17 23:29:22,121 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  4060. 2025-05-17 23:29:23,160 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  4061. 2025-05-17 23:29:24,204 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  4062. 2025-05-17 23:29:25,266 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  4063. 2025-05-17 23:29:26,334 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  4064. 2025-05-17 23:29:27,399 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  4065. 2025-05-17 23:29:28,006 - sglang - INFO - [2025-05-17 23:29:28] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=166973500, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4066. 2025-05-17 23:29:28,006 - __main__ - INFO - [2025-05-17 23:29:28] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=166973500, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4067. 2025-05-17 23:29:28,475 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  4068. 2025-05-17 23:29:29,553 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  4069. 2025-05-17 23:29:30,621 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  4070. 2025-05-17 23:29:31,689 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  4071. 2025-05-17 23:29:32,756 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  4072. 2025-05-17 23:29:33,823 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  4073. 2025-05-17 23:29:34,891 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  4074. 2025-05-17 23:29:35,958 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  4075. 2025-05-17 23:29:37,027 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  4076. 2025-05-17 23:29:37,409 - sglang - INFO - [2025-05-17 23:29:37] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4077. 2025-05-17 23:29:37,409 - __main__ - INFO - [2025-05-17 23:29:37] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4078. 2025-05-17 23:29:38,103 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  4079. 2025-05-17 23:29:39,167 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  4080. 2025-05-17 23:29:40,222 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  4081. 2025-05-17 23:29:41,285 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  4082. 2025-05-17 23:29:42,351 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  4083. 2025-05-17 23:29:42,819 - sglang - INFO - [2025-05-17 23:29:42 TP0] Overlap scheduler is disabled for multimodal models.
  4084. 2025-05-17 23:29:42,819 - __main__ - INFO - [2025-05-17 23:29:42 TP0] Overlap scheduler is disabled for multimodal models.
  4085. 2025-05-17 23:29:43,317 - sglang - INFO - [2025-05-17 23:29:43 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4086. 2025-05-17 23:29:43,317 - __main__ - INFO - [2025-05-17 23:29:43 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4087. 2025-05-17 23:29:43,317 - sglang - INFO - [2025-05-17 23:29:43 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4088. 2025-05-17 23:29:43,317 - __main__ - INFO - [2025-05-17 23:29:43 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4089. 2025-05-17 23:29:43,317 - sglang - INFO - [2025-05-17 23:29:43 TP0] Init torch distributed begin.
  4090. 2025-05-17 23:29:43,318 - __main__ - INFO - [2025-05-17 23:29:43 TP0] Init torch distributed begin.
  4091. 2025-05-17 23:29:43,434 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  4092. 2025-05-17 23:29:44,497 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  4093. 2025-05-17 23:29:45,565 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  4094. 2025-05-17 23:29:46,633 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  4095. 2025-05-17 23:29:47,701 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  4096. 2025-05-17 23:29:48,686 - sglang - INFO - [2025-05-17 23:29:48 TP0] Load weight begin. avail mem=23.33 GB
  4097. 2025-05-17 23:29:48,686 - __main__ - INFO - [2025-05-17 23:29:48 TP0] Load weight begin. avail mem=23.33 GB
  4098. 2025-05-17 23:29:48,771 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  4099. 2025-05-17 23:29:49,830 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  4100. 2025-05-17 23:29:50,698 - sglang - INFO - [2025-05-17 23:29:50 TP0] Using model weights format ['*.safetensors']
  4101. 2025-05-17 23:29:50,698 - __main__ - INFO - [2025-05-17 23:29:50 TP0] Using model weights format ['*.safetensors']
  4102. 2025-05-17 23:29:50,907 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  4103. 2025-05-17 23:29:51,440 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4104. 2025-05-17 23:29:51,440 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4105. 2025-05-17 23:29:51,737 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.38it/s]
  4106. 2025-05-17 23:29:51,737 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.38it/s]
  4107. 2025-05-17 23:29:51,984 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  4108. 2025-05-17 23:29:52,694 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.46it/s]
  4109. 2025-05-17 23:29:52,695 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.46it/s]
  4110. 2025-05-17 23:29:53,061 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  4111. 2025-05-17 23:29:53,654 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.23it/s]
  4112. 2025-05-17 23:29:53,654 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.23it/s]
  4113. 2025-05-17 23:29:54,138 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  4114. 2025-05-17 23:29:54,597 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.16it/s]
  4115. 2025-05-17 23:29:54,598 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.16it/s]
  4116. 2025-05-17 23:29:54,598 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
  4117. 2025-05-17 23:29:54,598 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
  4118. 2025-05-17 23:29:54,598 - sglang - INFO -
  4119. 2025-05-17 23:29:54,598 - __main__ - INFO -
  4120. 2025-05-17 23:29:54,729 - sglang - INFO - [2025-05-17 23:29:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4121. 2025-05-17 23:29:54,729 - __main__ - INFO - [2025-05-17 23:29:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4122. 2025-05-17 23:29:54,735 - sglang - INFO - [2025-05-17 23:29:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4123. 2025-05-17 23:29:54,735 - __main__ - INFO - [2025-05-17 23:29:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4124. 2025-05-17 23:29:54,736 - sglang - INFO - [2025-05-17 23:29:54 TP0] Memory pool end. avail mem=5.30 GB
  4125. 2025-05-17 23:29:54,736 - __main__ - INFO - [2025-05-17 23:29:54 TP0] Memory pool end. avail mem=5.30 GB
  4126. 2025-05-17 23:29:54,889 - sglang - INFO - [2025-05-17 23:29:54 TP0] Capture cuda graph begin. This can take up to several minutes.
  4127. 2025-05-17 23:29:54,889 - __main__ - INFO - [2025-05-17 23:29:54 TP0] Capture cuda graph begin. This can take up to several minutes.
  4128. 2025-05-17 23:29:55,215 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  4129. 2025-05-17 23:29:56,291 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  4130. 2025-05-17 23:29:56,581 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.87it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s] 100%|██████████| 4/4 [00:01<00:00, 2.91it/s] 100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
  4131. 2025-05-17 23:29:56,581 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.87it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s] 100%|██████████| 4/4 [00:01<00:00, 2.91it/s] 100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
  4132. 2025-05-17 23:29:56,581 - sglang - INFO - [2025-05-17 23:29:56 TP0] Capture cuda graph end. Time elapsed: 1.69 s
  4133. 2025-05-17 23:29:56,581 - __main__ - INFO - [2025-05-17 23:29:56 TP0] Capture cuda graph end. Time elapsed: 1.69 s
  4134. 2025-05-17 23:29:57,366 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  4135. 2025-05-17 23:29:58,433 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  4136. 2025-05-17 23:29:59,499 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  4137. 2025-05-17 23:29:59,589 - sglang - INFO - [2025-05-17 23:29:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4138. 2025-05-17 23:29:59,589 - __main__ - INFO - [2025-05-17 23:29:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4139. 2025-05-17 23:30:00,591 - __main__ - INFO - sglang server is ready.
  4140. 2025-05-17 23:30:00,592 - __main__ - INFO - Queue remaining: 1
  4141. 2025-05-17 23:30:00,592 - __main__ - INFO -
  4142. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4143. ----------------------------------------------------------------------------------
  4144. 2025-05-17 23:30:00,592 - __main__ - INFO -
  4145. Worker ID
  4146. ---------
  4147. 2025-05-17 23:30:00,592 - __main__ - INFO - Worker 0 processing work item d9af858998245f0877efab6a7aad7fa5652f8d23
  4148. 2025-05-17 23:30:00,592 - __main__ - INFO - Created all tasks for d9af858998245f0877efab6a7aad7fa5652f8d23
  4149. 2025-05-17 23:30:00,599 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747495750/input.pdf in worker 0
  4150. 2025-05-17 23:30:00,680 - sglang - INFO - [2025-05-17 23:30:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4151. 2025-05-17 23:30:00,680 - __main__ - INFO - [2025-05-17 23:30:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4152. 2025-05-17 23:30:00,681 - __main__ - INFO - sglang running req: 0 queue req: 0
  4153. 2025-05-17 23:30:01,264 - sglang - INFO - [2025-05-17 23:30:01] The server is fired up and ready to roll!
  4154. 2025-05-17 23:30:01,264 - __main__ - INFO - [2025-05-17 23:30:01] The server is fired up and ready to roll!
  4155. 2025-05-17 23:30:06,929 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-1
  4156. 2025-05-17 23:30:06,967 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-2
  4157. 2025-05-17 23:30:07,009 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-3
  4158. 2025-05-17 23:30:07,051 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-4
  4159. 2025-05-17 23:30:07,085 - __main__ - INFO - Built page query for olmocr_workspace/job_1747495750/input.pdf-5
  4160. 2025-05-17 23:30:10,679 - __main__ - INFO - Queue remaining: 0
  4161. 2025-05-17 23:30:10,679 - __main__ - INFO -
  4162. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4163. ----------------------------------------------------------------------------------
  4164. 2025-05-17 23:30:10,679 - __main__ - INFO -
  4165. Worker ID | started
  4166. ----------+--------
  4167. 0 | 5
  4168. 2025-05-17 23:30:20,680 - __main__ - INFO - Queue remaining: 0
  4169. 2025-05-17 23:30:20,681 - __main__ - INFO -
  4170. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4171. ----------------------------------------------------------------------------------
  4172. 2025-05-17 23:30:20,681 - __main__ - INFO -
  4173. Worker ID | started
  4174. ----------+--------
  4175. 0 | 5
  4176. 2025-05-17 23:30:27,429 - sglang - INFO - [2025-05-17 23:30:27 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4177. 2025-05-17 23:30:27,430 - __main__ - INFO - sglang running req: 0 queue req: 0
  4178. 2025-05-17 23:30:28,255 - sglang - INFO - [2025-05-17 23:30:28 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  4179. 2025-05-17 23:30:28,256 - __main__ - INFO - sglang running req: 1 queue req: 0
  4180. 2025-05-17 23:30:30,682 - __main__ - INFO - Queue remaining: 0
  4181. 2025-05-17 23:30:30,682 - __main__ - INFO -
  4182. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4183. ----------------------------------------------------------------------------------
  4184. 2025-05-17 23:30:30,682 - __main__ - INFO -
  4185. Worker ID | started
  4186. ----------+--------
  4187. 0 | 5
  4188. 2025-05-17 23:30:31,388 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  4189. 2025-05-17 23:30:31,388 - __main__ - INFO - Worker 1 exiting due to empty queue
  4190. 2025-05-17 23:30:31,388 - __main__ - INFO - Worker 2 exiting due to empty queue
  4191. 2025-05-17 23:30:31,388 - __main__ - INFO - Worker 3 exiting due to empty queue
  4192. 2025-05-17 23:30:31,388 - __main__ - INFO - Worker 4 exiting due to empty queue
  4193. 2025-05-17 23:30:31,389 - __main__ - INFO - Worker 5 exiting due to empty queue
  4194. 2025-05-17 23:30:31,389 - __main__ - INFO - Worker 6 exiting due to empty queue
  4195. 2025-05-17 23:30:31,389 - __main__ - INFO - Worker 7 exiting due to empty queue
  4196. 2025-05-17 23:30:31,705 - sglang - INFO - [2025-05-17 23:30:31 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.36, #queue-req: 0
  4197. 2025-05-17 23:30:31,705 - __main__ - INFO - sglang running req: 5 queue req: 0
  4198. 2025-05-17 23:30:32,564 - sglang - INFO - [2025-05-17 23:30:32 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 232.73, #queue-req: 0
  4199. 2025-05-17 23:30:32,565 - __main__ - INFO - sglang running req: 5 queue req: 0
  4200. 2025-05-17 23:30:33,423 - sglang - INFO - [2025-05-17 23:30:33 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 232.90, #queue-req: 0
  4201. 2025-05-17 23:30:33,423 - __main__ - INFO - sglang running req: 5 queue req: 0
  4202. 2025-05-17 23:30:34,282 - sglang - INFO - [2025-05-17 23:30:34 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 232.94, #queue-req: 0
  4203. 2025-05-17 23:30:34,282 - __main__ - INFO - sglang running req: 5 queue req: 0
  4204. 2025-05-17 23:30:35,140 - sglang - INFO - [2025-05-17 23:30:35 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.96, #queue-req: 0
  4205. 2025-05-17 23:30:35,140 - __main__ - INFO - sglang running req: 5 queue req: 0
  4206. 2025-05-17 23:30:36,001 - sglang - INFO - [2025-05-17 23:30:36 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.38, #queue-req: 0
  4207. 2025-05-17 23:30:36,001 - __main__ - INFO - sglang running req: 5 queue req: 0
  4208. 2025-05-17 23:30:36,863 - sglang - INFO - [2025-05-17 23:30:36 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.91, #queue-req: 0
  4209. 2025-05-17 23:30:36,863 - __main__ - INFO - sglang running req: 5 queue req: 0
  4210. 2025-05-17 23:30:37,725 - sglang - INFO - [2025-05-17 23:30:37 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.03, #queue-req: 0
  4211. 2025-05-17 23:30:37,725 - __main__ - INFO - sglang running req: 5 queue req: 0
  4212. 2025-05-17 23:30:38,589 - sglang - INFO - [2025-05-17 23:30:38 TP0] Decode batch. #running-req: 4, #token: 9730, token usage: 0.26, gen throughput (token/s): 217.60, #queue-req: 0
  4213. 2025-05-17 23:30:38,589 - __main__ - INFO - sglang running req: 4 queue req: 0
  4214. 2025-05-17 23:30:39,434 - sglang - INFO - [2025-05-17 23:30:39 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 145.61, #queue-req: 0
  4215. 2025-05-17 23:30:39,434 - __main__ - INFO - sglang running req: 3 queue req: 0
  4216. 2025-05-17 23:30:40,271 - sglang - INFO - [2025-05-17 23:30:40 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 83.61, #queue-req: 0
  4217. 2025-05-17 23:30:40,271 - __main__ - INFO - sglang running req: 1 queue req: 0
  4218. 2025-05-17 23:30:40,684 - __main__ - INFO - Queue remaining: 0
  4219. 2025-05-17 23:30:40,685 - __main__ - INFO -
  4220. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4221. ----------------------------------------------------------------------------------
  4222. sglang_input_tokens 92.05 92.05
  4223. sglang_output_tokens 17.96 17.96
  4224. 2025-05-17 23:30:40,685 - __main__ - INFO -
  4225. Worker ID | finished | started
  4226. ----------+----------+--------
  4227. 0 | 4 | 5
  4228. 2025-05-17 23:30:41,099 - sglang - INFO - [2025-05-17 23:30:41 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.34, #queue-req: 0
  4229. 2025-05-17 23:30:41,099 - __main__ - INFO - sglang running req: 1 queue req: 0
  4230. 2025-05-17 23:30:41,766 - __main__ - INFO - Finished TaskGroup for worker on d9af858998245f0877efab6a7aad7fa5652f8d23
  4231. 2025-05-17 23:30:41,766 - __main__ - INFO - Got 1 docs for d9af858998245f0877efab6a7aad7fa5652f8d23
  4232. 2025-05-17 23:30:41,768 - __main__ - INFO - Worker 0 exiting due to empty queue
  4233. 2025-05-17 23:30:41,768 - __main__ - INFO - Work done
  4234. 2025-05-17 23:30:41,768 - __main__ - INFO - Got cancellation request for SGLang server
  4235. 2025-05-17 23:37:24,245 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  4236. 2025-05-17 23:37:24,245 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496237/input.pdf as PDF document
  4237. 2025-05-17 23:37:24,245 - __main__ - INFO - Found 1 total pdf paths to add
  4238. 2025-05-17 23:37:24,250 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  4239. 2025-05-17 23:37:24,505 - __main__ - INFO - Starting pipeline with PID 455265
  4240. 2025-05-17 23:37:24,505 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  4241. 2025-05-17 23:37:30,487 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  4242. 2025-05-17 23:37:31,529 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  4243. 2025-05-17 23:37:32,586 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  4244. 2025-05-17 23:37:33,631 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  4245. 2025-05-17 23:37:34,677 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  4246. 2025-05-17 23:37:35,709 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  4247. 2025-05-17 23:37:36,253 - sglang - INFO - [2025-05-17 23:37:36] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=359813597, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4248. 2025-05-17 23:37:36,253 - __main__ - INFO - [2025-05-17 23:37:36] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=359813597, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4249. 2025-05-17 23:37:36,771 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  4250. 2025-05-17 23:37:37,837 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  4251. 2025-05-17 23:37:38,908 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  4252. 2025-05-17 23:37:39,979 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  4253. 2025-05-17 23:37:41,048 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  4254. 2025-05-17 23:37:42,114 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  4255. 2025-05-17 23:37:43,181 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  4256. 2025-05-17 23:37:44,250 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  4257. 2025-05-17 23:37:44,933 - sglang - INFO - [2025-05-17 23:37:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4258. 2025-05-17 23:37:44,933 - __main__ - INFO - [2025-05-17 23:37:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4259. 2025-05-17 23:37:45,329 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  4260. 2025-05-17 23:37:46,400 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  4261. 2025-05-17 23:37:47,465 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  4262. 2025-05-17 23:37:48,532 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  4263. 2025-05-17 23:37:49,587 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  4264. 2025-05-17 23:37:50,469 - sglang - INFO - [2025-05-17 23:37:50 TP0] Overlap scheduler is disabled for multimodal models.
  4265. 2025-05-17 23:37:50,470 - __main__ - INFO - [2025-05-17 23:37:50 TP0] Overlap scheduler is disabled for multimodal models.
  4266. 2025-05-17 23:37:50,665 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  4267. 2025-05-17 23:37:50,967 - sglang - INFO - [2025-05-17 23:37:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4268. 2025-05-17 23:37:50,967 - __main__ - INFO - [2025-05-17 23:37:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4269. 2025-05-17 23:37:50,968 - sglang - INFO - [2025-05-17 23:37:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4270. 2025-05-17 23:37:50,968 - __main__ - INFO - [2025-05-17 23:37:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4271. 2025-05-17 23:37:50,968 - sglang - INFO - [2025-05-17 23:37:50 TP0] Init torch distributed begin.
  4272. 2025-05-17 23:37:50,968 - __main__ - INFO - [2025-05-17 23:37:50 TP0] Init torch distributed begin.
  4273. 2025-05-17 23:37:51,744 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  4274. 2025-05-17 23:37:52,814 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  4275. 2025-05-17 23:37:53,884 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  4276. 2025-05-17 23:37:54,954 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  4277. 2025-05-17 23:37:56,021 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  4278. 2025-05-17 23:37:56,348 - sglang - INFO - [2025-05-17 23:37:56 TP0] Load weight begin. avail mem=23.33 GB
  4279. 2025-05-17 23:37:56,348 - __main__ - INFO - [2025-05-17 23:37:56 TP0] Load weight begin. avail mem=23.33 GB
  4280. 2025-05-17 23:37:57,101 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  4281. 2025-05-17 23:37:57,429 - sglang - INFO - [2025-05-17 23:37:57 TP0] Using model weights format ['*.safetensors']
  4282. 2025-05-17 23:37:57,430 - __main__ - INFO - [2025-05-17 23:37:57 TP0] Using model weights format ['*.safetensors']
  4283. 2025-05-17 23:37:58,182 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  4284. 2025-05-17 23:37:58,390 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4285. 2025-05-17 23:37:58,390 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4286. 2025-05-17 23:37:58,681 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.44it/s]
  4287. 2025-05-17 23:37:58,681 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.44it/s]
  4288. 2025-05-17 23:37:59,261 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  4289. 2025-05-17 23:37:59,632 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.47it/s]
  4290. 2025-05-17 23:37:59,632 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.47it/s]
  4291. 2025-05-17 23:38:00,342 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  4292. 2025-05-17 23:38:00,586 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.24it/s]
  4293. 2025-05-17 23:38:00,586 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.24it/s]
  4294. 2025-05-17 23:38:01,422 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  4295. 2025-05-17 23:38:01,515 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.17it/s]
  4296. 2025-05-17 23:38:01,515 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.17it/s]
  4297. 2025-05-17 23:38:01,515 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.28it/s]
  4298. 2025-05-17 23:38:01,515 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.28it/s]
  4299. 2025-05-17 23:38:01,515 - sglang - INFO -
  4300. 2025-05-17 23:38:01,515 - __main__ - INFO -
  4301. 2025-05-17 23:38:01,662 - sglang - INFO - [2025-05-17 23:38:01 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4302. 2025-05-17 23:38:01,663 - __main__ - INFO - [2025-05-17 23:38:01 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4303. 2025-05-17 23:38:01,669 - sglang - INFO - [2025-05-17 23:38:01 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4304. 2025-05-17 23:38:01,669 - __main__ - INFO - [2025-05-17 23:38:01 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4305. 2025-05-17 23:38:01,669 - sglang - INFO - [2025-05-17 23:38:01 TP0] Memory pool end. avail mem=5.30 GB
  4306. 2025-05-17 23:38:01,669 - __main__ - INFO - [2025-05-17 23:38:01 TP0] Memory pool end. avail mem=5.30 GB
  4307. 2025-05-17 23:38:01,847 - sglang - INFO - [2025-05-17 23:38:01 TP0] Capture cuda graph begin. This can take up to several minutes.
  4308. 2025-05-17 23:38:01,847 - __main__ - INFO - [2025-05-17 23:38:01 TP0] Capture cuda graph begin. This can take up to several minutes.
  4309. 2025-05-17 23:38:02,502 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  4310. 2025-05-17 23:38:03,580 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.87it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.47it/s] 100%|██████████| 4/4 [00:01<00:00, 2.90it/s] 100%|██████████| 4/4 [00:01<00:00, 2.36it/s]
  4311. 2025-05-17 23:38:03,581 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.87it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.47it/s] 100%|██████████| 4/4 [00:01<00:00, 2.90it/s] 100%|██████████| 4/4 [00:01<00:00, 2.36it/s]
  4312. 2025-05-17 23:38:03,581 - sglang - INFO - [2025-05-17 23:38:03 TP0] Capture cuda graph end. Time elapsed: 1.70 s
  4313. 2025-05-17 23:38:03,581 - __main__ - INFO - [2025-05-17 23:38:03 TP0] Capture cuda graph end. Time elapsed: 1.70 s
  4314. 2025-05-17 23:38:03,582 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  4315. 2025-05-17 23:38:04,636 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  4316. 2025-05-17 23:38:05,702 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  4317. 2025-05-17 23:38:05,961 - sglang - INFO - [2025-05-17 23:38:05 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4318. 2025-05-17 23:38:05,961 - __main__ - INFO - [2025-05-17 23:38:05 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4319. 2025-05-17 23:38:06,790 - __main__ - INFO - sglang server is ready.
  4320. 2025-05-17 23:38:06,790 - __main__ - INFO - Queue remaining: 1
  4321. 2025-05-17 23:38:06,790 - __main__ - INFO -
  4322. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4323. ----------------------------------------------------------------------------------
  4324. 2025-05-17 23:38:06,790 - __main__ - INFO -
  4325. Worker ID
  4326. ---------
  4327. 2025-05-17 23:38:06,791 - __main__ - INFO - Worker 0 processing work item e48bab7ddc862bf0fbce5dbd44894d26f2a0404e
  4328. 2025-05-17 23:38:06,791 - __main__ - INFO - Created all tasks for e48bab7ddc862bf0fbce5dbd44894d26f2a0404e
  4329. 2025-05-17 23:38:06,795 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496237/input.pdf in worker 0
  4330. 2025-05-17 23:38:07,030 - sglang - INFO - [2025-05-17 23:38:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4331. 2025-05-17 23:38:07,030 - __main__ - INFO - [2025-05-17 23:38:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4332. 2025-05-17 23:38:07,030 - __main__ - INFO - sglang running req: 0 queue req: 0
  4333. 2025-05-17 23:38:07,781 - sglang - INFO - [2025-05-17 23:38:07] The server is fired up and ready to roll!
  4334. 2025-05-17 23:38:07,781 - __main__ - INFO - [2025-05-17 23:38:07] The server is fired up and ready to roll!
  4335. 2025-05-17 23:38:13,176 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-1
  4336. 2025-05-17 23:38:13,212 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-2
  4337. 2025-05-17 23:38:13,217 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-3
  4338. 2025-05-17 23:38:13,244 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-4
  4339. 2025-05-17 23:38:13,248 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496237/input.pdf-5
  4340. 2025-05-17 23:38:16,879 - __main__ - INFO - Queue remaining: 0
  4341. 2025-05-17 23:38:16,879 - __main__ - INFO -
  4342. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4343. ----------------------------------------------------------------------------------
  4344. 2025-05-17 23:38:16,879 - __main__ - INFO -
  4345. Worker ID | started
  4346. ----------+--------
  4347. 0 | 5
  4348. 2025-05-17 23:38:26,880 - __main__ - INFO - Queue remaining: 0
  4349. 2025-05-17 23:38:26,880 - __main__ - INFO -
  4350. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4351. ----------------------------------------------------------------------------------
  4352. 2025-05-17 23:38:26,880 - __main__ - INFO -
  4353. Worker ID | started
  4354. ----------+--------
  4355. 0 | 5
  4356. 2025-05-17 23:38:32,264 - sglang - INFO - [2025-05-17 23:38:32 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4357. 2025-05-17 23:38:32,264 - __main__ - INFO - sglang running req: 0 queue req: 0
  4358. 2025-05-17 23:38:33,067 - sglang - INFO - [2025-05-17 23:38:33 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  4359. 2025-05-17 23:38:33,067 - __main__ - INFO - sglang running req: 1 queue req: 0
  4360. 2025-05-17 23:38:36,474 - sglang - INFO - [2025-05-17 23:38:36 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.64, #queue-req: 0
  4361. 2025-05-17 23:38:36,474 - __main__ - INFO - sglang running req: 5 queue req: 0
  4362. 2025-05-17 23:38:36,881 - __main__ - INFO - Queue remaining: 0
  4363. 2025-05-17 23:38:36,881 - __main__ - INFO -
  4364. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4365. ----------------------------------------------------------------------------------
  4366. 2025-05-17 23:38:36,882 - __main__ - INFO -
  4367. Worker ID | started
  4368. ----------+--------
  4369. 0 | 5
  4370. 2025-05-17 23:38:37,329 - sglang - INFO - [2025-05-17 23:38:37 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.78, #queue-req: 0
  4371. 2025-05-17 23:38:37,329 - __main__ - INFO - sglang running req: 5 queue req: 0
  4372. 2025-05-17 23:38:37,790 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  4373. 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 1 exiting due to empty queue
  4374. 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 2 exiting due to empty queue
  4375. 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 3 exiting due to empty queue
  4376. 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 4 exiting due to empty queue
  4377. 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 5 exiting due to empty queue
  4378. 2025-05-17 23:38:37,791 - __main__ - INFO - Worker 6 exiting due to empty queue
  4379. 2025-05-17 23:38:37,792 - __main__ - INFO - Worker 7 exiting due to empty queue
  4380. 2025-05-17 23:38:38,184 - sglang - INFO - [2025-05-17 23:38:38 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.89, #queue-req: 0
  4381. 2025-05-17 23:38:38,185 - __main__ - INFO - sglang running req: 5 queue req: 0
  4382. 2025-05-17 23:38:39,039 - sglang - INFO - [2025-05-17 23:38:39 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.96, #queue-req: 0
  4383. 2025-05-17 23:38:39,039 - __main__ - INFO - sglang running req: 5 queue req: 0
  4384. 2025-05-17 23:38:39,894 - sglang - INFO - [2025-05-17 23:38:39 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.90, #queue-req: 0
  4385. 2025-05-17 23:38:39,894 - __main__ - INFO - sglang running req: 5 queue req: 0
  4386. 2025-05-17 23:38:40,751 - sglang - INFO - [2025-05-17 23:38:40 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 233.33, #queue-req: 0
  4387. 2025-05-17 23:38:40,752 - __main__ - INFO - sglang running req: 5 queue req: 0
  4388. 2025-05-17 23:38:41,611 - sglang - INFO - [2025-05-17 23:38:41 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.67, #queue-req: 0
  4389. 2025-05-17 23:38:41,611 - __main__ - INFO - sglang running req: 5 queue req: 0
  4390. 2025-05-17 23:38:42,472 - sglang - INFO - [2025-05-17 23:38:42 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.26, #queue-req: 0
  4391. 2025-05-17 23:38:42,472 - __main__ - INFO - sglang running req: 5 queue req: 0
  4392. 2025-05-17 23:38:43,330 - sglang - INFO - [2025-05-17 23:38:43 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.73, #queue-req: 0
  4393. 2025-05-17 23:38:43,330 - __main__ - INFO - sglang running req: 3 queue req: 0
  4394. 2025-05-17 23:38:44,170 - sglang - INFO - [2025-05-17 23:38:44 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.85, #queue-req: 0
  4395. 2025-05-17 23:38:44,171 - __main__ - INFO - sglang running req: 3 queue req: 0
  4396. 2025-05-17 23:38:45,002 - sglang - INFO - [2025-05-17 23:38:45 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 84.14, #queue-req: 0
  4397. 2025-05-17 23:38:45,002 - __main__ - INFO - sglang running req: 1 queue req: 0
  4398. 2025-05-17 23:38:45,826 - sglang - INFO - [2025-05-17 23:38:45 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.57, #queue-req: 0
  4399. 2025-05-17 23:38:45,826 - __main__ - INFO - sglang running req: 1 queue req: 0
  4400. 2025-05-17 23:38:46,490 - __main__ - INFO - Finished TaskGroup for worker on e48bab7ddc862bf0fbce5dbd44894d26f2a0404e
  4401. 2025-05-17 23:38:46,491 - __main__ - INFO - Got 1 docs for e48bab7ddc862bf0fbce5dbd44894d26f2a0404e
  4402. 2025-05-17 23:38:46,492 - __main__ - INFO - Worker 0 exiting due to empty queue
  4403. 2025-05-17 23:38:46,492 - __main__ - INFO - Work done
  4404. 2025-05-17 23:38:46,492 - __main__ - INFO - Got cancellation request for SGLang server
  4405. 2025-05-17 23:39:16,278 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  4406. 2025-05-17 23:39:16,278 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496349/input.pdf as PDF document
  4407. 2025-05-17 23:39:16,278 - __main__ - INFO - Found 1 total pdf paths to add
  4408. 2025-05-17 23:39:16,280 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  4409. 2025-05-17 23:39:16,478 - __main__ - INFO - Starting pipeline with PID 456898
  4410. 2025-05-17 23:39:16,478 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  4411. 2025-05-17 23:39:22,084 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  4412. 2025-05-17 23:39:23,125 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  4413. 2025-05-17 23:39:24,182 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  4414. 2025-05-17 23:39:25,216 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  4415. 2025-05-17 23:39:26,267 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  4416. 2025-05-17 23:39:27,345 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  4417. 2025-05-17 23:39:28,420 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  4418. 2025-05-17 23:39:28,442 - sglang - INFO - [2025-05-17 23:39:28] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=415081957, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4419. 2025-05-17 23:39:28,442 - __main__ - INFO - [2025-05-17 23:39:28] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=415081957, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4420. 2025-05-17 23:39:29,494 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  4421. 2025-05-17 23:39:30,562 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  4422. 2025-05-17 23:39:31,638 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  4423. 2025-05-17 23:39:32,705 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  4424. 2025-05-17 23:39:33,781 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  4425. 2025-05-17 23:39:34,838 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  4426. 2025-05-17 23:39:35,905 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  4427. 2025-05-17 23:39:36,972 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  4428. 2025-05-17 23:39:36,984 - sglang - INFO - [2025-05-17 23:39:36] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4429. 2025-05-17 23:39:36,984 - __main__ - INFO - [2025-05-17 23:39:36] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4430. 2025-05-17 23:39:38,048 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  4431. 2025-05-17 23:39:39,114 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  4432. 2025-05-17 23:39:40,170 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  4433. 2025-05-17 23:39:41,236 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  4434. 2025-05-17 23:39:42,303 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  4435. 2025-05-17 23:39:43,369 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  4436. 2025-05-17 23:39:43,699 - sglang - INFO - [2025-05-17 23:39:43 TP0] Overlap scheduler is disabled for multimodal models.
  4437. 2025-05-17 23:39:43,700 - __main__ - INFO - [2025-05-17 23:39:43 TP0] Overlap scheduler is disabled for multimodal models.
  4438. 2025-05-17 23:39:44,265 - sglang - INFO - [2025-05-17 23:39:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4439. 2025-05-17 23:39:44,265 - __main__ - INFO - [2025-05-17 23:39:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4440. 2025-05-17 23:39:44,265 - sglang - INFO - [2025-05-17 23:39:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4441. 2025-05-17 23:39:44,265 - __main__ - INFO - [2025-05-17 23:39:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4442. 2025-05-17 23:39:44,265 - sglang - INFO - [2025-05-17 23:39:44 TP0] Init torch distributed begin.
  4443. 2025-05-17 23:39:44,266 - __main__ - INFO - [2025-05-17 23:39:44 TP0] Init torch distributed begin.
  4444. 2025-05-17 23:39:44,450 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  4445. 2025-05-17 23:39:45,523 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  4446. 2025-05-17 23:39:46,590 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  4447. 2025-05-17 23:39:47,657 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  4448. 2025-05-17 23:39:48,728 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  4449. 2025-05-17 23:39:49,593 - sglang - INFO - [2025-05-17 23:39:49 TP0] Load weight begin. avail mem=23.33 GB
  4450. 2025-05-17 23:39:49,593 - __main__ - INFO - [2025-05-17 23:39:49 TP0] Load weight begin. avail mem=23.33 GB
  4451. 2025-05-17 23:39:49,812 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  4452. 2025-05-17 23:39:50,680 - sglang - INFO - [2025-05-17 23:39:50 TP0] Using model weights format ['*.safetensors']
  4453. 2025-05-17 23:39:50,681 - __main__ - INFO - [2025-05-17 23:39:50 TP0] Using model weights format ['*.safetensors']
  4454. 2025-05-17 23:39:50,891 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  4455. 2025-05-17 23:39:51,250 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4456. 2025-05-17 23:39:51,250 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4457. 2025-05-17 23:39:51,542 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
  4458. 2025-05-17 23:39:51,542 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
  4459. 2025-05-17 23:39:51,973 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  4460. 2025-05-17 23:39:52,500 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.46it/s]
  4461. 2025-05-17 23:39:52,500 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.46it/s]
  4462. 2025-05-17 23:39:53,054 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  4463. 2025-05-17 23:39:53,459 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.24it/s]
  4464. 2025-05-17 23:39:53,459 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.24it/s]
  4465. 2025-05-17 23:39:54,134 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  4466. 2025-05-17 23:39:54,407 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.16it/s]
  4467. 2025-05-17 23:39:54,408 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.16it/s]
  4468. 2025-05-17 23:39:54,408 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
  4469. 2025-05-17 23:39:54,408 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
  4470. 2025-05-17 23:39:54,408 - sglang - INFO -
  4471. 2025-05-17 23:39:54,408 - __main__ - INFO -
  4472. 2025-05-17 23:39:54,555 - sglang - INFO - [2025-05-17 23:39:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4473. 2025-05-17 23:39:54,555 - __main__ - INFO - [2025-05-17 23:39:54 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4474. 2025-05-17 23:39:54,561 - sglang - INFO - [2025-05-17 23:39:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4475. 2025-05-17 23:39:54,561 - __main__ - INFO - [2025-05-17 23:39:54 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4476. 2025-05-17 23:39:54,561 - sglang - INFO - [2025-05-17 23:39:54 TP0] Memory pool end. avail mem=5.30 GB
  4477. 2025-05-17 23:39:54,561 - __main__ - INFO - [2025-05-17 23:39:54 TP0] Memory pool end. avail mem=5.30 GB
  4478. 2025-05-17 23:39:54,714 - sglang - INFO - [2025-05-17 23:39:54 TP0] Capture cuda graph begin. This can take up to several minutes.
  4479. 2025-05-17 23:39:54,714 - __main__ - INFO - [2025-05-17 23:39:54 TP0] Capture cuda graph begin. This can take up to several minutes.
  4480. 2025-05-17 23:39:55,213 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  4481. 2025-05-17 23:39:56,293 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  4482. 2025-05-17 23:39:56,404 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.87it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s] 100%|██████████| 4/4 [00:01<00:00, 2.93it/s] 100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
  4483. 2025-05-17 23:39:56,404 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.87it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.48it/s] 100%|██████████| 4/4 [00:01<00:00, 2.93it/s] 100%|██████████| 4/4 [00:01<00:00, 2.37it/s]
  4484. 2025-05-17 23:39:56,404 - sglang - INFO - [2025-05-17 23:39:56 TP0] Capture cuda graph end. Time elapsed: 1.69 s
  4485. 2025-05-17 23:39:56,404 - __main__ - INFO - [2025-05-17 23:39:56 TP0] Capture cuda graph end. Time elapsed: 1.69 s
  4486. 2025-05-17 23:39:57,372 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  4487. 2025-05-17 23:39:58,442 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  4488. 2025-05-17 23:39:59,508 - sglang - INFO - [2025-05-17 23:39:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4489. 2025-05-17 23:39:59,508 - __main__ - INFO - [2025-05-17 23:39:59 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4490. 2025-05-17 23:39:59,509 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  4491. 2025-05-17 23:40:00,581 - sglang - INFO - [2025-05-17 23:40:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4492. 2025-05-17 23:40:00,581 - __main__ - INFO - [2025-05-17 23:40:00 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4493. 2025-05-17 23:40:00,581 - __main__ - INFO - sglang running req: 0 queue req: 0
  4494. 2025-05-17 23:40:00,616 - __main__ - INFO - sglang server is ready.
  4495. 2025-05-17 23:40:00,616 - __main__ - INFO - Queue remaining: 1
  4496. 2025-05-17 23:40:00,617 - __main__ - INFO -
  4497. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4498. ----------------------------------------------------------------------------------
  4499. 2025-05-17 23:40:00,617 - __main__ - INFO -
  4500. Worker ID
  4501. ---------
  4502. 2025-05-17 23:40:00,617 - __main__ - INFO - Worker 0 processing work item cfa98586a6393f5105b98d306c800389907d0452
  4503. 2025-05-17 23:40:00,617 - __main__ - INFO - Created all tasks for cfa98586a6393f5105b98d306c800389907d0452
  4504. 2025-05-17 23:40:00,620 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747496349/input.pdf in worker 0
  4505. 2025-05-17 23:40:01,096 - sglang - INFO - [2025-05-17 23:40:01] The server is fired up and ready to roll!
  4506. 2025-05-17 23:40:01,096 - __main__ - INFO - [2025-05-17 23:40:01] The server is fired up and ready to roll!
  4507. 2025-05-17 23:40:06,960 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496349/input.pdf-1
  4508. 2025-05-17 23:40:10,679 - __main__ - INFO - Queue remaining: 0
  4509. 2025-05-17 23:40:10,679 - __main__ - INFO -
  4510. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4511. ----------------------------------------------------------------------------------
  4512. 2025-05-17 23:40:10,680 - __main__ - INFO -
  4513. Worker ID | started
  4514. ----------+--------
  4515. 0 | 1
  4516. 2025-05-17 23:40:20,681 - __main__ - INFO - Queue remaining: 0
  4517. 2025-05-17 23:40:20,681 - __main__ - INFO -
  4518. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4519. ----------------------------------------------------------------------------------
  4520. 2025-05-17 23:40:20,682 - __main__ - INFO -
  4521. Worker ID | started
  4522. ----------+--------
  4523. 0 | 1
  4524. 2025-05-17 23:40:27,865 - sglang - INFO - [2025-05-17 23:40:27 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4525. 2025-05-17 23:40:27,865 - __main__ - INFO - sglang running req: 0 queue req: 0
  4526. 2025-05-17 23:40:29,308 - sglang - INFO - [2025-05-17 23:40:29 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.34, #queue-req: 0
  4527. 2025-05-17 23:40:29,309 - __main__ - INFO - sglang running req: 1 queue req: 0
  4528. 2025-05-17 23:40:30,130 - sglang - INFO - [2025-05-17 23:40:30 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.67, #queue-req: 0
  4529. 2025-05-17 23:40:30,130 - __main__ - INFO - sglang running req: 1 queue req: 0
  4530. 2025-05-17 23:40:30,683 - __main__ - INFO - Queue remaining: 0
  4531. 2025-05-17 23:40:30,683 - __main__ - INFO -
  4532. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4533. ----------------------------------------------------------------------------------
  4534. 2025-05-17 23:40:30,683 - __main__ - INFO -
  4535. Worker ID | started
  4536. ----------+--------
  4537. 0 | 1
  4538. 2025-05-17 23:40:30,950 - sglang - INFO - [2025-05-17 23:40:30 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.79, #queue-req: 0
  4539. 2025-05-17 23:40:30,950 - __main__ - INFO - sglang running req: 1 queue req: 0
  4540. 2025-05-17 23:40:31,390 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  4541. 2025-05-17 23:40:31,390 - __main__ - INFO - Worker 1 exiting due to empty queue
  4542. 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 2 exiting due to empty queue
  4543. 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 3 exiting due to empty queue
  4544. 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 4 exiting due to empty queue
  4545. 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 5 exiting due to empty queue
  4546. 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 6 exiting due to empty queue
  4547. 2025-05-17 23:40:31,391 - __main__ - INFO - Worker 7 exiting due to empty queue
  4548. 2025-05-17 23:40:31,769 - sglang - INFO - [2025-05-17 23:40:31 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.81, #queue-req: 0
  4549. 2025-05-17 23:40:31,770 - __main__ - INFO - sglang running req: 1 queue req: 0
  4550. 2025-05-17 23:40:32,590 - sglang - INFO - [2025-05-17 23:40:32 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.73, #queue-req: 0
  4551. 2025-05-17 23:40:32,590 - __main__ - INFO - sglang running req: 1 queue req: 0
  4552. 2025-05-17 23:40:33,412 - sglang - INFO - [2025-05-17 23:40:33 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.69, #queue-req: 0
  4553. 2025-05-17 23:40:33,412 - __main__ - INFO - sglang running req: 1 queue req: 0
  4554. 2025-05-17 23:40:34,233 - sglang - INFO - [2025-05-17 23:40:34 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.71, #queue-req: 0
  4555. 2025-05-17 23:40:34,233 - __main__ - INFO - sglang running req: 1 queue req: 0
  4556. 2025-05-17 23:40:34,281 - __main__ - INFO - Finished TaskGroup for worker on cfa98586a6393f5105b98d306c800389907d0452
  4557. 2025-05-17 23:40:34,282 - __main__ - INFO - Got 1 docs for cfa98586a6393f5105b98d306c800389907d0452
  4558. 2025-05-17 23:40:34,283 - __main__ - INFO - Worker 0 exiting due to empty queue
  4559. 2025-05-17 23:40:34,283 - __main__ - INFO - Work done
  4560. 2025-05-17 23:40:34,284 - __main__ - INFO - Got cancellation request for SGLang server
  4561. 2025-05-17 23:41:02,747 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  4562. 2025-05-17 23:41:02,747 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496456/input.pdf as PDF document
  4563. 2025-05-17 23:41:02,747 - __main__ - INFO - Found 1 total pdf paths to add
  4564. 2025-05-17 23:41:02,751 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  4565. 2025-05-17 23:41:02,980 - __main__ - INFO - Starting pipeline with PID 458294
  4566. 2025-05-17 23:41:02,980 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  4567. 2025-05-17 23:41:08,598 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  4568. 2025-05-17 23:41:09,637 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  4569. 2025-05-17 23:41:10,695 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  4570. 2025-05-17 23:41:11,760 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  4571. 2025-05-17 23:41:12,829 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  4572. 2025-05-17 23:41:13,899 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  4573. 2025-05-17 23:41:14,385 - sglang - INFO - [2025-05-17 23:41:14] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=289781152, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4574. 2025-05-17 23:41:14,385 - __main__ - INFO - [2025-05-17 23:41:14] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=289781152, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4575. 2025-05-17 23:41:14,978 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  4576. 2025-05-17 23:41:16,045 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  4577. 2025-05-17 23:41:17,111 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  4578. 2025-05-17 23:41:17,736 - sglang - INFO - [2025-05-17 23:41:17] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4579. 2025-05-17 23:41:17,736 - __main__ - INFO - [2025-05-17 23:41:17] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4580. 2025-05-17 23:41:18,189 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  4581. 2025-05-17 23:41:19,225 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  4582. 2025-05-17 23:41:20,287 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  4583. 2025-05-17 23:41:21,353 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  4584. 2025-05-17 23:41:22,422 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  4585. 2025-05-17 23:41:23,457 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  4586. 2025-05-17 23:41:24,519 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  4587. 2025-05-17 23:41:25,585 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  4588. 2025-05-17 23:41:26,651 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  4589. 2025-05-17 23:41:27,705 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  4590. 2025-05-17 23:41:28,769 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  4591. 2025-05-17 23:41:29,351 - sglang - INFO - [2025-05-17 23:41:29 TP0] Overlap scheduler is disabled for multimodal models.
  4592. 2025-05-17 23:41:29,351 - __main__ - INFO - [2025-05-17 23:41:29 TP0] Overlap scheduler is disabled for multimodal models.
  4593. 2025-05-17 23:41:29,848 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  4594. 2025-05-17 23:41:29,893 - sglang - INFO - [2025-05-17 23:41:29 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4595. 2025-05-17 23:41:29,893 - __main__ - INFO - [2025-05-17 23:41:29 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4596. 2025-05-17 23:41:29,893 - sglang - INFO - [2025-05-17 23:41:29 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4597. 2025-05-17 23:41:29,893 - __main__ - INFO - [2025-05-17 23:41:29 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4598. 2025-05-17 23:41:29,893 - sglang - INFO - [2025-05-17 23:41:29 TP0] Init torch distributed begin.
  4599. 2025-05-17 23:41:29,893 - __main__ - INFO - [2025-05-17 23:41:29 TP0] Init torch distributed begin.
  4600. 2025-05-17 23:41:30,883 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  4601. 2025-05-17 23:41:31,945 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  4602. 2025-05-17 23:41:33,012 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  4603. 2025-05-17 23:41:34,073 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  4604. 2025-05-17 23:41:35,140 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  4605. 2025-05-17 23:41:35,230 - sglang - INFO - [2025-05-17 23:41:35 TP0] Load weight begin. avail mem=23.33 GB
  4606. 2025-05-17 23:41:35,230 - __main__ - INFO - [2025-05-17 23:41:35 TP0] Load weight begin. avail mem=23.33 GB
  4607. 2025-05-17 23:41:36,218 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  4608. 2025-05-17 23:41:36,278 - sglang - INFO - [2025-05-17 23:41:36 TP0] Using model weights format ['*.safetensors']
  4609. 2025-05-17 23:41:36,278 - __main__ - INFO - [2025-05-17 23:41:36 TP0] Using model weights format ['*.safetensors']
  4610. 2025-05-17 23:41:37,047 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4611. 2025-05-17 23:41:37,047 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4612. 2025-05-17 23:41:37,296 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  4613. 2025-05-17 23:41:37,325 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.59it/s]
  4614. 2025-05-17 23:41:37,326 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.59it/s]
  4615. 2025-05-17 23:41:38,241 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.53it/s]
  4616. 2025-05-17 23:41:38,241 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.53it/s]
  4617. 2025-05-17 23:41:38,332 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  4618. 2025-05-17 23:41:39,165 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.29it/s]
  4619. 2025-05-17 23:41:39,166 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.29it/s]
  4620. 2025-05-17 23:41:39,367 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  4621. 2025-05-17 23:41:40,032 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.23it/s]
  4622. 2025-05-17 23:41:40,033 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.23it/s]
  4623. 2025-05-17 23:41:40,033 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.34it/s]
  4624. 2025-05-17 23:41:40,033 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.34it/s]
  4625. 2025-05-17 23:41:40,033 - sglang - INFO -
  4626. 2025-05-17 23:41:40,033 - __main__ - INFO -
  4627. 2025-05-17 23:41:40,162 - sglang - INFO - [2025-05-17 23:41:40 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4628. 2025-05-17 23:41:40,163 - __main__ - INFO - [2025-05-17 23:41:40 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4629. 2025-05-17 23:41:40,168 - sglang - INFO - [2025-05-17 23:41:40 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4630. 2025-05-17 23:41:40,169 - __main__ - INFO - [2025-05-17 23:41:40 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4631. 2025-05-17 23:41:40,169 - sglang - INFO - [2025-05-17 23:41:40 TP0] Memory pool end. avail mem=5.30 GB
  4632. 2025-05-17 23:41:40,169 - __main__ - INFO - [2025-05-17 23:41:40 TP0] Memory pool end. avail mem=5.30 GB
  4633. 2025-05-17 23:41:40,320 - sglang - INFO - [2025-05-17 23:41:40 TP0] Capture cuda graph begin. This can take up to several minutes.
  4634. 2025-05-17 23:41:40,320 - __main__ - INFO - [2025-05-17 23:41:40 TP0] Capture cuda graph begin. This can take up to several minutes.
  4635. 2025-05-17 23:41:40,445 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  4636. 2025-05-17 23:41:41,517 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  4637. 2025-05-17 23:41:41,948 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.12it/s] 50%|█████ | 2/4 [00:01<00:01, 1.96it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.57it/s] 100%|██████████| 4/4 [00:01<00:00, 3.01it/s] 100%|██████████| 4/4 [00:01<00:00, 2.46it/s]
  4638. 2025-05-17 23:41:41,948 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.12it/s] 50%|█████ | 2/4 [00:01<00:01, 1.96it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.57it/s] 100%|██████████| 4/4 [00:01<00:00, 3.01it/s] 100%|██████████| 4/4 [00:01<00:00, 2.46it/s]
  4639. 2025-05-17 23:41:41,949 - sglang - INFO - [2025-05-17 23:41:41 TP0] Capture cuda graph end. Time elapsed: 1.63 s
  4640. 2025-05-17 23:41:41,949 - __main__ - INFO - [2025-05-17 23:41:41 TP0] Capture cuda graph end. Time elapsed: 1.63 s
  4641. 2025-05-17 23:41:42,597 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  4642. 2025-05-17 23:41:43,664 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  4643. 2025-05-17 23:41:44,630 - sglang - INFO - [2025-05-17 23:41:44 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4644. 2025-05-17 23:41:44,630 - __main__ - INFO - [2025-05-17 23:41:44 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4645. 2025-05-17 23:41:44,744 - __main__ - INFO - sglang server is ready.
  4646. 2025-05-17 23:41:44,744 - __main__ - INFO - Queue remaining: 1
  4647. 2025-05-17 23:41:44,744 - __main__ - INFO -
  4648. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4649. ----------------------------------------------------------------------------------
  4650. 2025-05-17 23:41:44,745 - __main__ - INFO -
  4651. Worker ID
  4652. ---------
  4653. 2025-05-17 23:41:44,745 - __main__ - INFO - Worker 0 processing work item 199e15fb97f71d3ed170b35970694c1935783252
  4654. 2025-05-17 23:41:44,745 - __main__ - INFO - Created all tasks for 199e15fb97f71d3ed170b35970694c1935783252
  4655. 2025-05-17 23:41:44,750 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496456/input.pdf in worker 0
  4656. 2025-05-17 23:41:45,787 - sglang - INFO - [2025-05-17 23:41:45 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4657. 2025-05-17 23:41:45,787 - __main__ - INFO - [2025-05-17 23:41:45 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4658. 2025-05-17 23:41:45,787 - __main__ - INFO - sglang running req: 0 queue req: 0
  4659. 2025-05-17 23:41:46,565 - sglang - INFO - [2025-05-17 23:41:46] The server is fired up and ready to roll!
  4660. 2025-05-17 23:41:46,565 - __main__ - INFO - [2025-05-17 23:41:46] The server is fired up and ready to roll!
  4661. 2025-05-17 23:41:51,133 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-1
  4662. 2025-05-17 23:41:51,169 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-2
  4663. 2025-05-17 23:41:51,195 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-4
  4664. 2025-05-17 23:41:51,213 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-3
  4665. 2025-05-17 23:41:51,227 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496456/input.pdf-5
  4666. 2025-05-17 23:41:54,747 - __main__ - INFO - Queue remaining: 0
  4667. 2025-05-17 23:41:54,747 - __main__ - INFO -
  4668. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4669. ----------------------------------------------------------------------------------
  4670. 2025-05-17 23:41:54,747 - __main__ - INFO -
  4671. Worker ID | started
  4672. ----------+--------
  4673. 0 | 5
  4674. 2025-05-17 23:42:04,395 - sglang - INFO - [2025-05-17 23:42:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4675. 2025-05-17 23:42:04,395 - __main__ - INFO - sglang running req: 0 queue req: 0
  4676. 2025-05-17 23:42:04,748 - __main__ - INFO - Queue remaining: 0
  4677. 2025-05-17 23:42:04,748 - __main__ - INFO -
  4678. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4679. ----------------------------------------------------------------------------------
  4680. 2025-05-17 23:42:04,748 - __main__ - INFO -
  4681. Worker ID | started
  4682. ----------+--------
  4683. 0 | 5
  4684. 2025-05-17 23:42:06,400 - sglang - INFO - [2025-05-17 23:42:06 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  4685. 2025-05-17 23:42:06,400 - __main__ - INFO - sglang running req: 1 queue req: 0
  4686. 2025-05-17 23:42:10,279 - sglang - INFO - [2025-05-17 23:42:10 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.71, #queue-req: 0
  4687. 2025-05-17 23:42:10,279 - __main__ - INFO - sglang running req: 5 queue req: 0
  4688. 2025-05-17 23:42:11,137 - sglang - INFO - [2025-05-17 23:42:11 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.21, #queue-req: 0
  4689. 2025-05-17 23:42:11,137 - __main__ - INFO - sglang running req: 5 queue req: 0
  4690. 2025-05-17 23:42:11,994 - sglang - INFO - [2025-05-17 23:42:11 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.23, #queue-req: 0
  4691. 2025-05-17 23:42:11,994 - __main__ - INFO - sglang running req: 5 queue req: 0
  4692. 2025-05-17 23:42:12,851 - sglang - INFO - [2025-05-17 23:42:12 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.32, #queue-req: 0
  4693. 2025-05-17 23:42:12,852 - __main__ - INFO - sglang running req: 5 queue req: 0
  4694. 2025-05-17 23:42:13,708 - sglang - INFO - [2025-05-17 23:42:13 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.36, #queue-req: 0
  4695. 2025-05-17 23:42:13,709 - __main__ - INFO - sglang running req: 5 queue req: 0
  4696. 2025-05-17 23:42:14,567 - sglang - INFO - [2025-05-17 23:42:14 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.92, #queue-req: 0
  4697. 2025-05-17 23:42:14,567 - __main__ - INFO - sglang running req: 5 queue req: 0
  4698. 2025-05-17 23:42:14,749 - __main__ - INFO - Queue remaining: 0
  4699. 2025-05-17 23:42:14,749 - __main__ - INFO -
  4700. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4701. ----------------------------------------------------------------------------------
  4702. 2025-05-17 23:42:14,749 - __main__ - INFO -
  4703. Worker ID | started
  4704. ----------+--------
  4705. 0 | 5
  4706. 2025-05-17 23:42:15,428 - sglang - INFO - [2025-05-17 23:42:15 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.31, #queue-req: 0
  4707. 2025-05-17 23:42:15,428 - __main__ - INFO - sglang running req: 5 queue req: 0
  4708. 2025-05-17 23:42:16,290 - sglang - INFO - [2025-05-17 23:42:16 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.15, #queue-req: 0
  4709. 2025-05-17 23:42:16,290 - __main__ - INFO - sglang running req: 5 queue req: 0
  4710. 2025-05-17 23:42:16,813 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  4711. 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 1 exiting due to empty queue
  4712. 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 2 exiting due to empty queue
  4713. 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 3 exiting due to empty queue
  4714. 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 4 exiting due to empty queue
  4715. 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 5 exiting due to empty queue
  4716. 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 6 exiting due to empty queue
  4717. 2025-05-17 23:42:16,813 - __main__ - INFO - Worker 7 exiting due to empty queue
  4718. 2025-05-17 23:42:17,147 - sglang - INFO - [2025-05-17 23:42:17 TP0] Decode batch. #running-req: 4, #token: 9730, token usage: 0.26, gen throughput (token/s): 217.99, #queue-req: 0
  4719. 2025-05-17 23:42:17,147 - __main__ - INFO - sglang running req: 4 queue req: 0
  4720. 2025-05-17 23:42:17,990 - sglang - INFO - [2025-05-17 23:42:17 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 145.99, #queue-req: 0
  4721. 2025-05-17 23:42:17,990 - __main__ - INFO - sglang running req: 3 queue req: 0
  4722. 2025-05-17 23:42:18,822 - sglang - INFO - [2025-05-17 23:42:18 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 79.30, #queue-req: 0
  4723. 2025-05-17 23:42:18,822 - __main__ - INFO - sglang running req: 1 queue req: 0
  4724. 2025-05-17 23:42:19,647 - sglang - INFO - [2025-05-17 23:42:19 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.50, #queue-req: 0
  4725. 2025-05-17 23:42:19,647 - __main__ - INFO - sglang running req: 1 queue req: 0
  4726. 2025-05-17 23:42:20,373 - __main__ - INFO - Finished TaskGroup for worker on 199e15fb97f71d3ed170b35970694c1935783252
  4727. 2025-05-17 23:42:20,374 - __main__ - INFO - Got 1 docs for 199e15fb97f71d3ed170b35970694c1935783252
  4728. 2025-05-17 23:42:20,375 - __main__ - INFO - Worker 0 exiting due to empty queue
  4729. 2025-05-17 23:42:20,375 - __main__ - INFO - Work done
  4730. 2025-05-17 23:42:20,376 - __main__ - INFO - Got cancellation request for SGLang server
  4731. 2025-05-17 23:45:42,883 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  4732. 2025-05-17 23:45:42,883 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496736/input.pdf as PDF document
  4733. 2025-05-17 23:45:42,883 - __main__ - INFO - Found 1 total pdf paths to add
  4734. 2025-05-17 23:45:42,887 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  4735. 2025-05-17 23:45:43,077 - __main__ - INFO - Starting pipeline with PID 460587
  4736. 2025-05-17 23:45:43,077 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  4737. 2025-05-17 23:45:48,716 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  4738. 2025-05-17 23:45:49,762 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  4739. 2025-05-17 23:45:50,819 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  4740. 2025-05-17 23:45:51,864 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  4741. 2025-05-17 23:45:52,912 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  4742. 2025-05-17 23:45:53,957 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  4743. 2025-05-17 23:45:54,317 - sglang - INFO - [2025-05-17 23:45:54] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=257394431, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4744. 2025-05-17 23:45:54,317 - __main__ - INFO - [2025-05-17 23:45:54] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=257394431, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4745. 2025-05-17 23:45:55,008 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  4746. 2025-05-17 23:45:56,052 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  4747. 2025-05-17 23:45:57,098 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  4748. 2025-05-17 23:45:58,141 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  4749. 2025-05-17 23:45:59,217 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  4750. 2025-05-17 23:46:00,262 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  4751. 2025-05-17 23:46:01,310 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  4752. 2025-05-17 23:46:02,350 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  4753. 2025-05-17 23:46:03,409 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  4754. 2025-05-17 23:46:03,779 - sglang - INFO - [2025-05-17 23:46:03] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4755. 2025-05-17 23:46:03,779 - __main__ - INFO - [2025-05-17 23:46:03] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4756. 2025-05-17 23:46:03,784 - sglang - INFO - [2025-05-17 23:46:03 TP0] Overlap scheduler is disabled for multimodal models.
  4757. 2025-05-17 23:46:03,784 - __main__ - INFO - [2025-05-17 23:46:03 TP0] Overlap scheduler is disabled for multimodal models.
  4758. 2025-05-17 23:46:04,488 - sglang - INFO - [2025-05-17 23:46:04 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4759. 2025-05-17 23:46:04,488 - __main__ - INFO - [2025-05-17 23:46:04 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4760. 2025-05-17 23:46:04,488 - sglang - INFO - [2025-05-17 23:46:04 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4761. 2025-05-17 23:46:04,488 - __main__ - INFO - [2025-05-17 23:46:04 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4762. 2025-05-17 23:46:04,488 - sglang - INFO - [2025-05-17 23:46:04 TP0] Init torch distributed begin.
  4763. 2025-05-17 23:46:04,488 - __main__ - INFO - [2025-05-17 23:46:04 TP0] Init torch distributed begin.
  4764. 2025-05-17 23:46:04,489 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  4765. 2025-05-17 23:46:05,559 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  4766. 2025-05-17 23:46:06,630 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  4767. 2025-05-17 23:46:07,696 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  4768. 2025-05-17 23:46:08,764 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  4769. 2025-05-17 23:46:09,817 - sglang - INFO - [2025-05-17 23:46:09 TP0] Load weight begin. avail mem=23.33 GB
  4770. 2025-05-17 23:46:09,817 - __main__ - INFO - [2025-05-17 23:46:09 TP0] Load weight begin. avail mem=23.33 GB
  4771. 2025-05-17 23:46:09,819 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  4772. 2025-05-17 23:46:10,887 - sglang - INFO - [2025-05-17 23:46:10 TP0] Using model weights format ['*.safetensors']
  4773. 2025-05-17 23:46:10,888 - __main__ - INFO - [2025-05-17 23:46:10 TP0] Using model weights format ['*.safetensors']
  4774. 2025-05-17 23:46:10,889 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  4775. 2025-05-17 23:46:11,701 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4776. 2025-05-17 23:46:11,701 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4777. 2025-05-17 23:46:11,968 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  4778. 2025-05-17 23:46:11,978 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.61it/s]
  4779. 2025-05-17 23:46:11,978 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.61it/s]
  4780. 2025-05-17 23:46:12,904 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
  4781. 2025-05-17 23:46:12,905 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.52it/s]
  4782. 2025-05-17 23:46:13,047 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  4783. 2025-05-17 23:46:13,837 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
  4784. 2025-05-17 23:46:13,838 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
  4785. 2025-05-17 23:46:14,126 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  4786. 2025-05-17 23:46:14,746 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  4787. 2025-05-17 23:46:14,747 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  4788. 2025-05-17 23:46:14,747 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
  4789. 2025-05-17 23:46:14,747 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
  4790. 2025-05-17 23:46:14,747 - sglang - INFO -
  4791. 2025-05-17 23:46:14,747 - __main__ - INFO -
  4792. 2025-05-17 23:46:14,893 - sglang - INFO - [2025-05-17 23:46:14 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4793. 2025-05-17 23:46:14,893 - __main__ - INFO - [2025-05-17 23:46:14 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4794. 2025-05-17 23:46:14,900 - sglang - INFO - [2025-05-17 23:46:14 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4795. 2025-05-17 23:46:14,900 - __main__ - INFO - [2025-05-17 23:46:14 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4796. 2025-05-17 23:46:14,900 - sglang - INFO - [2025-05-17 23:46:14 TP0] Memory pool end. avail mem=5.30 GB
  4797. 2025-05-17 23:46:14,900 - __main__ - INFO - [2025-05-17 23:46:14 TP0] Memory pool end. avail mem=5.30 GB
  4798. 2025-05-17 23:46:15,079 - sglang - INFO - [2025-05-17 23:46:15 TP0] Capture cuda graph begin. This can take up to several minutes.
  4799. 2025-05-17 23:46:15,079 - __main__ - INFO - [2025-05-17 23:46:15 TP0] Capture cuda graph begin. This can take up to several minutes.
  4800. 2025-05-17 23:46:15,205 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  4801. 2025-05-17 23:46:16,285 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  4802. 2025-05-17 23:46:16,750 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.07it/s] 50%|█████ | 2/4 [00:01<00:01, 1.89it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.51it/s] 100%|██████████| 4/4 [00:01<00:00, 2.97it/s] 100%|██████████| 4/4 [00:01<00:00, 2.40it/s]
  4803. 2025-05-17 23:46:16,750 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.07it/s] 50%|█████ | 2/4 [00:01<00:01, 1.89it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.51it/s] 100%|██████████| 4/4 [00:01<00:00, 2.97it/s] 100%|██████████| 4/4 [00:01<00:00, 2.40it/s]
  4804. 2025-05-17 23:46:16,750 - sglang - INFO - [2025-05-17 23:46:16 TP0] Capture cuda graph end. Time elapsed: 1.67 s
  4805. 2025-05-17 23:46:16,751 - __main__ - INFO - [2025-05-17 23:46:16 TP0] Capture cuda graph end. Time elapsed: 1.67 s
  4806. 2025-05-17 23:46:17,365 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  4807. 2025-05-17 23:46:18,435 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  4808. 2025-05-17 23:46:19,506 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  4809. 2025-05-17 23:46:19,632 - sglang - INFO - [2025-05-17 23:46:19 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4810. 2025-05-17 23:46:19,632 - __main__ - INFO - [2025-05-17 23:46:19 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4811. 2025-05-17 23:46:20,601 - __main__ - INFO - sglang server is ready.
  4812. 2025-05-17 23:46:20,601 - __main__ - INFO - Queue remaining: 1
  4813. 2025-05-17 23:46:20,601 - __main__ - INFO -
  4814. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4815. ----------------------------------------------------------------------------------
  4816. 2025-05-17 23:46:20,601 - __main__ - INFO -
  4817. Worker ID
  4818. ---------
  4819. 2025-05-17 23:46:20,602 - __main__ - INFO - Worker 0 processing work item d7aa3200a01aa9ffa8f18aa1ecd0b8d69c60293b
  4820. 2025-05-17 23:46:20,602 - __main__ - INFO - Created all tasks for d7aa3200a01aa9ffa8f18aa1ecd0b8d69c60293b
  4821. 2025-05-17 23:46:20,608 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496736/input.pdf in worker 0
  4822. 2025-05-17 23:46:20,705 - sglang - INFO - [2025-05-17 23:46:20 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4823. 2025-05-17 23:46:20,705 - __main__ - INFO - [2025-05-17 23:46:20 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4824. 2025-05-17 23:46:20,705 - __main__ - INFO - sglang running req: 0 queue req: 0
  4825. 2025-05-17 23:46:21,302 - sglang - INFO - [2025-05-17 23:46:21] The server is fired up and ready to roll!
  4826. 2025-05-17 23:46:21,303 - __main__ - INFO - [2025-05-17 23:46:21] The server is fired up and ready to roll!
  4827. 2025-05-17 23:46:26,848 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-1
  4828. 2025-05-17 23:46:26,884 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-2
  4829. 2025-05-17 23:46:26,919 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-3
  4830. 2025-05-17 23:46:26,947 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-4
  4831. 2025-05-17 23:46:26,982 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496736/input.pdf-5
  4832. 2025-05-17 23:46:30,679 - __main__ - INFO - Queue remaining: 0
  4833. 2025-05-17 23:46:30,679 - __main__ - INFO -
  4834. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4835. ----------------------------------------------------------------------------------
  4836. 2025-05-17 23:46:30,679 - __main__ - INFO -
  4837. Worker ID | started
  4838. ----------+--------
  4839. 0 | 5
  4840. 2025-05-17 23:46:40,681 - __main__ - INFO - Queue remaining: 0
  4841. 2025-05-17 23:46:40,681 - __main__ - INFO -
  4842. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4843. ----------------------------------------------------------------------------------
  4844. 2025-05-17 23:46:40,682 - __main__ - INFO -
  4845. Worker ID | started
  4846. ----------+--------
  4847. 0 | 5
  4848. 2025-05-17 23:46:45,601 - sglang - INFO - [2025-05-17 23:46:45 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4849. 2025-05-17 23:46:45,601 - __main__ - INFO - sglang running req: 0 queue req: 0
  4850. 2025-05-17 23:46:46,399 - sglang - INFO - [2025-05-17 23:46:46 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  4851. 2025-05-17 23:46:46,399 - __main__ - INFO - sglang running req: 1 queue req: 0
  4852. 2025-05-17 23:46:49,831 - sglang - INFO - [2025-05-17 23:46:49 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.70, #queue-req: 0
  4853. 2025-05-17 23:46:49,831 - __main__ - INFO - sglang running req: 5 queue req: 0
  4854. 2025-05-17 23:46:50,684 - __main__ - INFO - Queue remaining: 0
  4855. 2025-05-17 23:46:50,684 - __main__ - INFO -
  4856. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4857. ----------------------------------------------------------------------------------
  4858. 2025-05-17 23:46:50,684 - __main__ - INFO -
  4859. Worker ID | started
  4860. ----------+--------
  4861. 0 | 5
  4862. 2025-05-17 23:46:50,688 - sglang - INFO - [2025-05-17 23:46:50 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.27, #queue-req: 0
  4863. 2025-05-17 23:46:50,689 - __main__ - INFO - sglang running req: 5 queue req: 0
  4864. 2025-05-17 23:46:51,552 - sglang - INFO - [2025-05-17 23:46:51 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 231.64, #queue-req: 0
  4865. 2025-05-17 23:46:51,552 - __main__ - INFO - sglang running req: 5 queue req: 0
  4866. 2025-05-17 23:46:51,988 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  4867. 2025-05-17 23:46:51,988 - __main__ - INFO - Worker 1 exiting due to empty queue
  4868. 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 2 exiting due to empty queue
  4869. 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 3 exiting due to empty queue
  4870. 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 4 exiting due to empty queue
  4871. 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 5 exiting due to empty queue
  4872. 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 6 exiting due to empty queue
  4873. 2025-05-17 23:46:51,989 - __main__ - INFO - Worker 7 exiting due to empty queue
  4874. 2025-05-17 23:46:52,413 - sglang - INFO - [2025-05-17 23:46:52 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 232.14, #queue-req: 0
  4875. 2025-05-17 23:46:52,414 - __main__ - INFO - sglang running req: 5 queue req: 0
  4876. 2025-05-17 23:46:53,274 - sglang - INFO - [2025-05-17 23:46:53 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.48, #queue-req: 0
  4877. 2025-05-17 23:46:53,274 - __main__ - INFO - sglang running req: 5 queue req: 0
  4878. 2025-05-17 23:46:54,135 - sglang - INFO - [2025-05-17 23:46:54 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.35, #queue-req: 0
  4879. 2025-05-17 23:46:54,135 - __main__ - INFO - sglang running req: 5 queue req: 0
  4880. 2025-05-17 23:46:54,997 - sglang - INFO - [2025-05-17 23:46:54 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.81, #queue-req: 0
  4881. 2025-05-17 23:46:54,997 - __main__ - INFO - sglang running req: 5 queue req: 0
  4882. 2025-05-17 23:46:55,860 - sglang - INFO - [2025-05-17 23:46:55 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 231.77, #queue-req: 0
  4883. 2025-05-17 23:46:55,861 - __main__ - INFO - sglang running req: 5 queue req: 0
  4884. 2025-05-17 23:46:56,720 - sglang - INFO - [2025-05-17 23:46:56 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.26, #queue-req: 0
  4885. 2025-05-17 23:46:56,720 - __main__ - INFO - sglang running req: 3 queue req: 0
  4886. 2025-05-17 23:46:57,563 - sglang - INFO - [2025-05-17 23:46:57 TP0] Decode batch. #running-req: 2, #token: 5146, token usage: 0.14, gen throughput (token/s): 137.59, #queue-req: 0
  4887. 2025-05-17 23:46:57,564 - __main__ - INFO - sglang running req: 2 queue req: 0
  4888. 2025-05-17 23:46:58,391 - sglang - INFO - [2025-05-17 23:46:58 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 62.83, #queue-req: 0
  4889. 2025-05-17 23:46:58,391 - __main__ - INFO - sglang running req: 1 queue req: 0
  4890. 2025-05-17 23:46:59,217 - sglang - INFO - [2025-05-17 23:46:59 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.45, #queue-req: 0
  4891. 2025-05-17 23:46:59,217 - __main__ - INFO - sglang running req: 1 queue req: 0
  4892. 2025-05-17 23:46:59,884 - __main__ - INFO - Finished TaskGroup for worker on d7aa3200a01aa9ffa8f18aa1ecd0b8d69c60293b
  4893. 2025-05-17 23:46:59,884 - __main__ - INFO - Got 1 docs for d7aa3200a01aa9ffa8f18aa1ecd0b8d69c60293b
  4894. 2025-05-17 23:46:59,885 - __main__ - INFO - Worker 0 exiting due to empty queue
  4895. 2025-05-17 23:46:59,886 - __main__ - INFO - Work done
  4896. 2025-05-17 23:46:59,886 - __main__ - INFO - Got cancellation request for SGLang server
  4897. 2025-05-17 23:47:46,541 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  4898. 2025-05-17 23:47:46,542 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496860/input.pdf as PDF document
  4899. 2025-05-17 23:47:46,542 - __main__ - INFO - Found 1 total pdf paths to add
  4900. 2025-05-17 23:47:46,546 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  4901. 2025-05-17 23:47:46,785 - __main__ - INFO - Starting pipeline with PID 462397
  4902. 2025-05-17 23:47:46,785 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  4903. 2025-05-17 23:47:47,586 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  4904. 2025-05-17 23:47:48,624 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  4905. 2025-05-17 23:47:49,683 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  4906. 2025-05-17 23:47:50,748 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  4907. 2025-05-17 23:47:51,817 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  4908. 2025-05-17 23:47:52,873 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  4909. 2025-05-17 23:47:52,883 - sglang - INFO - [2025-05-17 23:47:52] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=791956393, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4910. 2025-05-17 23:47:52,883 - __main__ - INFO - [2025-05-17 23:47:52] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=791956393, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  4911. 2025-05-17 23:47:53,946 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  4912. 2025-05-17 23:47:55,001 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  4913. 2025-05-17 23:47:56,068 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  4914. 2025-05-17 23:47:57,139 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  4915. 2025-05-17 23:47:58,189 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  4916. 2025-05-17 23:47:58,316 - sglang - INFO - [2025-05-17 23:47:58] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4917. 2025-05-17 23:47:58,316 - __main__ - INFO - [2025-05-17 23:47:58] Use chat template for the OpenAI-compatible API server: qwen2-vl
  4918. 2025-05-17 23:47:59,269 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  4919. 2025-05-17 23:48:00,340 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  4920. 2025-05-17 23:48:01,392 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  4921. 2025-05-17 23:48:02,458 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  4922. 2025-05-17 23:48:03,524 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  4923. 2025-05-17 23:48:04,590 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  4924. 2025-05-17 23:48:05,653 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  4925. 2025-05-17 23:48:06,708 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  4926. 2025-05-17 23:48:07,292 - sglang - INFO - [2025-05-17 23:48:07 TP0] Overlap scheduler is disabled for multimodal models.
  4927. 2025-05-17 23:48:07,292 - __main__ - INFO - [2025-05-17 23:48:07 TP0] Overlap scheduler is disabled for multimodal models.
  4928. 2025-05-17 23:48:07,786 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  4929. 2025-05-17 23:48:07,810 - sglang - INFO - [2025-05-17 23:48:07 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4930. 2025-05-17 23:48:07,810 - __main__ - INFO - [2025-05-17 23:48:07 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  4931. 2025-05-17 23:48:07,810 - sglang - INFO - [2025-05-17 23:48:07 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4932. 2025-05-17 23:48:07,811 - __main__ - INFO - [2025-05-17 23:48:07 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  4933. 2025-05-17 23:48:07,811 - sglang - INFO - [2025-05-17 23:48:07 TP0] Init torch distributed begin.
  4934. 2025-05-17 23:48:07,811 - __main__ - INFO - [2025-05-17 23:48:07 TP0] Init torch distributed begin.
  4935. 2025-05-17 23:48:08,865 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  4936. 2025-05-17 23:48:09,932 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  4937. 2025-05-17 23:48:10,999 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  4938. 2025-05-17 23:48:12,065 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  4939. 2025-05-17 23:48:13,135 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  4940. 2025-05-17 23:48:13,220 - sglang - INFO - [2025-05-17 23:48:13 TP0] Load weight begin. avail mem=23.33 GB
  4941. 2025-05-17 23:48:13,221 - __main__ - INFO - [2025-05-17 23:48:13 TP0] Load weight begin. avail mem=23.33 GB
  4942. 2025-05-17 23:48:14,215 - sglang - INFO - [2025-05-17 23:48:14 TP0] Using model weights format ['*.safetensors']
  4943. 2025-05-17 23:48:14,216 - __main__ - INFO - [2025-05-17 23:48:14 TP0] Using model weights format ['*.safetensors']
  4944. 2025-05-17 23:48:14,217 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  4945. 2025-05-17 23:48:14,702 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4946. 2025-05-17 23:48:14,702 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  4947. 2025-05-17 23:48:14,989 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.49it/s]
  4948. 2025-05-17 23:48:14,989 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.49it/s]
  4949. 2025-05-17 23:48:15,296 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  4950. 2025-05-17 23:48:15,933 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
  4951. 2025-05-17 23:48:15,933 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
  4952. 2025-05-17 23:48:16,377 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  4953. 2025-05-17 23:48:16,879 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.25it/s]
  4954. 2025-05-17 23:48:16,879 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.25it/s]
  4955. 2025-05-17 23:48:17,456 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  4956. 2025-05-17 23:48:17,766 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  4957. 2025-05-17 23:48:17,766 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  4958. 2025-05-17 23:48:17,766 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
  4959. 2025-05-17 23:48:17,767 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.31it/s]
  4960. 2025-05-17 23:48:17,767 - sglang - INFO -
  4961. 2025-05-17 23:48:17,767 - __main__ - INFO -
  4962. 2025-05-17 23:48:17,898 - sglang - INFO - [2025-05-17 23:48:17 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4963. 2025-05-17 23:48:17,899 - __main__ - INFO - [2025-05-17 23:48:17 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  4964. 2025-05-17 23:48:17,904 - sglang - INFO - [2025-05-17 23:48:17 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4965. 2025-05-17 23:48:17,904 - __main__ - INFO - [2025-05-17 23:48:17 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  4966. 2025-05-17 23:48:17,905 - sglang - INFO - [2025-05-17 23:48:17 TP0] Memory pool end. avail mem=5.30 GB
  4967. 2025-05-17 23:48:17,905 - __main__ - INFO - [2025-05-17 23:48:17 TP0] Memory pool end. avail mem=5.30 GB
  4968. 2025-05-17 23:48:18,057 - sglang - INFO - [2025-05-17 23:48:18 TP0] Capture cuda graph begin. This can take up to several minutes.
  4969. 2025-05-17 23:48:18,057 - __main__ - INFO - [2025-05-17 23:48:18 TP0] Capture cuda graph begin. This can take up to several minutes.
  4970. 2025-05-17 23:48:18,534 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  4971. 2025-05-17 23:48:19,613 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  4972. 2025-05-17 23:48:19,717 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.09it/s] 50%|█████ | 2/4 [00:01<00:01, 1.91it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.52it/s] 100%|██████████| 4/4 [00:01<00:00, 2.96it/s] 100%|██████████| 4/4 [00:01<00:00, 2.41it/s]
  4973. 2025-05-17 23:48:19,717 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.09it/s] 50%|█████ | 2/4 [00:01<00:01, 1.91it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.52it/s] 100%|██████████| 4/4 [00:01<00:00, 2.96it/s] 100%|██████████| 4/4 [00:01<00:00, 2.41it/s]
  4974. 2025-05-17 23:48:19,718 - sglang - INFO - [2025-05-17 23:48:19 TP0] Capture cuda graph end. Time elapsed: 1.66 s
  4975. 2025-05-17 23:48:19,718 - __main__ - INFO - [2025-05-17 23:48:19 TP0] Capture cuda graph end. Time elapsed: 1.66 s
  4976. 2025-05-17 23:48:20,689 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  4977. 2025-05-17 23:48:21,744 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  4978. 2025-05-17 23:48:22,450 - sglang - INFO - [2025-05-17 23:48:22 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4979. 2025-05-17 23:48:22,451 - __main__ - INFO - [2025-05-17 23:48:22 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  4980. 2025-05-17 23:48:22,837 - __main__ - INFO - sglang server is ready.
  4981. 2025-05-17 23:48:22,837 - __main__ - INFO - Queue remaining: 1
  4982. 2025-05-17 23:48:22,838 - __main__ - INFO -
  4983. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  4984. ----------------------------------------------------------------------------------
  4985. 2025-05-17 23:48:22,838 - __main__ - INFO -
  4986. Worker ID
  4987. ---------
  4988. 2025-05-17 23:48:22,838 - __main__ - INFO - Worker 0 processing work item 206e70c2ba138820c52d22ba8bfb11820a7b737b
  4989. 2025-05-17 23:48:22,838 - __main__ - INFO - Created all tasks for 206e70c2ba138820c52d22ba8bfb11820a7b737b
  4990. 2025-05-17 23:48:22,844 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496860/input.pdf in worker 0
  4991. 2025-05-17 23:48:23,527 - sglang - INFO - [2025-05-17 23:48:23 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4992. 2025-05-17 23:48:23,528 - __main__ - INFO - [2025-05-17 23:48:23 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  4993. 2025-05-17 23:48:23,528 - __main__ - INFO - sglang running req: 0 queue req: 0
  4994. 2025-05-17 23:48:24,409 - sglang - INFO - [2025-05-17 23:48:24] The server is fired up and ready to roll!
  4995. 2025-05-17 23:48:24,409 - __main__ - INFO - [2025-05-17 23:48:24] The server is fired up and ready to roll!
  4996. 2025-05-17 23:48:29,517 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-1
  4997. 2025-05-17 23:48:29,542 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-2
  4998. 2025-05-17 23:48:29,553 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-4
  4999. 2025-05-17 23:48:29,560 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-3
  5000. 2025-05-17 23:48:29,583 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496860/input.pdf-5
  5001. 2025-05-17 23:48:32,838 - __main__ - INFO - Queue remaining: 0
  5002. 2025-05-17 23:48:32,839 - __main__ - INFO -
  5003. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5004. ----------------------------------------------------------------------------------
  5005. 2025-05-17 23:48:32,839 - __main__ - INFO -
  5006. Worker ID | started
  5007. ----------+--------
  5008. 0 | 5
  5009. 2025-05-17 23:48:40,487 - sglang - INFO - [2025-05-17 23:48:40 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5010. 2025-05-17 23:48:40,488 - __main__ - INFO - sglang running req: 0 queue req: 0
  5011. 2025-05-17 23:48:42,508 - sglang - INFO - [2025-05-17 23:48:42 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  5012. 2025-05-17 23:48:42,508 - __main__ - INFO - sglang running req: 1 queue req: 0
  5013. 2025-05-17 23:48:42,839 - __main__ - INFO - Queue remaining: 0
  5014. 2025-05-17 23:48:42,840 - __main__ - INFO -
  5015. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5016. ----------------------------------------------------------------------------------
  5017. 2025-05-17 23:48:42,840 - __main__ - INFO -
  5018. Worker ID | started
  5019. ----------+--------
  5020. 0 | 5
  5021. 2025-05-17 23:48:47,326 - sglang - INFO - [2025-05-17 23:48:47 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.91, #queue-req: 0
  5022. 2025-05-17 23:48:47,327 - __main__ - INFO - sglang running req: 5 queue req: 0
  5023. 2025-05-17 23:48:48,191 - sglang - INFO - [2025-05-17 23:48:48 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 231.36, #queue-req: 0
  5024. 2025-05-17 23:48:48,191 - __main__ - INFO - sglang running req: 5 queue req: 0
  5025. 2025-05-17 23:48:49,054 - sglang - INFO - [2025-05-17 23:48:49 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 231.71, #queue-req: 0
  5026. 2025-05-17 23:48:49,054 - __main__ - INFO - sglang running req: 5 queue req: 0
  5027. 2025-05-17 23:48:49,917 - sglang - INFO - [2025-05-17 23:48:49 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 231.67, #queue-req: 0
  5028. 2025-05-17 23:48:49,917 - __main__ - INFO - sglang running req: 5 queue req: 0
  5029. 2025-05-17 23:48:50,779 - sglang - INFO - [2025-05-17 23:48:50 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.02, #queue-req: 0
  5030. 2025-05-17 23:48:50,779 - __main__ - INFO - sglang running req: 5 queue req: 0
  5031. 2025-05-17 23:48:51,644 - sglang - INFO - [2025-05-17 23:48:51 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 231.35, #queue-req: 0
  5032. 2025-05-17 23:48:51,644 - __main__ - INFO - sglang running req: 5 queue req: 0
  5033. 2025-05-17 23:48:52,506 - sglang - INFO - [2025-05-17 23:48:52 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.03, #queue-req: 0
  5034. 2025-05-17 23:48:52,506 - __main__ - INFO - sglang running req: 5 queue req: 0
  5035. 2025-05-17 23:48:52,841 - __main__ - INFO - Queue remaining: 0
  5036. 2025-05-17 23:48:52,841 - __main__ - INFO -
  5037. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5038. ----------------------------------------------------------------------------------
  5039. 2025-05-17 23:48:52,841 - __main__ - INFO -
  5040. Worker ID | started
  5041. ----------+--------
  5042. 0 | 5
  5043. 2025-05-17 23:48:53,367 - sglang - INFO - [2025-05-17 23:48:53 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.19, #queue-req: 0
  5044. 2025-05-17 23:48:53,367 - __main__ - INFO - sglang running req: 5 queue req: 0
  5045. 2025-05-17 23:48:54,227 - sglang - INFO - [2025-05-17 23:48:54 TP0] Decode batch. #running-req: 4, #token: 7360, token usage: 0.19, gen throughput (token/s): 218.53, #queue-req: 0
  5046. 2025-05-17 23:48:54,227 - __main__ - INFO - sglang running req: 4 queue req: 0
  5047. 2025-05-17 23:48:54,815 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  5048. 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 1 exiting due to empty queue
  5049. 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 2 exiting due to empty queue
  5050. 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 3 exiting due to empty queue
  5051. 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 4 exiting due to empty queue
  5052. 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 5 exiting due to empty queue
  5053. 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 6 exiting due to empty queue
  5054. 2025-05-17 23:48:54,816 - __main__ - INFO - Worker 7 exiting due to empty queue
  5055. 2025-05-17 23:48:55,070 - sglang - INFO - [2025-05-17 23:48:55 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.35, #queue-req: 0
  5056. 2025-05-17 23:48:55,070 - __main__ - INFO - sglang running req: 3 queue req: 0
  5057. 2025-05-17 23:48:55,905 - sglang - INFO - [2025-05-17 23:48:55 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 83.86, #queue-req: 0
  5058. 2025-05-17 23:48:55,905 - __main__ - INFO - sglang running req: 1 queue req: 0
  5059. 2025-05-17 23:48:56,731 - sglang - INFO - [2025-05-17 23:48:56 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.44, #queue-req: 0
  5060. 2025-05-17 23:48:56,731 - __main__ - INFO - sglang running req: 1 queue req: 0
  5061. 2025-05-17 23:48:57,521 - __main__ - INFO - Finished TaskGroup for worker on 206e70c2ba138820c52d22ba8bfb11820a7b737b
  5062. 2025-05-17 23:48:57,521 - __main__ - INFO - Got 1 docs for 206e70c2ba138820c52d22ba8bfb11820a7b737b
  5063. 2025-05-17 23:48:57,523 - __main__ - INFO - Worker 0 exiting due to empty queue
  5064. 2025-05-17 23:48:57,523 - __main__ - INFO - Work done
  5065. 2025-05-17 23:48:57,523 - __main__ - INFO - Got cancellation request for SGLang server
  5066. 2025-05-17 23:49:27,025 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  5067. 2025-05-17 23:49:27,026 - __main__ - INFO - Loading file at olmocr_workspace/job_1747496960/input.pdf as PDF document
  5068. 2025-05-17 23:49:27,026 - __main__ - INFO - Found 1 total pdf paths to add
  5069. 2025-05-17 23:49:27,030 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  5070. 2025-05-17 23:49:27,269 - __main__ - INFO - Starting pipeline with PID 464121
  5071. 2025-05-17 23:49:27,269 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  5072. 2025-05-17 23:49:33,982 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  5073. 2025-05-17 23:49:35,028 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  5074. 2025-05-17 23:49:36,085 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  5075. 2025-05-17 23:49:37,143 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  5076. 2025-05-17 23:49:38,201 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  5077. 2025-05-17 23:49:39,258 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  5078. 2025-05-17 23:49:40,248 - sglang - INFO - [2025-05-17 23:49:40] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=655980969, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5079. 2025-05-17 23:49:40,248 - __main__ - INFO - [2025-05-17 23:49:40] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=655980969, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5080. 2025-05-17 23:49:40,301 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  5081. 2025-05-17 23:49:41,365 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  5082. 2025-05-17 23:49:42,423 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  5083. 2025-05-17 23:49:43,481 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  5084. 2025-05-17 23:49:44,257 - sglang - INFO - [2025-05-17 23:49:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5085. 2025-05-17 23:49:44,257 - __main__ - INFO - [2025-05-17 23:49:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5086. 2025-05-17 23:49:44,540 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  5087. 2025-05-17 23:49:45,585 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  5088. 2025-05-17 23:49:46,634 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  5089. 2025-05-17 23:49:47,701 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  5090. 2025-05-17 23:49:48,772 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  5091. 2025-05-17 23:49:49,842 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  5092. 2025-05-17 23:49:50,908 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  5093. 2025-05-17 23:49:51,974 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  5094. 2025-05-17 23:49:53,036 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  5095. 2025-05-17 23:49:54,091 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  5096. 2025-05-17 23:49:55,157 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  5097. 2025-05-17 23:49:56,225 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  5098. 2025-05-17 23:49:57,290 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  5099. 2025-05-17 23:49:58,355 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  5100. 2025-05-17 23:49:59,429 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  5101. 2025-05-17 23:50:00,500 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  5102. 2025-05-17 23:50:01,570 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  5103. 2025-05-17 23:50:02,641 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  5104. 2025-05-17 23:50:03,711 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  5105. 2025-05-17 23:50:03,784 - sglang - INFO - [2025-05-17 23:50:03 TP0] Overlap scheduler is disabled for multimodal models.
  5106. 2025-05-17 23:50:03,784 - __main__ - INFO - [2025-05-17 23:50:03 TP0] Overlap scheduler is disabled for multimodal models.
  5107. 2025-05-17 23:50:04,792 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  5108. 2025-05-17 23:50:05,862 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  5109. 2025-05-17 23:50:06,928 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  5110. 2025-05-17 23:50:07,994 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  5111. 2025-05-17 23:50:09,061 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  5112. 2025-05-17 23:50:10,115 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  5113. 2025-05-17 23:50:10,421 - sglang - INFO - [2025-05-17 23:50:10 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5114. 2025-05-17 23:50:10,421 - __main__ - INFO - [2025-05-17 23:50:10 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5115. 2025-05-17 23:50:10,421 - sglang - INFO - [2025-05-17 23:50:10 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5116. 2025-05-17 23:50:10,421 - __main__ - INFO - [2025-05-17 23:50:10 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5117. 2025-05-17 23:50:10,421 - sglang - INFO - [2025-05-17 23:50:10 TP0] Init torch distributed begin.
  5118. 2025-05-17 23:50:10,421 - __main__ - INFO - [2025-05-17 23:50:10 TP0] Init torch distributed begin.
  5119. 2025-05-17 23:50:11,193 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  5120. 2025-05-17 23:50:12,262 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  5121. 2025-05-17 23:50:13,329 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  5122. 2025-05-17 23:50:14,394 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  5123. 2025-05-17 23:50:15,461 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  5124. 2025-05-17 23:50:15,784 - sglang - INFO - [2025-05-17 23:50:15 TP0] Load weight begin. avail mem=23.33 GB
  5125. 2025-05-17 23:50:15,784 - __main__ - INFO - [2025-05-17 23:50:15 TP0] Load weight begin. avail mem=23.33 GB
  5126. 2025-05-17 23:50:16,540 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  5127. 2025-05-17 23:50:17,404 - sglang - INFO - [2025-05-17 23:50:17 TP0] Using model weights format ['*.safetensors']
  5128. 2025-05-17 23:50:17,404 - __main__ - INFO - [2025-05-17 23:50:17 TP0] Using model weights format ['*.safetensors']
  5129. 2025-05-17 23:50:17,618 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  5130. 2025-05-17 23:50:17,922 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5131. 2025-05-17 23:50:17,922 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5132. 2025-05-17 23:50:18,292 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.70it/s]
  5133. 2025-05-17 23:50:18,292 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.70it/s]
  5134. 2025-05-17 23:50:18,697 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  5135. 2025-05-17 23:50:19,427 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.22it/s]
  5136. 2025-05-17 23:50:19,427 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.22it/s]
  5137. 2025-05-17 23:50:19,776 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  5138. 2025-05-17 23:50:20,591 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.02it/s]
  5139. 2025-05-17 23:50:20,591 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.02it/s]
  5140. 2025-05-17 23:50:20,856 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  5141. 2025-05-17 23:50:21,685 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.02s/it]
  5142. 2025-05-17 23:50:21,685 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.02s/it]
  5143. 2025-05-17 23:50:21,686 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.06it/s]
  5144. 2025-05-17 23:50:21,686 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.06it/s]
  5145. 2025-05-17 23:50:21,686 - sglang - INFO -
  5146. 2025-05-17 23:50:21,686 - __main__ - INFO -
  5147. 2025-05-17 23:50:21,934 - sglang - INFO - [2025-05-17 23:50:21 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5148. 2025-05-17 23:50:21,934 - __main__ - INFO - [2025-05-17 23:50:21 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5149. 2025-05-17 23:50:21,935 - sglang - INFO - [2025-05-17 23:50:21 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5150. 2025-05-17 23:50:21,935 - __main__ - INFO - [2025-05-17 23:50:21 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5151. 2025-05-17 23:50:21,935 - sglang - INFO - [2025-05-17 23:50:21 TP0] Memory pool end. avail mem=5.30 GB
  5152. 2025-05-17 23:50:21,935 - __main__ - INFO - [2025-05-17 23:50:21 TP0] Memory pool end. avail mem=5.30 GB
  5153. 2025-05-17 23:50:21,936 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  5154. 2025-05-17 23:50:22,026 - sglang - INFO - [2025-05-17 23:50:22 TP0] Capture cuda graph begin. This can take up to several minutes.
  5155. 2025-05-17 23:50:22,027 - __main__ - INFO - [2025-05-17 23:50:22 TP0] Capture cuda graph begin. This can take up to several minutes.
  5156. 2025-05-17 23:50:23,016 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  5157. 2025-05-17 23:50:23,793 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.01s/it] 50%|█████ | 2/4 [00:01<00:01, 1.78it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.37it/s] 100%|██████████| 4/4 [00:01<00:00, 2.83it/s] 100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
  5158. 2025-05-17 23:50:23,793 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.01s/it] 50%|█████ | 2/4 [00:01<00:01, 1.78it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.37it/s] 100%|██████████| 4/4 [00:01<00:00, 2.83it/s] 100%|██████████| 4/4 [00:01<00:00, 2.27it/s]
  5159. 2025-05-17 23:50:23,793 - sglang - INFO - [2025-05-17 23:50:23 TP0] Capture cuda graph end. Time elapsed: 1.77 s
  5160. 2025-05-17 23:50:23,793 - __main__ - INFO - [2025-05-17 23:50:23 TP0] Capture cuda graph end. Time elapsed: 1.77 s
  5161. 2025-05-17 23:50:24,094 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  5162. 2025-05-17 23:50:25,149 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  5163. 2025-05-17 23:50:26,214 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  5164. 2025-05-17 23:50:26,513 - sglang - INFO - [2025-05-17 23:50:26 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5165. 2025-05-17 23:50:26,513 - __main__ - INFO - [2025-05-17 23:50:26 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5166. 2025-05-17 23:50:27,309 - __main__ - INFO - sglang server is ready.
  5167. 2025-05-17 23:50:27,309 - __main__ - INFO - Queue remaining: 1
  5168. 2025-05-17 23:50:27,309 - __main__ - INFO -
  5169. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5170. ----------------------------------------------------------------------------------
  5171. 2025-05-17 23:50:27,309 - __main__ - INFO -
  5172. Worker ID
  5173. ---------
  5174. 2025-05-17 23:50:27,309 - __main__ - INFO - Worker 0 processing work item 9409ac69e0698bb53cba0d186d3996d4d9f95a62
  5175. 2025-05-17 23:50:27,309 - __main__ - INFO - Created all tasks for 9409ac69e0698bb53cba0d186d3996d4d9f95a62
  5176. 2025-05-17 23:50:27,315 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747496960/input.pdf in worker 0
  5177. 2025-05-17 23:50:27,589 - sglang - INFO - [2025-05-17 23:50:27 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5178. 2025-05-17 23:50:27,589 - __main__ - INFO - [2025-05-17 23:50:27 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5179. 2025-05-17 23:50:27,589 - __main__ - INFO - sglang running req: 0 queue req: 0
  5180. 2025-05-17 23:50:28,282 - sglang - INFO - [2025-05-17 23:50:28] The server is fired up and ready to roll!
  5181. 2025-05-17 23:50:28,282 - __main__ - INFO - [2025-05-17 23:50:28] The server is fired up and ready to roll!
  5182. 2025-05-17 23:50:33,560 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-1
  5183. 2025-05-17 23:50:33,599 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-2
  5184. 2025-05-17 23:50:33,601 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-3
  5185. 2025-05-17 23:50:33,632 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-5
  5186. 2025-05-17 23:50:33,633 - __main__ - INFO - Built page query for olmocr_workspace/job_1747496960/input.pdf-4
  5187. 2025-05-17 23:50:37,311 - __main__ - INFO - Queue remaining: 0
  5188. 2025-05-17 23:50:37,311 - __main__ - INFO -
  5189. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5190. ----------------------------------------------------------------------------------
  5191. 2025-05-17 23:50:37,311 - __main__ - INFO -
  5192. Worker ID | started
  5193. ----------+--------
  5194. 0 | 5
  5195. 2025-05-17 23:50:42,193 - sglang - INFO - [2025-05-17 23:50:42 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5196. 2025-05-17 23:50:42,194 - __main__ - INFO - sglang running req: 0 queue req: 0
  5197. 2025-05-17 23:50:44,294 - sglang - INFO - [2025-05-17 23:50:44 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  5198. 2025-05-17 23:50:44,294 - __main__ - INFO - sglang running req: 1 queue req: 0
  5199. 2025-05-17 23:50:47,312 - __main__ - INFO - Queue remaining: 0
  5200. 2025-05-17 23:50:47,313 - __main__ - INFO -
  5201. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5202. ----------------------------------------------------------------------------------
  5203. 2025-05-17 23:50:47,313 - __main__ - INFO -
  5204. Worker ID | started
  5205. ----------+--------
  5206. 0 | 5
  5207. 2025-05-17 23:50:49,825 - sglang - INFO - [2025-05-17 23:50:49 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 7.38, #queue-req: 0
  5208. 2025-05-17 23:50:49,826 - __main__ - INFO - sglang running req: 5 queue req: 0
  5209. 2025-05-17 23:50:50,684 - sglang - INFO - [2025-05-17 23:50:50 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 232.91, #queue-req: 0
  5210. 2025-05-17 23:50:50,684 - __main__ - INFO - sglang running req: 5 queue req: 0
  5211. 2025-05-17 23:50:51,543 - sglang - INFO - [2025-05-17 23:50:51 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 232.78, #queue-req: 0
  5212. 2025-05-17 23:50:51,543 - __main__ - INFO - sglang running req: 5 queue req: 0
  5213. 2025-05-17 23:50:52,401 - sglang - INFO - [2025-05-17 23:50:52 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.19, #queue-req: 0
  5214. 2025-05-17 23:50:52,401 - __main__ - INFO - sglang running req: 5 queue req: 0
  5215. 2025-05-17 23:50:53,259 - sglang - INFO - [2025-05-17 23:50:53 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.05, #queue-req: 0
  5216. 2025-05-17 23:50:53,259 - __main__ - INFO - sglang running req: 5 queue req: 0
  5217. 2025-05-17 23:50:54,119 - sglang - INFO - [2025-05-17 23:50:54 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.53, #queue-req: 0
  5218. 2025-05-17 23:50:54,120 - __main__ - INFO - sglang running req: 5 queue req: 0
  5219. 2025-05-17 23:50:54,981 - sglang - INFO - [2025-05-17 23:50:54 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.07, #queue-req: 0
  5220. 2025-05-17 23:50:54,981 - __main__ - INFO - sglang running req: 5 queue req: 0
  5221. 2025-05-17 23:50:55,843 - sglang - INFO - [2025-05-17 23:50:55 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.13, #queue-req: 0
  5222. 2025-05-17 23:50:55,843 - __main__ - INFO - sglang running req: 5 queue req: 0
  5223. 2025-05-17 23:50:56,705 - sglang - INFO - [2025-05-17 23:50:56 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.89, #queue-req: 0
  5224. 2025-05-17 23:50:56,705 - __main__ - INFO - sglang running req: 3 queue req: 0
  5225. 2025-05-17 23:50:57,314 - __main__ - INFO - Queue remaining: 0
  5226. 2025-05-17 23:50:57,314 - __main__ - INFO -
  5227. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5228. ----------------------------------------------------------------------------------
  5229. sglang_input_tokens 44.56 44.56
  5230. sglang_output_tokens 7.70 7.70
  5231. 2025-05-17 23:50:57,314 - __main__ - INFO -
  5232. Worker ID | finished | started
  5233. ----------+----------+--------
  5234. 0 | 2 | 5
  5235. 2025-05-17 23:50:57,548 - sglang - INFO - [2025-05-17 23:50:57 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.28, #queue-req: 0
  5236. 2025-05-17 23:50:57,548 - __main__ - INFO - sglang running req: 3 queue req: 0
  5237. 2025-05-17 23:50:58,382 - sglang - INFO - [2025-05-17 23:50:58 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 77.97, #queue-req: 0
  5238. 2025-05-17 23:50:58,382 - __main__ - INFO - sglang running req: 1 queue req: 0
  5239. 2025-05-17 23:50:59,207 - sglang - INFO - [2025-05-17 23:50:59 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.49, #queue-req: 0
  5240. 2025-05-17 23:50:59,207 - __main__ - INFO - sglang running req: 1 queue req: 0
  5241. 2025-05-17 23:50:59,217 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  5242. 2025-05-17 23:50:59,217 - __main__ - INFO - Worker 1 exiting due to empty queue
  5243. 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 2 exiting due to empty queue
  5244. 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 3 exiting due to empty queue
  5245. 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 4 exiting due to empty queue
  5246. 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 5 exiting due to empty queue
  5247. 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 6 exiting due to empty queue
  5248. 2025-05-17 23:50:59,218 - __main__ - INFO - Worker 7 exiting due to empty queue
  5249. 2025-05-17 23:50:59,872 - __main__ - INFO - Finished TaskGroup for worker on 9409ac69e0698bb53cba0d186d3996d4d9f95a62
  5250. 2025-05-17 23:50:59,873 - __main__ - INFO - Got 1 docs for 9409ac69e0698bb53cba0d186d3996d4d9f95a62
  5251. 2025-05-17 23:50:59,874 - __main__ - INFO - Worker 0 exiting due to empty queue
  5252. 2025-05-17 23:50:59,875 - __main__ - INFO - Work done
  5253. 2025-05-17 23:50:59,875 - __main__ - INFO - Got cancellation request for SGLang server
  5254. 2025-05-17 23:51:31,061 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  5255. 2025-05-17 23:51:31,062 - __main__ - INFO - Loading file at olmocr_workspace/job_1747497084/input.pdf as PDF document
  5256. 2025-05-17 23:51:31,062 - __main__ - INFO - Found 1 total pdf paths to add
  5257. 2025-05-17 23:51:31,066 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  5258. 2025-05-17 23:51:31,312 - __main__ - INFO - Starting pipeline with PID 465812
  5259. 2025-05-17 23:51:31,312 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  5260. 2025-05-17 23:51:36,943 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  5261. 2025-05-17 23:51:37,990 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  5262. 2025-05-17 23:51:39,041 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  5263. 2025-05-17 23:51:40,106 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  5264. 2025-05-17 23:51:41,172 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  5265. 2025-05-17 23:51:42,217 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  5266. 2025-05-17 23:51:42,690 - sglang - INFO - [2025-05-17 23:51:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=247307952, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5267. 2025-05-17 23:51:42,690 - __main__ - INFO - [2025-05-17 23:51:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=247307952, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5268. 2025-05-17 23:51:43,265 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  5269. 2025-05-17 23:51:44,298 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  5270. 2025-05-17 23:51:45,345 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  5271. 2025-05-17 23:51:46,412 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  5272. 2025-05-17 23:51:47,478 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  5273. 2025-05-17 23:51:48,544 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  5274. 2025-05-17 23:51:49,610 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  5275. 2025-05-17 23:51:50,676 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  5276. 2025-05-17 23:51:51,742 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  5277. 2025-05-17 23:51:52,530 - sglang - INFO - [2025-05-17 23:51:52] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5278. 2025-05-17 23:51:52,530 - __main__ - INFO - [2025-05-17 23:51:52] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5279. 2025-05-17 23:51:52,821 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  5280. 2025-05-17 23:51:53,892 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  5281. 2025-05-17 23:51:54,957 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  5282. 2025-05-17 23:51:56,024 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  5283. 2025-05-17 23:51:57,078 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  5284. 2025-05-17 23:51:57,861 - sglang - INFO - [2025-05-17 23:51:57 TP0] Overlap scheduler is disabled for multimodal models.
  5285. 2025-05-17 23:51:57,861 - __main__ - INFO - [2025-05-17 23:51:57 TP0] Overlap scheduler is disabled for multimodal models.
  5286. 2025-05-17 23:51:58,157 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  5287. 2025-05-17 23:51:58,646 - sglang - INFO - [2025-05-17 23:51:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5288. 2025-05-17 23:51:58,646 - __main__ - INFO - [2025-05-17 23:51:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5289. 2025-05-17 23:51:58,646 - sglang - INFO - [2025-05-17 23:51:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5290. 2025-05-17 23:51:58,646 - __main__ - INFO - [2025-05-17 23:51:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5291. 2025-05-17 23:51:58,647 - sglang - INFO - [2025-05-17 23:51:58 TP0] Init torch distributed begin.
  5292. 2025-05-17 23:51:58,647 - __main__ - INFO - [2025-05-17 23:51:58 TP0] Init torch distributed begin.
  5293. 2025-05-17 23:51:59,236 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  5294. 2025-05-17 23:52:00,302 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  5295. 2025-05-17 23:52:01,368 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  5296. 2025-05-17 23:52:02,433 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  5297. 2025-05-17 23:52:03,500 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  5298. 2025-05-17 23:52:04,027 - sglang - INFO - [2025-05-17 23:52:04 TP0] Load weight begin. avail mem=23.33 GB
  5299. 2025-05-17 23:52:04,027 - __main__ - INFO - [2025-05-17 23:52:04 TP0] Load weight begin. avail mem=23.33 GB
  5300. 2025-05-17 23:52:04,580 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  5301. 2025-05-17 23:52:05,517 - sglang - INFO - [2025-05-17 23:52:05 TP0] Using model weights format ['*.safetensors']
  5302. 2025-05-17 23:52:05,517 - __main__ - INFO - [2025-05-17 23:52:05 TP0] Using model weights format ['*.safetensors']
  5303. 2025-05-17 23:52:05,658 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  5304. 2025-05-17 23:52:06,011 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5305. 2025-05-17 23:52:06,011 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5306. 2025-05-17 23:52:06,339 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.06it/s]
  5307. 2025-05-17 23:52:06,339 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.06it/s]
  5308. 2025-05-17 23:52:06,737 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  5309. 2025-05-17 23:52:07,396 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.32it/s]
  5310. 2025-05-17 23:52:07,397 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.32it/s]
  5311. 2025-05-17 23:52:07,817 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  5312. 2025-05-17 23:52:08,454 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.12it/s]
  5313. 2025-05-17 23:52:08,454 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.12it/s]
  5314. 2025-05-17 23:52:08,895 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  5315. 2025-05-17 23:52:09,493 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05it/s]
  5316. 2025-05-17 23:52:09,494 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05it/s]
  5317. 2025-05-17 23:52:09,494 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.15it/s]
  5318. 2025-05-17 23:52:09,494 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.15it/s]
  5319. 2025-05-17 23:52:09,494 - sglang - INFO -
  5320. 2025-05-17 23:52:09,494 - __main__ - INFO -
  5321. 2025-05-17 23:52:09,639 - sglang - INFO - [2025-05-17 23:52:09 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5322. 2025-05-17 23:52:09,639 - __main__ - INFO - [2025-05-17 23:52:09 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5323. 2025-05-17 23:52:09,645 - sglang - INFO - [2025-05-17 23:52:09 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5324. 2025-05-17 23:52:09,645 - __main__ - INFO - [2025-05-17 23:52:09 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5325. 2025-05-17 23:52:09,645 - sglang - INFO - [2025-05-17 23:52:09 TP0] Memory pool end. avail mem=5.30 GB
  5326. 2025-05-17 23:52:09,645 - __main__ - INFO - [2025-05-17 23:52:09 TP0] Memory pool end. avail mem=5.30 GB
  5327. 2025-05-17 23:52:09,795 - sglang - INFO - [2025-05-17 23:52:09 TP0] Capture cuda graph begin. This can take up to several minutes.
  5328. 2025-05-17 23:52:09,796 - __main__ - INFO - [2025-05-17 23:52:09 TP0] Capture cuda graph begin. This can take up to several minutes.
  5329. 2025-05-17 23:52:09,973 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  5330. 2025-05-17 23:52:11,054 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  5331. 2025-05-17 23:52:11,462 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.10it/s] 50%|█████ | 2/4 [00:01<00:01, 1.92it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s] 100%|██████████| 4/4 [00:01<00:00, 2.94it/s] 100%|██████████| 4/4 [00:01<00:00, 2.40it/s]
  5332. 2025-05-17 23:52:11,462 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.10it/s] 50%|█████ | 2/4 [00:01<00:01, 1.92it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s] 100%|██████████| 4/4 [00:01<00:00, 2.94it/s] 100%|██████████| 4/4 [00:01<00:00, 2.40it/s]
  5333. 2025-05-17 23:52:11,462 - sglang - INFO - [2025-05-17 23:52:11 TP0] Capture cuda graph end. Time elapsed: 1.67 s
  5334. 2025-05-17 23:52:11,462 - __main__ - INFO - [2025-05-17 23:52:11 TP0] Capture cuda graph end. Time elapsed: 1.67 s
  5335. 2025-05-17 23:52:12,133 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  5336. 2025-05-17 23:52:13,202 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  5337. 2025-05-17 23:52:14,268 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  5338. 2025-05-17 23:52:14,610 - sglang - INFO - [2025-05-17 23:52:14 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5339. 2025-05-17 23:52:14,610 - __main__ - INFO - [2025-05-17 23:52:14 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5340. 2025-05-17 23:52:15,364 - __main__ - INFO - sglang server is ready.
  5341. 2025-05-17 23:52:15,364 - __main__ - INFO - Queue remaining: 1
  5342. 2025-05-17 23:52:15,364 - __main__ - INFO -
  5343. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5344. ----------------------------------------------------------------------------------
  5345. 2025-05-17 23:52:15,364 - __main__ - INFO -
  5346. Worker ID
  5347. ---------
  5348. 2025-05-17 23:52:15,364 - __main__ - INFO - Worker 0 processing work item e583124473577446455a2982cc1a1469d21fc0a1
  5349. 2025-05-17 23:52:15,365 - __main__ - INFO - Created all tasks for e583124473577446455a2982cc1a1469d21fc0a1
  5350. 2025-05-17 23:52:15,371 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747497084/input.pdf in worker 0
  5351. 2025-05-17 23:52:15,684 - sglang - INFO - [2025-05-17 23:52:15 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5352. 2025-05-17 23:52:15,684 - __main__ - INFO - [2025-05-17 23:52:15 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5353. 2025-05-17 23:52:15,684 - __main__ - INFO - sglang running req: 0 queue req: 0
  5354. 2025-05-17 23:52:16,505 - sglang - INFO - [2025-05-17 23:52:16] The server is fired up and ready to roll!
  5355. 2025-05-17 23:52:16,505 - __main__ - INFO - [2025-05-17 23:52:16] The server is fired up and ready to roll!
  5356. 2025-05-17 23:52:21,875 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-1
  5357. 2025-05-17 23:52:21,904 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-2
  5358. 2025-05-17 23:52:21,919 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-3
  5359. 2025-05-17 23:52:21,939 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-4
  5360. 2025-05-17 23:52:21,958 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497084/input.pdf-5
  5361. 2025-05-17 23:52:25,379 - __main__ - INFO - Queue remaining: 0
  5362. 2025-05-17 23:52:25,379 - __main__ - INFO -
  5363. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5364. ----------------------------------------------------------------------------------
  5365. 2025-05-17 23:52:25,379 - __main__ - INFO -
  5366. Worker ID | started
  5367. ----------+--------
  5368. 0 | 5
  5369. 2025-05-17 23:52:35,381 - __main__ - INFO - Queue remaining: 0
  5370. 2025-05-17 23:52:35,381 - __main__ - INFO -
  5371. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5372. ----------------------------------------------------------------------------------
  5373. 2025-05-17 23:52:35,381 - __main__ - INFO -
  5374. Worker ID | started
  5375. ----------+--------
  5376. 0 | 5
  5377. 2025-05-17 23:52:41,907 - sglang - INFO - [2025-05-17 23:52:41 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5378. 2025-05-17 23:52:41,907 - __main__ - INFO - sglang running req: 0 queue req: 0
  5379. 2025-05-17 23:52:42,739 - sglang - INFO - [2025-05-17 23:52:42 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  5380. 2025-05-17 23:52:42,739 - __main__ - INFO - sglang running req: 1 queue req: 0
  5381. 2025-05-17 23:52:45,382 - __main__ - INFO - Queue remaining: 0
  5382. 2025-05-17 23:52:45,383 - __main__ - INFO -
  5383. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5384. ----------------------------------------------------------------------------------
  5385. 2025-05-17 23:52:45,383 - __main__ - INFO -
  5386. Worker ID | started
  5387. ----------+--------
  5388. 0 | 5
  5389. 2025-05-17 23:52:46,181 - sglang - INFO - [2025-05-17 23:52:46 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.45, #queue-req: 0
  5390. 2025-05-17 23:52:46,181 - __main__ - INFO - sglang running req: 5 queue req: 0
  5391. 2025-05-17 23:52:47,040 - sglang - INFO - [2025-05-17 23:52:47 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 232.66, #queue-req: 0
  5392. 2025-05-17 23:52:47,041 - __main__ - INFO - sglang running req: 5 queue req: 0
  5393. 2025-05-17 23:52:47,196 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  5394. 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 1 exiting due to empty queue
  5395. 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 2 exiting due to empty queue
  5396. 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 3 exiting due to empty queue
  5397. 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 4 exiting due to empty queue
  5398. 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 5 exiting due to empty queue
  5399. 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 6 exiting due to empty queue
  5400. 2025-05-17 23:52:47,197 - __main__ - INFO - Worker 7 exiting due to empty queue
  5401. 2025-05-17 23:52:47,899 - sglang - INFO - [2025-05-17 23:52:47 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 232.95, #queue-req: 0
  5402. 2025-05-17 23:52:47,899 - __main__ - INFO - sglang running req: 5 queue req: 0
  5403. 2025-05-17 23:52:48,757 - sglang - INFO - [2025-05-17 23:52:48 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.08, #queue-req: 0
  5404. 2025-05-17 23:52:48,757 - __main__ - INFO - sglang running req: 5 queue req: 0
  5405. 2025-05-17 23:52:49,616 - sglang - INFO - [2025-05-17 23:52:49 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 232.85, #queue-req: 0
  5406. 2025-05-17 23:52:49,616 - __main__ - INFO - sglang running req: 5 queue req: 0
  5407. 2025-05-17 23:52:50,476 - sglang - INFO - [2025-05-17 23:52:50 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.65, #queue-req: 0
  5408. 2025-05-17 23:52:50,476 - __main__ - INFO - sglang running req: 5 queue req: 0
  5409. 2025-05-17 23:52:51,337 - sglang - INFO - [2025-05-17 23:52:51 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.18, #queue-req: 0
  5410. 2025-05-17 23:52:51,337 - __main__ - INFO - sglang running req: 5 queue req: 0
  5411. 2025-05-17 23:52:52,198 - sglang - INFO - [2025-05-17 23:52:52 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.21, #queue-req: 0
  5412. 2025-05-17 23:52:52,199 - __main__ - INFO - sglang running req: 5 queue req: 0
  5413. 2025-05-17 23:52:53,059 - sglang - INFO - [2025-05-17 23:52:53 TP0] Decode batch. #running-req: 4, #token: 9730, token usage: 0.26, gen throughput (token/s): 217.39, #queue-req: 0
  5414. 2025-05-17 23:52:53,059 - __main__ - INFO - sglang running req: 4 queue req: 0
  5415. 2025-05-17 23:52:53,902 - sglang - INFO - [2025-05-17 23:52:53 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 145.75, #queue-req: 0
  5416. 2025-05-17 23:52:53,903 - __main__ - INFO - sglang running req: 3 queue req: 0
  5417. 2025-05-17 23:52:54,737 - sglang - INFO - [2025-05-17 23:52:54 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 94.63, #queue-req: 0
  5418. 2025-05-17 23:52:54,737 - __main__ - INFO - sglang running req: 1 queue req: 0
  5419. 2025-05-17 23:52:55,384 - __main__ - INFO - Queue remaining: 0
  5420. 2025-05-17 23:52:55,384 - __main__ - INFO -
  5421. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5422. ----------------------------------------------------------------------------------
  5423. sglang_input_tokens 92.17 92.17
  5424. sglang_output_tokens 18.08 18.08
  5425. 2025-05-17 23:52:55,384 - __main__ - INFO -
  5426. Worker ID | finished | started
  5427. ----------+----------+--------
  5428. 0 | 4 | 5
  5429. 2025-05-17 23:52:55,562 - sglang - INFO - [2025-05-17 23:52:55 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.48, #queue-req: 0
  5430. 2025-05-17 23:52:55,563 - __main__ - INFO - sglang running req: 1 queue req: 0
  5431. 2025-05-17 23:52:55,961 - __main__ - INFO - Finished TaskGroup for worker on e583124473577446455a2982cc1a1469d21fc0a1
  5432. 2025-05-17 23:52:55,961 - __main__ - INFO - Got 1 docs for e583124473577446455a2982cc1a1469d21fc0a1
  5433. 2025-05-17 23:52:55,963 - __main__ - INFO - Worker 0 exiting due to empty queue
  5434. 2025-05-17 23:52:55,963 - __main__ - INFO - Work done
  5435. 2025-05-17 23:52:55,963 - __main__ - INFO - Got cancellation request for SGLang server
  5436. 2025-05-17 23:58:19,645 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  5437. 2025-05-17 23:58:19,645 - __main__ - INFO - Loading file at olmocr_workspace/job_1747497493/input.pdf as PDF document
  5438. 2025-05-17 23:58:19,645 - __main__ - INFO - Found 1 total pdf paths to add
  5439. 2025-05-17 23:58:19,648 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  5440. 2025-05-17 23:58:19,843 - __main__ - INFO - Starting pipeline with PID 468563
  5441. 2025-05-17 23:58:19,843 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  5442. 2025-05-17 23:58:20,381 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  5443. 2025-05-17 23:58:21,419 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  5444. 2025-05-17 23:58:22,477 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  5445. 2025-05-17 23:58:23,522 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  5446. 2025-05-17 23:58:24,584 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  5447. 2025-05-17 23:58:25,660 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  5448. 2025-05-17 23:58:26,623 - sglang - INFO - [2025-05-17 23:58:26] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=124456914, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5449. 2025-05-17 23:58:26,623 - __main__ - INFO - [2025-05-17 23:58:26] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=124456914, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5450. 2025-05-17 23:58:26,786 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  5451. 2025-05-17 23:58:27,846 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  5452. 2025-05-17 23:58:28,891 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  5453. 2025-05-17 23:58:29,957 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  5454. 2025-05-17 23:58:31,027 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  5455. 2025-05-17 23:58:32,094 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  5456. 2025-05-17 23:58:33,162 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  5457. 2025-05-17 23:58:34,229 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  5458. 2025-05-17 23:58:35,295 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  5459. 2025-05-17 23:58:36,208 - sglang - INFO - [2025-05-17 23:58:36 TP0] Overlap scheduler is disabled for multimodal models.
  5460. 2025-05-17 23:58:36,209 - __main__ - INFO - [2025-05-17 23:58:36 TP0] Overlap scheduler is disabled for multimodal models.
  5461. 2025-05-17 23:58:36,370 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  5462. 2025-05-17 23:58:36,716 - sglang - INFO - [2025-05-17 23:58:36 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5463. 2025-05-17 23:58:36,717 - __main__ - INFO - [2025-05-17 23:58:36 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5464. 2025-05-17 23:58:36,717 - sglang - INFO - [2025-05-17 23:58:36 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5465. 2025-05-17 23:58:36,717 - __main__ - INFO - [2025-05-17 23:58:36 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5466. 2025-05-17 23:58:36,717 - sglang - INFO - [2025-05-17 23:58:36 TP0] Init torch distributed begin.
  5467. 2025-05-17 23:58:36,717 - __main__ - INFO - [2025-05-17 23:58:36 TP0] Init torch distributed begin.
  5468. 2025-05-17 23:58:37,444 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  5469. 2025-05-17 23:58:38,498 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  5470. 2025-05-17 23:58:39,563 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  5471. 2025-05-17 23:58:40,618 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  5472. 2025-05-17 23:58:41,684 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  5473. 2025-05-17 23:58:42,052 - sglang - INFO - [2025-05-17 23:58:42 TP0] Load weight begin. avail mem=23.33 GB
  5474. 2025-05-17 23:58:42,052 - __main__ - INFO - [2025-05-17 23:58:42 TP0] Load weight begin. avail mem=23.33 GB
  5475. 2025-05-17 23:58:42,764 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  5476. 2025-05-17 23:58:43,224 - sglang - INFO - [2025-05-17 23:58:43 TP0] Using model weights format ['*.safetensors']
  5477. 2025-05-17 23:58:43,224 - __main__ - INFO - [2025-05-17 23:58:43 TP0] Using model weights format ['*.safetensors']
  5478. 2025-05-17 23:58:43,844 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  5479. 2025-05-17 23:58:44,325 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5480. 2025-05-17 23:58:44,325 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5481. 2025-05-17 23:58:44,615 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.46it/s]
  5482. 2025-05-17 23:58:44,615 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.46it/s]
  5483. 2025-05-17 23:58:44,923 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  5484. 2025-05-17 23:58:45,329 - sglang - INFO - [2025-05-17 23:58:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5485. 2025-05-17 23:58:45,330 - __main__ - INFO - [2025-05-17 23:58:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5486. 2025-05-17 23:58:45,559 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.48it/s]
  5487. 2025-05-17 23:58:45,559 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.48it/s]
  5488. 2025-05-17 23:58:46,002 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  5489. 2025-05-17 23:58:46,501 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.25it/s]
  5490. 2025-05-17 23:58:46,501 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.25it/s]
  5491. 2025-05-17 23:58:47,082 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  5492. 2025-05-17 23:58:47,433 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.18it/s]
  5493. 2025-05-17 23:58:47,434 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.18it/s]
  5494. 2025-05-17 23:58:47,434 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.29it/s]
  5495. 2025-05-17 23:58:47,434 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.29it/s]
  5496. 2025-05-17 23:58:47,434 - sglang - INFO -
  5497. 2025-05-17 23:58:47,434 - __main__ - INFO -
  5498. 2025-05-17 23:58:47,582 - sglang - INFO - [2025-05-17 23:58:47 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5499. 2025-05-17 23:58:47,582 - __main__ - INFO - [2025-05-17 23:58:47 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5500. 2025-05-17 23:58:47,589 - sglang - INFO - [2025-05-17 23:58:47 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5501. 2025-05-17 23:58:47,589 - __main__ - INFO - [2025-05-17 23:58:47 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5502. 2025-05-17 23:58:47,589 - sglang - INFO - [2025-05-17 23:58:47 TP0] Memory pool end. avail mem=5.30 GB
  5503. 2025-05-17 23:58:47,589 - __main__ - INFO - [2025-05-17 23:58:47 TP0] Memory pool end. avail mem=5.30 GB
  5504. 2025-05-17 23:58:47,769 - sglang - INFO - [2025-05-17 23:58:47 TP0] Capture cuda graph begin. This can take up to several minutes.
  5505. 2025-05-17 23:58:47,769 - __main__ - INFO - [2025-05-17 23:58:47 TP0] Capture cuda graph begin. This can take up to several minutes.
  5506. 2025-05-17 23:58:48,161 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  5507. 2025-05-17 23:58:49,239 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  5508. 2025-05-17 23:58:49,501 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.01it/s] 50%|█████ | 2/4 [00:01<00:01, 1.81it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.43it/s] 100%|██████████| 4/4 [00:01<00:00, 2.88it/s] 100%|██████████| 4/4 [00:01<00:00, 2.31it/s]
  5509. 2025-05-17 23:58:49,501 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.01it/s] 50%|█████ | 2/4 [00:01<00:01, 1.81it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.43it/s] 100%|██████████| 4/4 [00:01<00:00, 2.88it/s] 100%|██████████| 4/4 [00:01<00:00, 2.31it/s]
  5510. 2025-05-17 23:58:49,501 - sglang - INFO - [2025-05-17 23:58:49 TP0] Capture cuda graph end. Time elapsed: 1.73 s
  5511. 2025-05-17 23:58:49,501 - __main__ - INFO - [2025-05-17 23:58:49 TP0] Capture cuda graph end. Time elapsed: 1.73 s
  5512. 2025-05-17 23:58:50,318 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  5513. 2025-05-17 23:58:51,389 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  5514. 2025-05-17 23:58:52,182 - sglang - INFO - [2025-05-17 23:58:52 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5515. 2025-05-17 23:58:52,182 - __main__ - INFO - [2025-05-17 23:58:52 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5516. 2025-05-17 23:58:52,483 - __main__ - INFO - sglang server is ready.
  5517. 2025-05-17 23:58:52,483 - __main__ - INFO - Queue remaining: 1
  5518. 2025-05-17 23:58:52,483 - __main__ - INFO -
  5519. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5520. ----------------------------------------------------------------------------------
  5521. 2025-05-17 23:58:52,483 - __main__ - INFO -
  5522. Worker ID
  5523. ---------
  5524. 2025-05-17 23:58:52,483 - __main__ - INFO - Worker 0 processing work item a4f0675d63ce13f5a08d86042553d3bccd4ce38c
  5525. 2025-05-17 23:58:52,484 - __main__ - INFO - Created all tasks for a4f0675d63ce13f5a08d86042553d3bccd4ce38c
  5526. 2025-05-17 23:58:52,486 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747497493/input.pdf in worker 0
  5527. 2025-05-17 23:58:53,285 - sglang - INFO - [2025-05-17 23:58:53 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5528. 2025-05-17 23:58:53,285 - __main__ - INFO - [2025-05-17 23:58:53 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5529. 2025-05-17 23:58:53,285 - __main__ - INFO - sglang running req: 0 queue req: 0
  5530. 2025-05-17 23:58:53,916 - sglang - INFO - [2025-05-17 23:58:53] The server is fired up and ready to roll!
  5531. 2025-05-17 23:58:53,917 - __main__ - INFO - [2025-05-17 23:58:53] The server is fired up and ready to roll!
  5532. 2025-05-17 23:58:58,742 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497493/input.pdf-1
  5533. 2025-05-17 23:59:02,484 - __main__ - INFO - Queue remaining: 0
  5534. 2025-05-17 23:59:02,485 - __main__ - INFO -
  5535. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5536. ----------------------------------------------------------------------------------
  5537. 2025-05-17 23:59:02,485 - __main__ - INFO -
  5538. Worker ID | started
  5539. ----------+--------
  5540. 0 | 1
  5541. 2025-05-17 23:59:12,486 - __main__ - INFO - Queue remaining: 0
  5542. 2025-05-17 23:59:12,487 - __main__ - INFO -
  5543. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5544. ----------------------------------------------------------------------------------
  5545. 2025-05-17 23:59:12,487 - __main__ - INFO -
  5546. Worker ID | started
  5547. ----------+--------
  5548. 0 | 1
  5549. 2025-05-17 23:59:20,219 - sglang - INFO - [2025-05-17 23:59:20 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5550. 2025-05-17 23:59:20,219 - __main__ - INFO - sglang running req: 0 queue req: 0
  5551. 2025-05-17 23:59:21,653 - sglang - INFO - [2025-05-17 23:59:21 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.36, #queue-req: 0
  5552. 2025-05-17 23:59:21,654 - __main__ - INFO - sglang running req: 1 queue req: 0
  5553. 2025-05-17 23:59:22,472 - sglang - INFO - [2025-05-17 23:59:22 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.89, #queue-req: 0
  5554. 2025-05-17 23:59:22,472 - __main__ - INFO - sglang running req: 1 queue req: 0
  5555. 2025-05-17 23:59:22,487 - __main__ - INFO - Queue remaining: 0
  5556. 2025-05-17 23:59:22,487 - __main__ - INFO -
  5557. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5558. ----------------------------------------------------------------------------------
  5559. 2025-05-17 23:59:22,488 - __main__ - INFO -
  5560. Worker ID | started
  5561. ----------+--------
  5562. 0 | 1
  5563. 2025-05-17 23:59:23,289 - sglang - INFO - [2025-05-17 23:59:23 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.94, #queue-req: 0
  5564. 2025-05-17 23:59:23,289 - __main__ - INFO - sglang running req: 1 queue req: 0
  5565. 2025-05-17 23:59:24,106 - sglang - INFO - [2025-05-17 23:59:24 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.93, #queue-req: 0
  5566. 2025-05-17 23:59:24,107 - __main__ - INFO - sglang running req: 1 queue req: 0
  5567. 2025-05-17 23:59:24,590 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  5568. 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 1 exiting due to empty queue
  5569. 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 2 exiting due to empty queue
  5570. 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 3 exiting due to empty queue
  5571. 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 4 exiting due to empty queue
  5572. 2025-05-17 23:59:24,591 - __main__ - INFO - Worker 5 exiting due to empty queue
  5573. 2025-05-17 23:59:24,592 - __main__ - INFO - Worker 6 exiting due to empty queue
  5574. 2025-05-17 23:59:24,592 - __main__ - INFO - Worker 7 exiting due to empty queue
  5575. 2025-05-17 23:59:24,924 - sglang - INFO - [2025-05-17 23:59:24 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.90, #queue-req: 0
  5576. 2025-05-17 23:59:24,924 - __main__ - INFO - sglang running req: 1 queue req: 0
  5577. 2025-05-17 23:59:25,743 - sglang - INFO - [2025-05-17 23:59:25 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.83, #queue-req: 0
  5578. 2025-05-17 23:59:25,744 - __main__ - INFO - sglang running req: 1 queue req: 0
  5579. 2025-05-17 23:59:26,563 - sglang - INFO - [2025-05-17 23:59:26 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.79, #queue-req: 0
  5580. 2025-05-17 23:59:26,563 - __main__ - INFO - sglang running req: 1 queue req: 0
  5581. 2025-05-17 23:59:26,621 - __main__ - INFO - Finished TaskGroup for worker on a4f0675d63ce13f5a08d86042553d3bccd4ce38c
  5582. 2025-05-17 23:59:26,621 - __main__ - INFO - Got 1 docs for a4f0675d63ce13f5a08d86042553d3bccd4ce38c
  5583. 2025-05-17 23:59:26,622 - __main__ - INFO - Worker 0 exiting due to empty queue
  5584. 2025-05-17 23:59:26,622 - __main__ - INFO - Work done
  5585. 2025-05-17 23:59:26,623 - __main__ - INFO - Got cancellation request for SGLang server
  5586. 2025-05-17 23:59:56,702 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  5587. 2025-05-17 23:59:56,702 - __main__ - INFO - Loading file at olmocr_workspace/job_1747497590/input.pdf as PDF document
  5588. 2025-05-17 23:59:56,702 - __main__ - INFO - Found 1 total pdf paths to add
  5589. 2025-05-17 23:59:56,706 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  5590. 2025-05-17 23:59:56,924 - __main__ - INFO - Starting pipeline with PID 469586
  5591. 2025-05-17 23:59:56,924 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  5592. 2025-05-18 00:00:02,591 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  5593. 2025-05-18 00:00:03,631 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  5594. 2025-05-18 00:00:04,668 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  5595. 2025-05-18 00:00:05,721 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  5596. 2025-05-18 00:00:06,782 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  5597. 2025-05-18 00:00:07,847 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  5598. 2025-05-18 00:00:08,914 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  5599. 2025-05-18 00:00:09,253 - sglang - INFO - [2025-05-18 00:00:09] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=781311356, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5600. 2025-05-18 00:00:09,253 - __main__ - INFO - [2025-05-18 00:00:09] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=781311356, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5601. 2025-05-18 00:00:09,993 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  5602. 2025-05-18 00:00:11,053 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  5603. 2025-05-18 00:00:12,129 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  5604. 2025-05-18 00:00:13,193 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  5605. 2025-05-18 00:00:13,682 - sglang - INFO - [2025-05-18 00:00:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5606. 2025-05-18 00:00:13,682 - __main__ - INFO - [2025-05-18 00:00:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5607. 2025-05-18 00:00:14,269 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  5608. 2025-05-18 00:00:15,400 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  5609. 2025-05-18 00:00:16,460 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  5610. 2025-05-18 00:00:17,527 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  5611. 2025-05-18 00:00:18,598 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  5612. 2025-05-18 00:00:19,666 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  5613. 2025-05-18 00:00:20,728 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  5614. 2025-05-18 00:00:21,782 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  5615. 2025-05-18 00:00:22,844 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  5616. 2025-05-18 00:00:23,909 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  5617. 2025-05-18 00:00:24,973 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  5618. 2025-05-18 00:00:26,044 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  5619. 2025-05-18 00:00:26,297 - sglang - INFO - [2025-05-18 00:00:26 TP0] Overlap scheduler is disabled for multimodal models.
  5620. 2025-05-18 00:00:26,297 - __main__ - INFO - [2025-05-18 00:00:26 TP0] Overlap scheduler is disabled for multimodal models.
  5621. 2025-05-18 00:00:26,931 - sglang - INFO - [2025-05-18 00:00:26 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5622. 2025-05-18 00:00:26,931 - __main__ - INFO - [2025-05-18 00:00:26 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5623. 2025-05-18 00:00:26,931 - sglang - INFO - [2025-05-18 00:00:26 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5624. 2025-05-18 00:00:26,931 - __main__ - INFO - [2025-05-18 00:00:26 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5625. 2025-05-18 00:00:26,931 - sglang - INFO - [2025-05-18 00:00:26 TP0] Init torch distributed begin.
  5626. 2025-05-18 00:00:26,931 - __main__ - INFO - [2025-05-18 00:00:26 TP0] Init torch distributed begin.
  5627. 2025-05-18 00:00:27,124 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  5628. 2025-05-18 00:00:28,194 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  5629. 2025-05-18 00:00:29,264 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  5630. 2025-05-18 00:00:30,330 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  5631. 2025-05-18 00:00:31,400 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  5632. 2025-05-18 00:00:32,319 - sglang - INFO - [2025-05-18 00:00:32 TP0] Load weight begin. avail mem=23.33 GB
  5633. 2025-05-18 00:00:32,319 - __main__ - INFO - [2025-05-18 00:00:32 TP0] Load weight begin. avail mem=23.33 GB
  5634. 2025-05-18 00:00:32,479 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  5635. 2025-05-18 00:00:33,364 - sglang - INFO - [2025-05-18 00:00:33 TP0] Using model weights format ['*.safetensors']
  5636. 2025-05-18 00:00:33,364 - __main__ - INFO - [2025-05-18 00:00:33 TP0] Using model weights format ['*.safetensors']
  5637. 2025-05-18 00:00:33,557 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  5638. 2025-05-18 00:00:34,334 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5639. 2025-05-18 00:00:34,335 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5640. 2025-05-18 00:00:34,636 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  5641. 2025-05-18 00:00:34,689 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.82it/s]
  5642. 2025-05-18 00:00:34,689 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.82it/s]
  5643. 2025-05-18 00:00:35,716 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  5644. 2025-05-18 00:00:35,867 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.19it/s]
  5645. 2025-05-18 00:00:35,867 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.19it/s]
  5646. 2025-05-18 00:00:36,796 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  5647. 2025-05-18 00:00:37,063 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:01, 1.00s/it]
  5648. 2025-05-18 00:00:37,063 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:01, 1.00s/it]
  5649. 2025-05-18 00:00:37,865 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  5650. 2025-05-18 00:00:38,194 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05s/it]
  5651. 2025-05-18 00:00:38,194 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05s/it]
  5652. 2025-05-18 00:00:38,194 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.04it/s]
  5653. 2025-05-18 00:00:38,194 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.04it/s]
  5654. 2025-05-18 00:00:38,194 - sglang - INFO -
  5655. 2025-05-18 00:00:38,194 - __main__ - INFO -
  5656. 2025-05-18 00:00:38,370 - sglang - INFO - [2025-05-18 00:00:38 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5657. 2025-05-18 00:00:38,370 - __main__ - INFO - [2025-05-18 00:00:38 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5658. 2025-05-18 00:00:38,376 - sglang - INFO - [2025-05-18 00:00:38 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5659. 2025-05-18 00:00:38,376 - __main__ - INFO - [2025-05-18 00:00:38 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5660. 2025-05-18 00:00:38,376 - sglang - INFO - [2025-05-18 00:00:38 TP0] Memory pool end. avail mem=5.30 GB
  5661. 2025-05-18 00:00:38,376 - __main__ - INFO - [2025-05-18 00:00:38 TP0] Memory pool end. avail mem=5.30 GB
  5662. 2025-05-18 00:00:38,525 - sglang - INFO - [2025-05-18 00:00:38 TP0] Capture cuda graph begin. This can take up to several minutes.
  5663. 2025-05-18 00:00:38,525 - __main__ - INFO - [2025-05-18 00:00:38 TP0] Capture cuda graph begin. This can take up to several minutes.
  5664. 2025-05-18 00:00:38,945 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  5665. 2025-05-18 00:00:40,023 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  5666. 2025-05-18 00:00:40,202 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.88it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s] 100%|██████████| 4/4 [00:01<00:00, 2.96it/s] 100%|██████████| 4/4 [00:01<00:00, 2.39it/s]
  5667. 2025-05-18 00:00:40,203 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.88it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s] 100%|██████████| 4/4 [00:01<00:00, 2.96it/s] 100%|██████████| 4/4 [00:01<00:00, 2.39it/s]
  5668. 2025-05-18 00:00:40,203 - sglang - INFO - [2025-05-18 00:00:40 TP0] Capture cuda graph end. Time elapsed: 1.68 s
  5669. 2025-05-18 00:00:40,203 - __main__ - INFO - [2025-05-18 00:00:40 TP0] Capture cuda graph end. Time elapsed: 1.68 s
  5670. 2025-05-18 00:00:41,102 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  5671. 2025-05-18 00:00:42,172 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  5672. 2025-05-18 00:00:42,689 - sglang - INFO - [2025-05-18 00:00:42 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5673. 2025-05-18 00:00:42,689 - __main__ - INFO - [2025-05-18 00:00:42 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5674. 2025-05-18 00:00:43,271 - __main__ - INFO - sglang server is ready.
  5675. 2025-05-18 00:00:43,271 - __main__ - INFO - Queue remaining: 1
  5676. 2025-05-18 00:00:43,271 - __main__ - INFO -
  5677. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5678. ----------------------------------------------------------------------------------
  5679. 2025-05-18 00:00:43,271 - __main__ - INFO -
  5680. Worker ID
  5681. ---------
  5682. 2025-05-18 00:00:43,271 - __main__ - INFO - Worker 0 processing work item 91f602739df6407104cadbe51df97c7f32677f88
  5683. 2025-05-18 00:00:43,272 - __main__ - INFO - Created all tasks for 91f602739df6407104cadbe51df97c7f32677f88
  5684. 2025-05-18 00:00:43,278 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747497590/input.pdf in worker 0
  5685. 2025-05-18 00:00:43,762 - sglang - INFO - [2025-05-18 00:00:43 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5686. 2025-05-18 00:00:43,763 - __main__ - INFO - [2025-05-18 00:00:43 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5687. 2025-05-18 00:00:43,763 - __main__ - INFO - sglang running req: 0 queue req: 0
  5688. 2025-05-18 00:00:44,650 - sglang - INFO - [2025-05-18 00:00:44] The server is fired up and ready to roll!
  5689. 2025-05-18 00:00:44,650 - __main__ - INFO - [2025-05-18 00:00:44] The server is fired up and ready to roll!
  5690. 2025-05-18 00:00:49,546 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-1
  5691. 2025-05-18 00:00:49,583 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-2
  5692. 2025-05-18 00:00:49,616 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-3
  5693. 2025-05-18 00:00:49,645 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-4
  5694. 2025-05-18 00:00:49,677 - __main__ - INFO - Built page query for olmocr_workspace/job_1747497590/input.pdf-5
  5695. 2025-05-18 00:00:53,279 - __main__ - INFO - Queue remaining: 0
  5696. 2025-05-18 00:00:53,279 - __main__ - INFO -
  5697. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5698. ----------------------------------------------------------------------------------
  5699. 2025-05-18 00:00:53,279 - __main__ - INFO -
  5700. Worker ID | started
  5701. ----------+--------
  5702. 0 | 5
  5703. 2025-05-18 00:01:03,200 - sglang - INFO - [2025-05-18 00:01:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5704. 2025-05-18 00:01:03,200 - __main__ - INFO - sglang running req: 0 queue req: 0
  5705. 2025-05-18 00:01:03,279 - __main__ - INFO - Queue remaining: 0
  5706. 2025-05-18 00:01:03,279 - __main__ - INFO -
  5707. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5708. ----------------------------------------------------------------------------------
  5709. 2025-05-18 00:01:03,279 - __main__ - INFO -
  5710. Worker ID | started
  5711. ----------+--------
  5712. 0 | 5
  5713. 2025-05-18 00:01:05,382 - sglang - INFO - [2025-05-18 00:01:05 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  5714. 2025-05-18 00:01:05,382 - __main__ - INFO - sglang running req: 1 queue req: 0
  5715. 2025-05-18 00:01:09,215 - sglang - INFO - [2025-05-18 00:01:09 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.48, #queue-req: 0
  5716. 2025-05-18 00:01:09,215 - __main__ - INFO - sglang running req: 5 queue req: 0
  5717. 2025-05-18 00:01:10,074 - sglang - INFO - [2025-05-18 00:01:10 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 232.81, #queue-req: 0
  5718. 2025-05-18 00:01:10,074 - __main__ - INFO - sglang running req: 5 queue req: 0
  5719. 2025-05-18 00:01:10,932 - sglang - INFO - [2025-05-18 00:01:10 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.21, #queue-req: 0
  5720. 2025-05-18 00:01:10,932 - __main__ - INFO - sglang running req: 5 queue req: 0
  5721. 2025-05-18 00:01:11,789 - sglang - INFO - [2025-05-18 00:01:11 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.30, #queue-req: 0
  5722. 2025-05-18 00:01:11,789 - __main__ - INFO - sglang running req: 5 queue req: 0
  5723. 2025-05-18 00:01:12,647 - sglang - INFO - [2025-05-18 00:01:12 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.19, #queue-req: 0
  5724. 2025-05-18 00:01:12,647 - __main__ - INFO - sglang running req: 5 queue req: 0
  5725. 2025-05-18 00:01:13,280 - __main__ - INFO - Queue remaining: 0
  5726. 2025-05-18 00:01:13,281 - __main__ - INFO -
  5727. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5728. ----------------------------------------------------------------------------------
  5729. 2025-05-18 00:01:13,281 - __main__ - INFO -
  5730. Worker ID | started
  5731. ----------+--------
  5732. 0 | 5
  5733. 2025-05-18 00:01:13,506 - sglang - INFO - [2025-05-18 00:01:13 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.77, #queue-req: 0
  5734. 2025-05-18 00:01:13,506 - __main__ - INFO - sglang running req: 5 queue req: 0
  5735. 2025-05-18 00:01:14,368 - sglang - INFO - [2025-05-18 00:01:14 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.07, #queue-req: 0
  5736. 2025-05-18 00:01:14,368 - __main__ - INFO - sglang running req: 5 queue req: 0
  5737. 2025-05-18 00:01:14,902 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  5738. 2025-05-18 00:01:14,902 - __main__ - INFO - Worker 1 exiting due to empty queue
  5739. 2025-05-18 00:01:14,902 - __main__ - INFO - Worker 2 exiting due to empty queue
  5740. 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 3 exiting due to empty queue
  5741. 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 4 exiting due to empty queue
  5742. 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 5 exiting due to empty queue
  5743. 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 6 exiting due to empty queue
  5744. 2025-05-18 00:01:14,903 - __main__ - INFO - Worker 7 exiting due to empty queue
  5745. 2025-05-18 00:01:15,229 - sglang - INFO - [2025-05-18 00:01:15 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.09, #queue-req: 0
  5746. 2025-05-18 00:01:15,230 - __main__ - INFO - sglang running req: 5 queue req: 0
  5747. 2025-05-18 00:01:16,088 - sglang - INFO - [2025-05-18 00:01:16 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 217.68, #queue-req: 0
  5748. 2025-05-18 00:01:16,089 - __main__ - INFO - sglang running req: 3 queue req: 0
  5749. 2025-05-18 00:01:16,931 - sglang - INFO - [2025-05-18 00:01:16 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.45, #queue-req: 0
  5750. 2025-05-18 00:01:16,931 - __main__ - INFO - sglang running req: 3 queue req: 0
  5751. 2025-05-18 00:01:17,764 - sglang - INFO - [2025-05-18 00:01:17 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 84.03, #queue-req: 0
  5752. 2025-05-18 00:01:17,764 - __main__ - INFO - sglang running req: 1 queue req: 0
  5753. 2025-05-18 00:01:18,588 - sglang - INFO - [2025-05-18 00:01:18 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.54, #queue-req: 0
  5754. 2025-05-18 00:01:18,588 - __main__ - INFO - sglang running req: 1 queue req: 0
  5755. 2025-05-18 00:01:19,316 - __main__ - INFO - Finished TaskGroup for worker on 91f602739df6407104cadbe51df97c7f32677f88
  5756. 2025-05-18 00:01:19,317 - __main__ - INFO - Got 1 docs for 91f602739df6407104cadbe51df97c7f32677f88
  5757. 2025-05-18 00:01:19,318 - __main__ - INFO - Worker 0 exiting due to empty queue
  5758. 2025-05-18 00:01:19,318 - __main__ - INFO - Work done
  5759. 2025-05-18 00:01:19,319 - __main__ - INFO - Got cancellation request for SGLang server
  5760. 2025-05-18 10:13:07,994 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  5761. 2025-05-18 10:13:07,994 - __main__ - INFO - Loading file at olmocr_workspace/job_1747534381/input.pdf as PDF document
  5762. 2025-05-18 10:13:07,994 - __main__ - INFO - Found 1 total pdf paths to add
  5763. 2025-05-18 10:13:07,998 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  5764. 2025-05-18 10:13:08,244 - __main__ - INFO - Starting pipeline with PID 481106
  5765. 2025-05-18 10:13:08,245 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  5766. 2025-05-18 10:13:13,846 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  5767. 2025-05-18 10:13:14,887 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  5768. 2025-05-18 10:13:15,935 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  5769. 2025-05-18 10:13:17,000 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  5770. 2025-05-18 10:13:18,069 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  5771. 2025-05-18 10:13:19,139 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  5772. 2025-05-18 10:13:19,893 - sglang - INFO - [2025-05-18 10:13:19] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=969455633, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5773. 2025-05-18 10:13:19,893 - __main__ - INFO - [2025-05-18 10:13:19] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=969455633, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5774. 2025-05-18 10:13:20,216 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  5775. 2025-05-18 10:13:21,285 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  5776. 2025-05-18 10:13:22,347 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  5777. 2025-05-18 10:13:23,412 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  5778. 2025-05-18 10:13:24,480 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  5779. 2025-05-18 10:13:25,549 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  5780. 2025-05-18 10:13:26,620 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  5781. 2025-05-18 10:13:27,689 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  5782. 2025-05-18 10:13:28,755 - sglang - INFO - [2025-05-18 10:13:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5783. 2025-05-18 10:13:28,755 - __main__ - INFO - [2025-05-18 10:13:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5784. 2025-05-18 10:13:28,756 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  5785. 2025-05-18 10:13:29,831 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  5786. 2025-05-18 10:13:30,896 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  5787. 2025-05-18 10:13:31,950 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  5788. 2025-05-18 10:13:33,016 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  5789. 2025-05-18 10:13:33,999 - sglang - INFO - [2025-05-18 10:13:33 TP0] Overlap scheduler is disabled for multimodal models.
  5790. 2025-05-18 10:13:33,999 - __main__ - INFO - [2025-05-18 10:13:33 TP0] Overlap scheduler is disabled for multimodal models.
  5791. 2025-05-18 10:13:34,093 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  5792. 2025-05-18 10:13:34,497 - sglang - INFO - [2025-05-18 10:13:34 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5793. 2025-05-18 10:13:34,497 - __main__ - INFO - [2025-05-18 10:13:34 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5794. 2025-05-18 10:13:34,497 - sglang - INFO - [2025-05-18 10:13:34 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5795. 2025-05-18 10:13:34,497 - __main__ - INFO - [2025-05-18 10:13:34 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5796. 2025-05-18 10:13:34,497 - sglang - INFO - [2025-05-18 10:13:34 TP0] Init torch distributed begin.
  5797. 2025-05-18 10:13:34,497 - __main__ - INFO - [2025-05-18 10:13:34 TP0] Init torch distributed begin.
  5798. 2025-05-18 10:13:35,172 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  5799. 2025-05-18 10:13:36,241 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  5800. 2025-05-18 10:13:37,311 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  5801. 2025-05-18 10:13:38,380 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  5802. 2025-05-18 10:13:39,447 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  5803. 2025-05-18 10:13:39,850 - sglang - INFO - [2025-05-18 10:13:39 TP0] Load weight begin. avail mem=23.33 GB
  5804. 2025-05-18 10:13:39,851 - __main__ - INFO - [2025-05-18 10:13:39 TP0] Load weight begin. avail mem=23.33 GB
  5805. 2025-05-18 10:13:40,517 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  5806. 2025-05-18 10:13:40,953 - sglang - INFO - [2025-05-18 10:13:40 TP0] Using model weights format ['*.safetensors']
  5807. 2025-05-18 10:13:40,954 - __main__ - INFO - [2025-05-18 10:13:40 TP0] Using model weights format ['*.safetensors']
  5808. 2025-05-18 10:13:41,434 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5809. 2025-05-18 10:13:41,434 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5810. 2025-05-18 10:13:41,596 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  5811. 2025-05-18 10:13:41,753 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.14it/s]
  5812. 2025-05-18 10:13:41,753 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.14it/s]
  5813. 2025-05-18 10:13:42,677 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  5814. 2025-05-18 10:13:42,796 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.34it/s]
  5815. 2025-05-18 10:13:42,796 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.34it/s]
  5816. 2025-05-18 10:13:43,757 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  5817. 2025-05-18 10:13:43,827 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.14it/s]
  5818. 2025-05-18 10:13:43,827 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.14it/s]
  5819. 2025-05-18 10:13:44,835 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.10it/s]
  5820. 2025-05-18 10:13:44,835 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.10it/s]
  5821. 2025-05-18 10:13:44,836 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.19it/s]
  5822. 2025-05-18 10:13:44,836 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.19it/s]
  5823. 2025-05-18 10:13:44,836 - sglang - INFO -
  5824. 2025-05-18 10:13:44,836 - __main__ - INFO -
  5825. 2025-05-18 10:13:44,837 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  5826. 2025-05-18 10:13:44,914 - sglang - INFO - [2025-05-18 10:13:44 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5827. 2025-05-18 10:13:44,914 - __main__ - INFO - [2025-05-18 10:13:44 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  5828. 2025-05-18 10:13:44,920 - sglang - INFO - [2025-05-18 10:13:44 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5829. 2025-05-18 10:13:44,920 - __main__ - INFO - [2025-05-18 10:13:44 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  5830. 2025-05-18 10:13:44,921 - sglang - INFO - [2025-05-18 10:13:44 TP0] Memory pool end. avail mem=5.30 GB
  5831. 2025-05-18 10:13:44,921 - __main__ - INFO - [2025-05-18 10:13:44 TP0] Memory pool end. avail mem=5.30 GB
  5832. 2025-05-18 10:13:45,072 - sglang - INFO - [2025-05-18 10:13:45 TP0] Capture cuda graph begin. This can take up to several minutes.
  5833. 2025-05-18 10:13:45,073 - __main__ - INFO - [2025-05-18 10:13:45 TP0] Capture cuda graph begin. This can take up to several minutes.
  5834. 2025-05-18 10:13:45,917 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  5835. 2025-05-18 10:13:46,750 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.88it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s] 100%|██████████| 4/4 [00:01<00:00, 2.95it/s] 100%|██████████| 4/4 [00:01<00:00, 2.39it/s]
  5836. 2025-05-18 10:13:46,750 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.06it/s] 50%|█████ | 2/4 [00:01<00:01, 1.88it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.50it/s] 100%|██████████| 4/4 [00:01<00:00, 2.95it/s] 100%|██████████| 4/4 [00:01<00:00, 2.39it/s]
  5837. 2025-05-18 10:13:46,750 - sglang - INFO - [2025-05-18 10:13:46 TP0] Capture cuda graph end. Time elapsed: 1.68 s
  5838. 2025-05-18 10:13:46,750 - __main__ - INFO - [2025-05-18 10:13:46 TP0] Capture cuda graph end. Time elapsed: 1.68 s
  5839. 2025-05-18 10:13:46,996 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  5840. 2025-05-18 10:13:48,078 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  5841. 2025-05-18 10:13:49,135 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  5842. 2025-05-18 10:13:49,359 - sglang - INFO - [2025-05-18 10:13:49 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5843. 2025-05-18 10:13:49,359 - __main__ - INFO - [2025-05-18 10:13:49 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  5844. 2025-05-18 10:13:50,208 - __main__ - INFO - sglang server is ready.
  5845. 2025-05-18 10:13:50,208 - __main__ - INFO - Queue remaining: 1
  5846. 2025-05-18 10:13:50,209 - __main__ - INFO -
  5847. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5848. ----------------------------------------------------------------------------------
  5849. 2025-05-18 10:13:50,209 - __main__ - INFO -
  5850. Worker ID
  5851. ---------
  5852. 2025-05-18 10:13:50,209 - __main__ - INFO - Worker 0 processing work item 5573a5a2ff993d9d69d55df0ecdfd1e871e0176e
  5853. 2025-05-18 10:13:50,209 - __main__ - INFO - Created all tasks for 5573a5a2ff993d9d69d55df0ecdfd1e871e0176e
  5854. 2025-05-18 10:13:50,215 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747534381/input.pdf in worker 0
  5855. 2025-05-18 10:13:50,436 - sglang - INFO - [2025-05-18 10:13:50 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5856. 2025-05-18 10:13:50,436 - __main__ - INFO - [2025-05-18 10:13:50 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5857. 2025-05-18 10:13:50,436 - __main__ - INFO - sglang running req: 0 queue req: 0
  5858. 2025-05-18 10:13:51,083 - sglang - INFO - [2025-05-18 10:13:51] The server is fired up and ready to roll!
  5859. 2025-05-18 10:13:51,083 - __main__ - INFO - [2025-05-18 10:13:51] The server is fired up and ready to roll!
  5860. 2025-05-18 10:13:56,725 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-1
  5861. 2025-05-18 10:13:56,740 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-2
  5862. 2025-05-18 10:13:56,769 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-3
  5863. 2025-05-18 10:13:56,776 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-4
  5864. 2025-05-18 10:13:56,800 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534381/input.pdf-5
  5865. 2025-05-18 10:14:00,279 - __main__ - INFO - Queue remaining: 0
  5866. 2025-05-18 10:14:00,279 - __main__ - INFO -
  5867. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5868. ----------------------------------------------------------------------------------
  5869. 2025-05-18 10:14:00,280 - __main__ - INFO -
  5870. Worker ID | started
  5871. ----------+--------
  5872. 0 | 5
  5873. 2025-05-18 10:14:10,281 - __main__ - INFO - Queue remaining: 0
  5874. 2025-05-18 10:14:10,282 - __main__ - INFO -
  5875. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5876. ----------------------------------------------------------------------------------
  5877. 2025-05-18 10:14:10,282 - __main__ - INFO -
  5878. Worker ID | started
  5879. ----------+--------
  5880. 0 | 5
  5881. 2025-05-18 10:14:18,412 - sglang - INFO - [2025-05-18 10:14:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  5882. 2025-05-18 10:14:18,413 - __main__ - INFO - sglang running req: 0 queue req: 0
  5883. 2025-05-18 10:14:19,243 - sglang - INFO - [2025-05-18 10:14:19 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  5884. 2025-05-18 10:14:19,244 - __main__ - INFO - sglang running req: 1 queue req: 0
  5885. 2025-05-18 10:14:20,283 - __main__ - INFO - Queue remaining: 0
  5886. 2025-05-18 10:14:20,284 - __main__ - INFO -
  5887. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5888. ----------------------------------------------------------------------------------
  5889. 2025-05-18 10:14:20,284 - __main__ - INFO -
  5890. Worker ID | started
  5891. ----------+--------
  5892. 0 | 5
  5893. 2025-05-18 10:14:21,091 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  5894. 2025-05-18 10:14:21,091 - __main__ - INFO - Worker 1 exiting due to empty queue
  5895. 2025-05-18 10:14:21,091 - __main__ - INFO - Worker 2 exiting due to empty queue
  5896. 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 3 exiting due to empty queue
  5897. 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 4 exiting due to empty queue
  5898. 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 5 exiting due to empty queue
  5899. 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 6 exiting due to empty queue
  5900. 2025-05-18 10:14:21,092 - __main__ - INFO - Worker 7 exiting due to empty queue
  5901. 2025-05-18 10:14:22,685 - sglang - INFO - [2025-05-18 10:14:22 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 5.16, #queue-req: 0
  5902. 2025-05-18 10:14:22,685 - __main__ - INFO - sglang running req: 5 queue req: 0
  5903. 2025-05-18 10:14:23,541 - sglang - INFO - [2025-05-18 10:14:23 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.69, #queue-req: 0
  5904. 2025-05-18 10:14:23,541 - __main__ - INFO - sglang running req: 5 queue req: 0
  5905. 2025-05-18 10:14:24,396 - sglang - INFO - [2025-05-18 10:14:24 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.92, #queue-req: 0
  5906. 2025-05-18 10:14:24,396 - __main__ - INFO - sglang running req: 5 queue req: 0
  5907. 2025-05-18 10:14:25,252 - sglang - INFO - [2025-05-18 10:14:25 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.68, #queue-req: 0
  5908. 2025-05-18 10:14:25,252 - __main__ - INFO - sglang running req: 5 queue req: 0
  5909. 2025-05-18 10:14:26,108 - sglang - INFO - [2025-05-18 10:14:26 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.63, #queue-req: 0
  5910. 2025-05-18 10:14:26,108 - __main__ - INFO - sglang running req: 5 queue req: 0
  5911. 2025-05-18 10:14:26,966 - sglang - INFO - [2025-05-18 10:14:26 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 233.02, #queue-req: 0
  5912. 2025-05-18 10:14:26,966 - __main__ - INFO - sglang running req: 5 queue req: 0
  5913. 2025-05-18 10:14:27,827 - sglang - INFO - [2025-05-18 10:14:27 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 232.34, #queue-req: 0
  5914. 2025-05-18 10:14:27,827 - __main__ - INFO - sglang running req: 5 queue req: 0
  5915. 2025-05-18 10:14:28,688 - sglang - INFO - [2025-05-18 10:14:28 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 232.30, #queue-req: 0
  5916. 2025-05-18 10:14:28,688 - __main__ - INFO - sglang running req: 5 queue req: 0
  5917. 2025-05-18 10:14:29,546 - sglang - INFO - [2025-05-18 10:14:29 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 216.65, #queue-req: 0
  5918. 2025-05-18 10:14:29,546 - __main__ - INFO - sglang running req: 3 queue req: 0
  5919. 2025-05-18 10:14:30,285 - __main__ - INFO - Queue remaining: 0
  5920. 2025-05-18 10:14:30,285 - __main__ - INFO -
  5921. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  5922. ----------------------------------------------------------------------------------
  5923. sglang_input_tokens 48.89 48.89
  5924. sglang_output_tokens 8.43 8.43
  5925. 2025-05-18 10:14:30,285 - __main__ - INFO -
  5926. Worker ID | finished | started
  5927. ----------+----------+--------
  5928. 0 | 2 | 5
  5929. 2025-05-18 10:14:30,387 - sglang - INFO - [2025-05-18 10:14:30 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 142.78, #queue-req: 0
  5930. 2025-05-18 10:14:30,387 - __main__ - INFO - sglang running req: 3 queue req: 0
  5931. 2025-05-18 10:14:31,220 - sglang - INFO - [2025-05-18 10:14:31 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 81.63, #queue-req: 0
  5932. 2025-05-18 10:14:31,220 - __main__ - INFO - sglang running req: 1 queue req: 0
  5933. 2025-05-18 10:14:32,044 - sglang - INFO - [2025-05-18 10:14:32 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.54, #queue-req: 0
  5934. 2025-05-18 10:14:32,044 - __main__ - INFO - sglang running req: 1 queue req: 0
  5935. 2025-05-18 10:14:32,708 - __main__ - INFO - Finished TaskGroup for worker on 5573a5a2ff993d9d69d55df0ecdfd1e871e0176e
  5936. 2025-05-18 10:14:32,708 - __main__ - INFO - Got 1 docs for 5573a5a2ff993d9d69d55df0ecdfd1e871e0176e
  5937. 2025-05-18 10:14:32,709 - __main__ - INFO - Worker 0 exiting due to empty queue
  5938. 2025-05-18 10:14:32,710 - __main__ - INFO - Work done
  5939. 2025-05-18 10:14:32,710 - __main__ - INFO - Got cancellation request for SGLang server
  5940. 2025-05-18 10:18:39,289 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  5941. 2025-05-18 10:18:39,290 - __main__ - INFO - Loading file at olmocr_workspace/job_1747534713/input.pdf as PDF document
  5942. 2025-05-18 10:18:39,290 - __main__ - INFO - Found 1 total pdf paths to add
  5943. 2025-05-18 10:18:39,292 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  5944. 2025-05-18 10:18:39,534 - __main__ - INFO - Starting pipeline with PID 482470
  5945. 2025-05-18 10:18:39,534 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  5946. 2025-05-18 10:18:40,286 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  5947. 2025-05-18 10:18:41,332 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  5948. 2025-05-18 10:18:42,396 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  5949. 2025-05-18 10:18:43,465 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  5950. 2025-05-18 10:18:44,534 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  5951. 2025-05-18 10:18:45,592 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  5952. 2025-05-18 10:18:45,950 - sglang - INFO - [2025-05-18 10:18:45] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=432529288, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5953. 2025-05-18 10:18:45,950 - __main__ - INFO - [2025-05-18 10:18:45] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=432529288, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  5954. 2025-05-18 10:18:46,671 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  5955. 2025-05-18 10:18:47,718 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  5956. 2025-05-18 10:18:48,764 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  5957. 2025-05-18 10:18:49,809 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  5958. 2025-05-18 10:18:50,930 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  5959. 2025-05-18 10:18:51,972 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  5960. 2025-05-18 10:18:53,033 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  5961. 2025-05-18 10:18:54,100 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  5962. 2025-05-18 10:18:54,357 - sglang - INFO - [2025-05-18 10:18:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5963. 2025-05-18 10:18:54,357 - __main__ - INFO - [2025-05-18 10:18:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
  5964. 2025-05-18 10:18:54,987 - sglang - INFO - [2025-05-18 10:18:54 TP0] Overlap scheduler is disabled for multimodal models.
  5965. 2025-05-18 10:18:54,987 - __main__ - INFO - [2025-05-18 10:18:54 TP0] Overlap scheduler is disabled for multimodal models.
  5966. 2025-05-18 10:18:55,178 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  5967. 2025-05-18 10:18:55,471 - sglang - INFO - [2025-05-18 10:18:55 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5968. 2025-05-18 10:18:55,471 - __main__ - INFO - [2025-05-18 10:18:55 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  5969. 2025-05-18 10:18:55,471 - sglang - INFO - [2025-05-18 10:18:55 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5970. 2025-05-18 10:18:55,471 - __main__ - INFO - [2025-05-18 10:18:55 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  5971. 2025-05-18 10:18:55,471 - sglang - INFO - [2025-05-18 10:18:55 TP0] Init torch distributed begin.
  5972. 2025-05-18 10:18:55,471 - __main__ - INFO - [2025-05-18 10:18:55 TP0] Init torch distributed begin.
  5973. 2025-05-18 10:18:56,256 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  5974. 2025-05-18 10:18:57,314 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  5975. 2025-05-18 10:18:58,348 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  5976. 2025-05-18 10:18:59,399 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  5977. 2025-05-18 10:19:00,468 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  5978. 2025-05-18 10:19:00,767 - sglang - INFO - [2025-05-18 10:19:00 TP0] Load weight begin. avail mem=23.33 GB
  5979. 2025-05-18 10:19:00,767 - __main__ - INFO - [2025-05-18 10:19:00 TP0] Load weight begin. avail mem=23.33 GB
  5980. 2025-05-18 10:19:01,547 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  5981. 2025-05-18 10:19:01,758 - sglang - INFO - [2025-05-18 10:19:01 TP0] Using model weights format ['*.safetensors']
  5982. 2025-05-18 10:19:01,758 - __main__ - INFO - [2025-05-18 10:19:01 TP0] Using model weights format ['*.safetensors']
  5983. 2025-05-18 10:19:02,285 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5984. 2025-05-18 10:19:02,286 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  5985. 2025-05-18 10:19:02,624 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.57it/s]
  5986. 2025-05-18 10:19:02,624 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.57it/s]
  5987. 2025-05-18 10:19:02,626 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  5988. 2025-05-18 10:19:03,506 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.50it/s]
  5989. 2025-05-18 10:19:03,507 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.50it/s]
  5990. 2025-05-18 10:19:03,705 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  5991. 2025-05-18 10:19:04,450 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.26it/s]
  5992. 2025-05-18 10:19:04,450 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.26it/s]
  5993. 2025-05-18 10:19:04,784 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  5994. 2025-05-18 10:19:05,365 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.19it/s]
  5995. 2025-05-18 10:19:05,365 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.19it/s]
  5996. 2025-05-18 10:19:05,365 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.30it/s]
  5997. 2025-05-18 10:19:05,365 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.30it/s]
  5998. 2025-05-18 10:19:05,365 - sglang - INFO -
  5999. 2025-05-18 10:19:05,365 - __main__ - INFO -
  6000. 2025-05-18 10:19:05,512 - sglang - INFO - [2025-05-18 10:19:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  6001. 2025-05-18 10:19:05,512 - __main__ - INFO - [2025-05-18 10:19:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  6002. 2025-05-18 10:19:05,547 - sglang - INFO - [2025-05-18 10:19:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  6003. 2025-05-18 10:19:05,548 - __main__ - INFO - [2025-05-18 10:19:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  6004. 2025-05-18 10:19:05,548 - sglang - INFO - [2025-05-18 10:19:05 TP0] Memory pool end. avail mem=5.30 GB
  6005. 2025-05-18 10:19:05,548 - __main__ - INFO - [2025-05-18 10:19:05 TP0] Memory pool end. avail mem=5.30 GB
  6006. 2025-05-18 10:19:05,720 - sglang - INFO - [2025-05-18 10:19:05 TP0] Capture cuda graph begin. This can take up to several minutes.
  6007. 2025-05-18 10:19:05,721 - __main__ - INFO - [2025-05-18 10:19:05 TP0] Capture cuda graph begin. This can take up to several minutes.
  6008. 2025-05-18 10:19:05,862 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  6009. 2025-05-18 10:19:06,941 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  6010. 2025-05-18 10:19:07,448 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.04it/s] 50%|█████ | 2/4 [00:01<00:01, 1.81it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.41it/s] 100%|██████████| 4/4 [00:01<00:00, 2.87it/s] 100%|██████████| 4/4 [00:01<00:00, 2.32it/s]
  6011. 2025-05-18 10:19:07,448 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.04it/s] 50%|█████ | 2/4 [00:01<00:01, 1.81it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.41it/s] 100%|██████████| 4/4 [00:01<00:00, 2.87it/s] 100%|██████████| 4/4 [00:01<00:00, 2.32it/s]
  6012. 2025-05-18 10:19:07,448 - sglang - INFO - [2025-05-18 10:19:07 TP0] Capture cuda graph end. Time elapsed: 1.73 s
  6013. 2025-05-18 10:19:07,448 - __main__ - INFO - [2025-05-18 10:19:07 TP0] Capture cuda graph end. Time elapsed: 1.73 s
  6014. 2025-05-18 10:19:08,019 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  6015. 2025-05-18 10:19:09,089 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  6016. 2025-05-18 10:19:09,790 - sglang - INFO - [2025-05-18 10:19:09 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  6017. 2025-05-18 10:19:09,790 - __main__ - INFO - [2025-05-18 10:19:09 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  6018. 2025-05-18 10:19:10,181 - __main__ - INFO - sglang server is ready.
  6019. 2025-05-18 10:19:10,182 - __main__ - INFO - Queue remaining: 1
  6020. 2025-05-18 10:19:10,182 - __main__ - INFO -
  6021. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6022. ----------------------------------------------------------------------------------
  6023. 2025-05-18 10:19:10,182 - __main__ - INFO -
  6024. Worker ID
  6025. ---------
  6026. 2025-05-18 10:19:10,182 - __main__ - INFO - Worker 0 processing work item 9901aa831d8e2be5b7f3bdc190a5653fe9f5b256
  6027. 2025-05-18 10:19:10,182 - __main__ - INFO - Created all tasks for 9901aa831d8e2be5b7f3bdc190a5653fe9f5b256
  6028. 2025-05-18 10:19:10,184 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747534713/input.pdf in worker 0
  6029. 2025-05-18 10:19:10,863 - sglang - INFO - [2025-05-18 10:19:10 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6030. 2025-05-18 10:19:10,864 - __main__ - INFO - [2025-05-18 10:19:10 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6031. 2025-05-18 10:19:10,864 - __main__ - INFO - sglang running req: 0 queue req: 0
  6032. 2025-05-18 10:19:11,556 - sglang - INFO - [2025-05-18 10:19:11] The server is fired up and ready to roll!
  6033. 2025-05-18 10:19:11,556 - __main__ - INFO - [2025-05-18 10:19:11] The server is fired up and ready to roll!
  6034. 2025-05-18 10:19:16,469 - __main__ - INFO - Built page query for olmocr_workspace/job_1747534713/input.pdf-1
  6035. 2025-05-18 10:19:20,183 - __main__ - INFO - Queue remaining: 0
  6036. 2025-05-18 10:19:20,183 - __main__ - INFO -
  6037. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6038. ----------------------------------------------------------------------------------
  6039. 2025-05-18 10:19:20,183 - __main__ - INFO -
  6040. Worker ID | started
  6041. ----------+--------
  6042. 0 | 1
  6043. 2025-05-18 10:19:30,196 - __main__ - INFO - Queue remaining: 0
  6044. 2025-05-18 10:19:30,196 - __main__ - INFO -
  6045. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6046. ----------------------------------------------------------------------------------
  6047. 2025-05-18 10:19:30,196 - __main__ - INFO -
  6048. Worker ID | started
  6049. ----------+--------
  6050. 0 | 1
  6051. 2025-05-18 10:19:37,626 - sglang - INFO - [2025-05-18 10:19:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6052. 2025-05-18 10:19:37,627 - __main__ - INFO - sglang running req: 0 queue req: 0
  6053. 2025-05-18 10:19:39,046 - sglang - INFO - [2025-05-18 10:19:39 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.37, #queue-req: 0
  6054. 2025-05-18 10:19:39,047 - __main__ - INFO - sglang running req: 1 queue req: 0
  6055. 2025-05-18 10:19:39,865 - sglang - INFO - [2025-05-18 10:19:39 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.87, #queue-req: 0
  6056. 2025-05-18 10:19:39,865 - __main__ - INFO - sglang running req: 1 queue req: 0
  6057. 2025-05-18 10:19:40,197 - __main__ - INFO - Queue remaining: 0
  6058. 2025-05-18 10:19:40,197 - __main__ - INFO -
  6059. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6060. ----------------------------------------------------------------------------------
  6061. 2025-05-18 10:19:40,197 - __main__ - INFO -
  6062. Worker ID | started
  6063. ----------+--------
  6064. 0 | 1
  6065. 2025-05-18 10:19:40,682 - sglang - INFO - [2025-05-18 10:19:40 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.95, #queue-req: 0
  6066. 2025-05-18 10:19:40,682 - __main__ - INFO - sglang running req: 1 queue req: 0
  6067. 2025-05-18 10:19:41,498 - sglang - INFO - [2025-05-18 10:19:41 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 49.00, #queue-req: 0
  6068. 2025-05-18 10:19:41,498 - __main__ - INFO - sglang running req: 1 queue req: 0
  6069. 2025-05-18 10:19:42,316 - sglang - INFO - [2025-05-18 10:19:42 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.92, #queue-req: 0
  6070. 2025-05-18 10:19:42,316 - __main__ - INFO - sglang running req: 1 queue req: 0
  6071. 2025-05-18 10:19:42,489 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  6072. 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 1 exiting due to empty queue
  6073. 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 2 exiting due to empty queue
  6074. 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 3 exiting due to empty queue
  6075. 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 4 exiting due to empty queue
  6076. 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 5 exiting due to empty queue
  6077. 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 6 exiting due to empty queue
  6078. 2025-05-18 10:19:42,490 - __main__ - INFO - Worker 7 exiting due to empty queue
  6079. 2025-05-18 10:19:43,135 - sglang - INFO - [2025-05-18 10:19:43 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.84, #queue-req: 0
  6080. 2025-05-18 10:19:43,135 - __main__ - INFO - sglang running req: 1 queue req: 0
  6081. 2025-05-18 10:19:43,955 - sglang - INFO - [2025-05-18 10:19:43 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.79, #queue-req: 0
  6082. 2025-05-18 10:19:43,955 - __main__ - INFO - sglang running req: 1 queue req: 0
  6083. 2025-05-18 10:19:44,249 - __main__ - INFO - Finished TaskGroup for worker on 9901aa831d8e2be5b7f3bdc190a5653fe9f5b256
  6084. 2025-05-18 10:19:44,250 - __main__ - INFO - Got 1 docs for 9901aa831d8e2be5b7f3bdc190a5653fe9f5b256
  6085. 2025-05-18 10:19:44,251 - __main__ - INFO - Worker 0 exiting due to empty queue
  6086. 2025-05-18 10:19:44,251 - __main__ - INFO - Work done
  6087. 2025-05-18 10:19:44,252 - __main__ - INFO - Got cancellation request for SGLang server
  6088. 2025-05-21 10:48:29,495 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  6089. 2025-05-21 10:48:29,495 - __main__ - INFO - Loading file at olmocr_workspace/job_1747795702/input.pdf as PDF document
  6090. 2025-05-21 10:48:29,495 - __main__ - INFO - Found 1 total pdf paths to add
  6091. 2025-05-21 10:48:29,498 - __main__ - INFO - Calculated items_per_group: 100 based on average pages per PDF: 5.00
  6092. 2025-05-21 10:48:29,750 - __main__ - INFO - Starting pipeline with PID 564298
  6093. 2025-05-21 10:48:29,750 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  6094. 2025-05-21 10:48:30,314 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  6095. 2025-05-21 10:48:31,353 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  6096. 2025-05-21 10:48:32,411 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  6097. 2025-05-21 10:48:33,469 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  6098. 2025-05-21 10:48:34,533 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  6099. 2025-05-21 10:48:35,679 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  6100. 2025-05-21 10:48:36,724 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  6101. 2025-05-21 10:48:36,776 - sglang - INFO - [2025-05-21 10:48:36] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=968885299, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  6102. 2025-05-21 10:48:36,776 - __main__ - INFO - [2025-05-21 10:48:36] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=968885299, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  6103. 2025-05-21 10:48:37,768 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  6104. 2025-05-21 10:48:38,813 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  6105. 2025-05-21 10:48:39,857 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  6106. 2025-05-21 10:48:40,071 - sglang - INFO - [2025-05-21 10:48:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
  6107. 2025-05-21 10:48:40,071 - __main__ - INFO - [2025-05-21 10:48:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
  6108. 2025-05-21 10:48:40,900 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  6109. 2025-05-21 10:48:41,946 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  6110. 2025-05-21 10:48:42,991 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  6111. 2025-05-21 10:48:44,035 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  6112. 2025-05-21 10:48:45,079 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  6113. 2025-05-21 10:48:46,112 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  6114. 2025-05-21 10:48:46,231 - sglang - INFO - [2025-05-21 10:48:46 TP0] Overlap scheduler is disabled for multimodal models.
  6115. 2025-05-21 10:48:46,231 - __main__ - INFO - [2025-05-21 10:48:46 TP0] Overlap scheduler is disabled for multimodal models.
  6116. 2025-05-21 10:48:46,713 - sglang - INFO - [2025-05-21 10:48:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  6117. 2025-05-21 10:48:46,713 - __main__ - INFO - [2025-05-21 10:48:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  6118. 2025-05-21 10:48:46,713 - sglang - INFO - [2025-05-21 10:48:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  6119. 2025-05-21 10:48:46,713 - __main__ - INFO - [2025-05-21 10:48:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  6120. 2025-05-21 10:48:46,713 - sglang - INFO - [2025-05-21 10:48:46 TP0] Init torch distributed begin.
  6121. 2025-05-21 10:48:46,713 - __main__ - INFO - [2025-05-21 10:48:46 TP0] Init torch distributed begin.
  6122. 2025-05-21 10:48:47,181 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  6123. 2025-05-21 10:48:48,247 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  6124. 2025-05-21 10:48:49,313 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  6125. 2025-05-21 10:48:50,375 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  6126. 2025-05-21 10:48:51,429 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  6127. 2025-05-21 10:48:52,155 - sglang - INFO - [2025-05-21 10:48:52 TP0] Load weight begin. avail mem=23.33 GB
  6128. 2025-05-21 10:48:52,156 - __main__ - INFO - [2025-05-21 10:48:52 TP0] Load weight begin. avail mem=23.33 GB
  6129. 2025-05-21 10:48:52,507 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  6130. 2025-05-21 10:48:53,239 - sglang - INFO - [2025-05-21 10:48:53 TP0] Using model weights format ['*.safetensors']
  6131. 2025-05-21 10:48:53,240 - __main__ - INFO - [2025-05-21 10:48:53 TP0] Using model weights format ['*.safetensors']
  6132. 2025-05-21 10:48:53,586 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  6133. 2025-05-21 10:48:53,738 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  6134. 2025-05-21 10:48:53,738 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  6135. 2025-05-21 10:48:54,049 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.22it/s]
  6136. 2025-05-21 10:48:54,050 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.22it/s]
  6137. 2025-05-21 10:48:54,661 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  6138. 2025-05-21 10:48:55,040 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.41it/s]
  6139. 2025-05-21 10:48:55,041 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.41it/s]
  6140. 2025-05-21 10:48:55,740 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  6141. 2025-05-21 10:48:55,997 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.21it/s]
  6142. 2025-05-21 10:48:55,998 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.21it/s]
  6143. 2025-05-21 10:48:56,818 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  6144. 2025-05-21 10:48:56,931 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.15it/s]
  6145. 2025-05-21 10:48:56,931 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.15it/s]
  6146. 2025-05-21 10:48:56,931 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.25it/s]
  6147. 2025-05-21 10:48:56,931 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.25it/s]
  6148. 2025-05-21 10:48:56,931 - sglang - INFO -
  6149. 2025-05-21 10:48:56,931 - __main__ - INFO -
  6150. 2025-05-21 10:48:57,064 - sglang - INFO - [2025-05-21 10:48:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  6151. 2025-05-21 10:48:57,064 - __main__ - INFO - [2025-05-21 10:48:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  6152. 2025-05-21 10:48:57,070 - sglang - INFO - [2025-05-21 10:48:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  6153. 2025-05-21 10:48:57,070 - __main__ - INFO - [2025-05-21 10:48:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  6154. 2025-05-21 10:48:57,070 - sglang - INFO - [2025-05-21 10:48:57 TP0] Memory pool end. avail mem=5.30 GB
  6155. 2025-05-21 10:48:57,070 - __main__ - INFO - [2025-05-21 10:48:57 TP0] Memory pool end. avail mem=5.30 GB
  6156. 2025-05-21 10:48:57,221 - sglang - INFO - [2025-05-21 10:48:57 TP0] Capture cuda graph begin. This can take up to several minutes.
  6157. 2025-05-21 10:48:57,222 - __main__ - INFO - [2025-05-21 10:48:57 TP0] Capture cuda graph begin. This can take up to several minutes.
  6158. 2025-05-21 10:48:57,897 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  6159. 2025-05-21 10:48:58,974 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.05it/s] 50%|█████ | 2/4 [00:01<00:01, 1.84it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.43it/s] 100%|██████████| 4/4 [00:01<00:00, 2.84it/s] 100%|██████████| 4/4 [00:01<00:00, 2.32it/s]
  6160. 2025-05-21 10:48:58,975 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.05it/s] 50%|█████ | 2/4 [00:01<00:01, 1.84it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.43it/s] 100%|██████████| 4/4 [00:01<00:00, 2.84it/s] 100%|██████████| 4/4 [00:01<00:00, 2.32it/s]
  6161. 2025-05-21 10:48:58,975 - sglang - INFO - [2025-05-21 10:48:58 TP0] Capture cuda graph end. Time elapsed: 1.73 s
  6162. 2025-05-21 10:48:58,975 - __main__ - INFO - [2025-05-21 10:48:58 TP0] Capture cuda graph end. Time elapsed: 1.73 s
  6163. 2025-05-21 10:48:58,976 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  6164. 2025-05-21 10:49:00,050 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  6165. 2025-05-21 10:49:01,116 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  6166. 2025-05-21 10:49:01,339 - sglang - INFO - [2025-05-21 10:49:01 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  6167. 2025-05-21 10:49:01,339 - __main__ - INFO - [2025-05-21 10:49:01 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  6168. 2025-05-21 10:49:02,215 - __main__ - INFO - sglang server is ready.
  6169. 2025-05-21 10:49:02,215 - __main__ - INFO - Queue remaining: 1
  6170. 2025-05-21 10:49:02,215 - __main__ - INFO -
  6171. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6172. ----------------------------------------------------------------------------------
  6173. 2025-05-21 10:49:02,216 - __main__ - INFO -
  6174. Worker ID
  6175. ---------
  6176. 2025-05-21 10:49:02,216 - __main__ - INFO - Worker 0 processing work item 81f62eccf96bd22b741354b451ad2460310111e8
  6177. 2025-05-21 10:49:02,216 - __main__ - INFO - Created all tasks for 81f62eccf96bd22b741354b451ad2460310111e8
  6178. 2025-05-21 10:49:02,222 - __main__ - INFO - Got 5 pages to do for olmocr_workspace/job_1747795702/input.pdf in worker 0
  6179. 2025-05-21 10:49:02,416 - sglang - INFO - [2025-05-21 10:49:02 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6180. 2025-05-21 10:49:02,416 - __main__ - INFO - [2025-05-21 10:49:02 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6181. 2025-05-21 10:49:02,416 - __main__ - INFO - sglang running req: 0 queue req: 0
  6182. 2025-05-21 10:49:03,182 - sglang - INFO - [2025-05-21 10:49:03] The server is fired up and ready to roll!
  6183. 2025-05-21 10:49:03,182 - __main__ - INFO - [2025-05-21 10:49:03] The server is fired up and ready to roll!
  6184. 2025-05-21 10:49:08,501 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-1
  6185. 2025-05-21 10:49:08,536 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-2
  6186. 2025-05-21 10:49:08,574 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-3
  6187. 2025-05-21 10:49:08,602 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-4
  6188. 2025-05-21 10:49:08,646 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795702/input.pdf-5
  6189. 2025-05-21 10:49:12,218 - __main__ - INFO - Queue remaining: 0
  6190. 2025-05-21 10:49:12,218 - __main__ - INFO -
  6191. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6192. ----------------------------------------------------------------------------------
  6193. 2025-05-21 10:49:12,218 - __main__ - INFO -
  6194. Worker ID | started
  6195. ----------+--------
  6196. 0 | 5
  6197. 2025-05-21 10:49:22,205 - sglang - INFO - [2025-05-21 10:49:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6198. 2025-05-21 10:49:22,205 - __main__ - INFO - sglang running req: 0 queue req: 0
  6199. 2025-05-21 10:49:22,219 - __main__ - INFO - Queue remaining: 0
  6200. 2025-05-21 10:49:22,219 - __main__ - INFO -
  6201. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6202. ----------------------------------------------------------------------------------
  6203. 2025-05-21 10:49:22,220 - __main__ - INFO -
  6204. Worker ID | started
  6205. ----------+--------
  6206. 0 | 5
  6207. 2025-05-21 10:49:24,183 - sglang - INFO - [2025-05-21 10:49:24 TP0] Prefill batch. #new-seq: 4, #new-token: 8384, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  6208. 2025-05-21 10:49:24,183 - __main__ - INFO - sglang running req: 1 queue req: 0
  6209. 2025-05-21 10:49:28,600 - sglang - INFO - [2025-05-21 10:49:28 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.31, #queue-req: 0
  6210. 2025-05-21 10:49:28,600 - __main__ - INFO - sglang running req: 5 queue req: 0
  6211. 2025-05-21 10:49:29,457 - sglang - INFO - [2025-05-21 10:49:29 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 233.26, #queue-req: 0
  6212. 2025-05-21 10:49:29,457 - __main__ - INFO - sglang running req: 5 queue req: 0
  6213. 2025-05-21 10:49:30,315 - sglang - INFO - [2025-05-21 10:49:30 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 233.17, #queue-req: 0
  6214. 2025-05-21 10:49:30,315 - __main__ - INFO - sglang running req: 5 queue req: 0
  6215. 2025-05-21 10:49:31,173 - sglang - INFO - [2025-05-21 10:49:31 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 233.00, #queue-req: 0
  6216. 2025-05-21 10:49:31,174 - __main__ - INFO - sglang running req: 5 queue req: 0
  6217. 2025-05-21 10:49:32,032 - sglang - INFO - [2025-05-21 10:49:32 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 233.02, #queue-req: 0
  6218. 2025-05-21 10:49:32,032 - __main__ - INFO - sglang running req: 5 queue req: 0
  6219. 2025-05-21 10:49:32,220 - __main__ - INFO - Queue remaining: 0
  6220. 2025-05-21 10:49:32,221 - __main__ - INFO -
  6221. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6222. ----------------------------------------------------------------------------------
  6223. 2025-05-21 10:49:32,221 - __main__ - INFO -
  6224. Worker ID | started
  6225. ----------+--------
  6226. 0 | 5
  6227. 2025-05-21 10:49:32,891 - sglang - INFO - [2025-05-21 10:49:32 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 232.69, #queue-req: 0
  6228. 2025-05-21 10:49:32,892 - __main__ - INFO - sglang running req: 5 queue req: 0
  6229. 2025-05-21 10:49:33,496 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  6230. 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 1 exiting due to empty queue
  6231. 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 2 exiting due to empty queue
  6232. 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 3 exiting due to empty queue
  6233. 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 4 exiting due to empty queue
  6234. 2025-05-21 10:49:33,496 - __main__ - INFO - Worker 5 exiting due to empty queue
  6235. 2025-05-21 10:49:33,497 - __main__ - INFO - Worker 6 exiting due to empty queue
  6236. 2025-05-21 10:49:33,497 - __main__ - INFO - Worker 7 exiting due to empty queue
  6237. 2025-05-21 10:49:33,754 - sglang - INFO - [2025-05-21 10:49:33 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 231.74, #queue-req: 0
  6238. 2025-05-21 10:49:33,755 - __main__ - INFO - sglang running req: 5 queue req: 0
  6239. 2025-05-21 10:49:34,617 - sglang - INFO - [2025-05-21 10:49:34 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 231.73, #queue-req: 0
  6240. 2025-05-21 10:49:34,618 - __main__ - INFO - sglang running req: 5 queue req: 0
  6241. 2025-05-21 10:49:35,477 - sglang - INFO - [2025-05-21 10:49:35 TP0] Decode batch. #running-req: 4, #token: 9730, token usage: 0.26, gen throughput (token/s): 217.58, #queue-req: 0
  6242. 2025-05-21 10:49:35,477 - __main__ - INFO - sglang running req: 4 queue req: 0
  6243. 2025-05-21 10:49:36,311 - sglang - INFO - [2025-05-21 10:49:36 TP0] Decode batch. #running-req: 2, #token: 5146, token usage: 0.14, gen throughput (token/s): 111.43, #queue-req: 0
  6244. 2025-05-21 10:49:36,312 - __main__ - INFO - sglang running req: 2 queue req: 0
  6245. 2025-05-21 10:49:37,139 - sglang - INFO - [2025-05-21 10:49:37 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 61.65, #queue-req: 0
  6246. 2025-05-21 10:49:37,139 - __main__ - INFO - sglang running req: 1 queue req: 0
  6247. 2025-05-21 10:49:37,965 - sglang - INFO - [2025-05-21 10:49:37 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 48.41, #queue-req: 0
  6248. 2025-05-21 10:49:37,965 - __main__ - INFO - sglang running req: 1 queue req: 0
  6249. 2025-05-21 10:49:38,631 - __main__ - INFO - Finished TaskGroup for worker on 81f62eccf96bd22b741354b451ad2460310111e8
  6250. 2025-05-21 10:49:38,631 - __main__ - INFO - Got 1 docs for 81f62eccf96bd22b741354b451ad2460310111e8
  6251. 2025-05-21 10:49:38,633 - __main__ - INFO - Worker 0 exiting due to empty queue
  6252. 2025-05-21 10:49:38,633 - __main__ - INFO - Work done
  6253. 2025-05-21 10:49:38,634 - __main__ - INFO - Got cancellation request for SGLang server
  6254. 2025-05-21 10:51:53,346 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  6255. 2025-05-21 10:51:53,346 - __main__ - INFO - Loading file at olmocr_workspace/job_1747795907/input.pdf as PDF document
  6256. 2025-05-21 10:51:53,347 - __main__ - INFO - Found 1 total pdf paths to add
  6257. 2025-05-21 10:51:53,349 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  6258. 2025-05-21 10:51:53,562 - __main__ - INFO - Starting pipeline with PID 565624
  6259. 2025-05-21 10:51:53,562 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  6260. 2025-05-21 10:51:59,266 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  6261. 2025-05-21 10:52:00,312 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  6262. 2025-05-21 10:52:01,373 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  6263. 2025-05-21 10:52:02,437 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  6264. 2025-05-21 10:52:03,506 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  6265. 2025-05-21 10:52:04,575 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  6266. 2025-05-21 10:52:04,951 - sglang - INFO - [2025-05-21 10:52:04] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=425685376, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  6267. 2025-05-21 10:52:04,951 - __main__ - INFO - [2025-05-21 10:52:04] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=425685376, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  6268. 2025-05-21 10:52:05,654 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  6269. 2025-05-21 10:52:06,718 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  6270. 2025-05-21 10:52:07,785 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  6271. 2025-05-21 10:52:08,232 - sglang - INFO - [2025-05-21 10:52:08] Use chat template for the OpenAI-compatible API server: qwen2-vl
  6272. 2025-05-21 10:52:08,233 - __main__ - INFO - [2025-05-21 10:52:08] Use chat template for the OpenAI-compatible API server: qwen2-vl
  6273. 2025-05-21 10:52:08,865 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  6274. 2025-05-21 10:52:09,941 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  6275. 2025-05-21 10:52:11,008 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  6276. 2025-05-21 10:52:12,069 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  6277. 2025-05-21 10:52:13,134 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  6278. 2025-05-21 10:52:14,200 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  6279. 2025-05-21 10:52:15,266 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  6280. 2025-05-21 10:52:16,328 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  6281. 2025-05-21 10:52:17,382 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  6282. 2025-05-21 10:52:18,448 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  6283. 2025-05-21 10:52:19,029 - sglang - INFO - [2025-05-21 10:52:19 TP0] Overlap scheduler is disabled for multimodal models.
  6284. 2025-05-21 10:52:19,029 - __main__ - INFO - [2025-05-21 10:52:19 TP0] Overlap scheduler is disabled for multimodal models.
  6285. 2025-05-21 10:52:19,526 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  6286. 2025-05-21 10:52:19,536 - sglang - INFO - [2025-05-21 10:52:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  6287. 2025-05-21 10:52:19,536 - __main__ - INFO - [2025-05-21 10:52:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  6288. 2025-05-21 10:52:19,536 - sglang - INFO - [2025-05-21 10:52:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  6289. 2025-05-21 10:52:19,536 - __main__ - INFO - [2025-05-21 10:52:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  6290. 2025-05-21 10:52:19,536 - sglang - INFO - [2025-05-21 10:52:19 TP0] Init torch distributed begin.
  6291. 2025-05-21 10:52:19,536 - __main__ - INFO - [2025-05-21 10:52:19 TP0] Init torch distributed begin.
  6292. 2025-05-21 10:52:20,605 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  6293. 2025-05-21 10:52:21,676 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  6294. 2025-05-21 10:52:22,746 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  6295. 2025-05-21 10:52:23,817 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  6296. 2025-05-21 10:52:24,888 - sglang - INFO - [2025-05-21 10:52:24 TP0] Load weight begin. avail mem=23.33 GB
  6297. 2025-05-21 10:52:24,888 - __main__ - INFO - [2025-05-21 10:52:24 TP0] Load weight begin. avail mem=23.33 GB
  6298. 2025-05-21 10:52:24,889 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  6299. 2025-05-21 10:52:25,963 - sglang - INFO - [2025-05-21 10:52:25 TP0] Using model weights format ['*.safetensors']
  6300. 2025-05-21 10:52:25,963 - __main__ - INFO - [2025-05-21 10:52:25 TP0] Using model weights format ['*.safetensors']
  6301. 2025-05-21 10:52:25,964 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  6302. 2025-05-21 10:52:26,469 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  6303. 2025-05-21 10:52:26,469 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  6304. 2025-05-21 10:52:26,761 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
  6305. 2025-05-21 10:52:26,761 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.43it/s]
  6306. 2025-05-21 10:52:27,064 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  6307. 2025-05-21 10:52:27,680 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.51it/s]
  6308. 2025-05-21 10:52:27,680 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.51it/s]
  6309. 2025-05-21 10:52:28,144 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  6310. 2025-05-21 10:52:28,602 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
  6311. 2025-05-21 10:52:28,602 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.28it/s]
  6312. 2025-05-21 10:52:29,225 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  6313. 2025-05-21 10:52:29,494 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
  6314. 2025-05-21 10:52:29,494 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
  6315. 2025-05-21 10:52:29,494 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
  6316. 2025-05-21 10:52:29,494 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.32it/s]
  6317. 2025-05-21 10:52:29,495 - sglang - INFO -
  6318. 2025-05-21 10:52:29,495 - __main__ - INFO -
  6319. 2025-05-21 10:52:29,640 - sglang - INFO - [2025-05-21 10:52:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  6320. 2025-05-21 10:52:29,640 - __main__ - INFO - [2025-05-21 10:52:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  6321. 2025-05-21 10:52:29,646 - sglang - INFO - [2025-05-21 10:52:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  6322. 2025-05-21 10:52:29,647 - __main__ - INFO - [2025-05-21 10:52:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  6323. 2025-05-21 10:52:29,647 - sglang - INFO - [2025-05-21 10:52:29 TP0] Memory pool end. avail mem=5.30 GB
  6324. 2025-05-21 10:52:29,647 - __main__ - INFO - [2025-05-21 10:52:29 TP0] Memory pool end. avail mem=5.30 GB
  6325. 2025-05-21 10:52:29,821 - sglang - INFO - [2025-05-21 10:52:29 TP0] Capture cuda graph begin. This can take up to several minutes.
  6326. 2025-05-21 10:52:29,822 - __main__ - INFO - [2025-05-21 10:52:29 TP0] Capture cuda graph begin. This can take up to several minutes.
  6327. 2025-05-21 10:52:30,304 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  6328. 2025-05-21 10:52:31,384 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  6329. 2025-05-21 10:52:31,698 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.14s/it] 50%|█████ | 2/4 [00:01<00:01, 1.63it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s] 100%|██████████| 4/4 [00:01<00:00, 2.73it/s] 100%|██████████| 4/4 [00:01<00:00, 2.13it/s]
  6330. 2025-05-21 10:52:31,699 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.14s/it] 50%|█████ | 2/4 [00:01<00:01, 1.63it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s] 100%|██████████| 4/4 [00:01<00:00, 2.73it/s] 100%|██████████| 4/4 [00:01<00:00, 2.13it/s]
  6331. 2025-05-21 10:52:31,699 - sglang - INFO - [2025-05-21 10:52:31 TP0] Capture cuda graph end. Time elapsed: 1.88 s
  6332. 2025-05-21 10:52:31,699 - __main__ - INFO - [2025-05-21 10:52:31 TP0] Capture cuda graph end. Time elapsed: 1.88 s
  6333. 2025-05-21 10:52:32,463 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  6334. 2025-05-21 10:52:33,532 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  6335. 2025-05-21 10:52:34,223 - sglang - INFO - [2025-05-21 10:52:34 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  6336. 2025-05-21 10:52:34,223 - __main__ - INFO - [2025-05-21 10:52:34 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  6337. 2025-05-21 10:52:34,626 - __main__ - INFO - sglang server is ready.
  6338. 2025-05-21 10:52:34,626 - __main__ - INFO - Queue remaining: 1
  6339. 2025-05-21 10:52:34,626 - __main__ - INFO -
  6340. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6341. ----------------------------------------------------------------------------------
  6342. 2025-05-21 10:52:34,626 - __main__ - INFO -
  6343. Worker ID
  6344. ---------
  6345. 2025-05-21 10:52:34,626 - __main__ - INFO - Worker 0 processing work item 92a0d2c3d6bc2676d1a017a5af100cdd331b9231
  6346. 2025-05-21 10:52:34,626 - __main__ - INFO - Created all tasks for 92a0d2c3d6bc2676d1a017a5af100cdd331b9231
  6347. 2025-05-21 10:52:34,629 - __main__ - INFO - Got 1 pages to do for olmocr_workspace/job_1747795907/input.pdf in worker 0
  6348. 2025-05-21 10:52:35,294 - sglang - INFO - [2025-05-21 10:52:35 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6349. 2025-05-21 10:52:35,294 - __main__ - INFO - [2025-05-21 10:52:35 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6350. 2025-05-21 10:52:35,294 - __main__ - INFO - sglang running req: 0 queue req: 0
  6351. 2025-05-21 10:52:35,916 - sglang - INFO - [2025-05-21 10:52:35] The server is fired up and ready to roll!
  6352. 2025-05-21 10:52:35,916 - __main__ - INFO - [2025-05-21 10:52:35] The server is fired up and ready to roll!
  6353. 2025-05-21 10:52:41,022 - __main__ - INFO - Built page query for olmocr_workspace/job_1747795907/input.pdf-1
  6354. 2025-05-21 10:52:44,679 - __main__ - INFO - Queue remaining: 0
  6355. 2025-05-21 10:52:44,679 - __main__ - INFO -
  6356. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6357. ----------------------------------------------------------------------------------
  6358. 2025-05-21 10:52:44,679 - __main__ - INFO -
  6359. Worker ID | started
  6360. ----------+--------
  6361. 0 | 1
  6362. 2025-05-21 10:52:54,680 - __main__ - INFO - Queue remaining: 0
  6363. 2025-05-21 10:52:54,680 - __main__ - INFO -
  6364. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6365. ----------------------------------------------------------------------------------
  6366. 2025-05-21 10:52:54,680 - __main__ - INFO -
  6367. Worker ID | started
  6368. ----------+--------
  6369. 0 | 1
  6370. 2025-05-21 10:52:55,180 - sglang - INFO - [2025-05-21 10:52:55 TP0] Prefill batch. #new-seq: 1, #new-token: 1859, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6371. 2025-05-21 10:52:55,180 - __main__ - INFO - sglang running req: 0 queue req: 0
  6372. 2025-05-21 10:52:57,568 - sglang - INFO - [2025-05-21 10:52:57 TP0] Decode batch. #running-req: 1, #token: 1892, token usage: 0.05, gen throughput (token/s): 1.71, #queue-req: 0
  6373. 2025-05-21 10:52:57,568 - __main__ - INFO - sglang running req: 1 queue req: 0
  6374. 2025-05-21 10:52:58,388 - sglang - INFO - [2025-05-21 10:52:58 TP0] Decode batch. #running-req: 1, #token: 1932, token usage: 0.05, gen throughput (token/s): 48.78, #queue-req: 0
  6375. 2025-05-21 10:52:58,388 - __main__ - INFO - sglang running req: 1 queue req: 0
  6376. 2025-05-21 10:52:59,208 - sglang - INFO - [2025-05-21 10:52:59 TP0] Decode batch. #running-req: 1, #token: 1972, token usage: 0.05, gen throughput (token/s): 48.76, #queue-req: 0
  6377. 2025-05-21 10:52:59,208 - __main__ - INFO - sglang running req: 1 queue req: 0
  6378. 2025-05-21 10:53:00,028 - sglang - INFO - [2025-05-21 10:53:00 TP0] Decode batch. #running-req: 1, #token: 2012, token usage: 0.05, gen throughput (token/s): 48.79, #queue-req: 0
  6379. 2025-05-21 10:53:00,028 - __main__ - INFO - sglang running req: 1 queue req: 0
  6380. 2025-05-21 10:53:00,849 - sglang - INFO - [2025-05-21 10:53:00 TP0] Decode batch. #running-req: 1, #token: 2052, token usage: 0.05, gen throughput (token/s): 48.74, #queue-req: 0
  6381. 2025-05-21 10:53:00,849 - __main__ - INFO - sglang running req: 1 queue req: 0
  6382. 2025-05-21 10:53:01,671 - sglang - INFO - [2025-05-21 10:53:01 TP0] Decode batch. #running-req: 1, #token: 2092, token usage: 0.06, gen throughput (token/s): 48.64, #queue-req: 0
  6383. 2025-05-21 10:53:01,671 - __main__ - INFO - sglang running req: 1 queue req: 0
  6384. 2025-05-21 10:53:02,494 - sglang - INFO - [2025-05-21 10:53:02 TP0] Decode batch. #running-req: 1, #token: 2132, token usage: 0.06, gen throughput (token/s): 48.61, #queue-req: 0
  6385. 2025-05-21 10:53:02,494 - __main__ - INFO - sglang running req: 1 queue req: 0
  6386. 2025-05-21 10:53:02,543 - __main__ - INFO - Finished TaskGroup for worker on 92a0d2c3d6bc2676d1a017a5af100cdd331b9231
  6387. 2025-05-21 10:53:02,543 - __main__ - INFO - Got 1 docs for 92a0d2c3d6bc2676d1a017a5af100cdd331b9231
  6388. 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 1 exiting due to empty queue
  6389. 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 2 exiting due to empty queue
  6390. 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 3 exiting due to empty queue
  6391. 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 4 exiting due to empty queue
  6392. 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 5 exiting due to empty queue
  6393. 2025-05-21 10:53:02,545 - __main__ - INFO - Worker 6 exiting due to empty queue
  6394. 2025-05-21 10:53:02,546 - __main__ - INFO - Worker 7 exiting due to empty queue
  6395. 2025-05-21 10:53:02,546 - __main__ - INFO - Worker 0 exiting due to empty queue
  6396. 2025-05-21 10:53:02,546 - __main__ - INFO - Work done
  6397. 2025-05-21 10:53:02,546 - __main__ - INFO - Got cancellation request for SGLang server
  6398. 2025-07-19 23:00:28,292 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  6399. 2025-07-19 23:00:28,293 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  6400. 2025-07-19 23:00:28,293 - __main__ - INFO - Found 1 total pdf paths to add
  6401. 2025-07-19 23:00:28,299 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  6402. 2025-07-19 23:00:28,492 - __main__ - INFO - Starting pipeline with PID 551007
  6403. 2025-07-19 23:00:28,492 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  6404. 2025-07-19 23:04:05,046 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  6405. 2025-07-19 23:04:05,046 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  6406. 2025-07-19 23:04:05,046 - __main__ - INFO - Found 1 total pdf paths to add
  6407. 2025-07-19 23:04:05,050 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  6408. 2025-07-19 23:04:05,271 - __main__ - INFO - Starting pipeline with PID 551127
  6409. 2025-07-19 23:04:05,271 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  6410. 2025-07-19 23:04:05,740 - __main__ - INFO - No work to do, exiting
  6411. 2025-07-19 23:04:30,925 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  6412. 2025-07-19 23:04:30,925 - __main__ - INFO - Loading file at tests/gnarly_pdfs/ambiguous.pdf as PDF document
  6413. 2025-07-19 23:04:30,925 - __main__ - INFO - Loading file at tests/gnarly_pdfs/badlines.pdf as PDF document
  6414. 2025-07-19 23:04:30,926 - __main__ - INFO - Loading file at tests/gnarly_pdfs/bws_book_ch2.pdf as PDF document
  6415. 2025-07-19 23:04:30,926 - __main__ - INFO - Loading file at tests/gnarly_pdfs/delivery.pdf as PDF document
  6416. 2025-07-19 23:04:30,927 - __main__ - INFO - Loading file at tests/gnarly_pdfs/discoverworld_crazy_tables.pdf as PDF document
  6417. 2025-07-19 23:04:30,927 - __main__ - INFO - Loading file at tests/gnarly_pdfs/dolma-page-1.pdf as PDF document
  6418. 2025-07-19 23:04:30,927 - __main__ - INFO - Loading file at tests/gnarly_pdfs/edgar.pdf as PDF document
  6419. 2025-07-19 23:04:30,928 - __main__ - INFO - Loading file at tests/gnarly_pdfs/failing_anchor_pg4.pdf as PDF document
  6420. 2025-07-19 23:04:30,928 - __main__ - INFO - Loading file at tests/gnarly_pdfs/failing_pdf_pg9.pdf as PDF document
  6421. 2025-07-19 23:04:30,928 - __main__ - INFO - Loading file at tests/gnarly_pdfs/form_on_later_pages.pdf as PDF document
  6422. 2025-07-19 23:04:30,928 - __main__ - INFO - Loading file at tests/gnarly_pdfs/guidebook_failed_pages.pdf as PDF document
  6423. 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/handwriting_bad_ocr.pdf as PDF document
  6424. 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  6425. 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/instructions_and_schematics.pdf as PDF document
  6426. 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/large_prompt_hint1.pdf as PDF document
  6427. 2025-07-19 23:04:30,929 - __main__ - INFO - Loading file at tests/gnarly_pdfs/large_prompt_hint2.pdf as PDF document
  6428. 2025-07-19 23:04:30,930 - __main__ - INFO - Loading file at tests/gnarly_pdfs/large_prompt_hint3.pdf as PDF document
  6429. 2025-07-19 23:04:30,930 - __main__ - INFO - Loading file at tests/gnarly_pdfs/load_v_error.pdf as PDF document
  6430. 2025-07-19 23:04:30,930 - __main__ - INFO - Loading file at tests/gnarly_pdfs/lots_of_chem_tables.pdf as PDF document
  6431. 2025-07-19 23:04:30,931 - __main__ - INFO - Loading file at tests/gnarly_pdfs/lots_of_sci_tables.pdf as PDF document
  6432. 2025-07-19 23:04:30,931 - __main__ - INFO - Loading file at tests/gnarly_pdfs/map1.pdf as PDF document
  6433. 2025-07-19 23:04:30,931 - __main__ - INFO - Loading file at tests/gnarly_pdfs/most_content_in_image_form.pdf as PDF document
  6434. 2025-07-19 23:04:30,931 - __main__ - INFO - Loading file at tests/gnarly_pdfs/newspaper.pdf as PDF document
  6435. 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/not_parsing.pdf as PDF document
  6436. 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/not_parsing2.pdf as PDF document
  6437. 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/olmo-page-1.pdf as PDF document
  6438. 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/overrun_on_pg8.pdf as PDF document
  6439. 2025-07-19 23:04:30,932 - __main__ - INFO - Loading file at tests/gnarly_pdfs/pdftotext_two_column_issue.pdf as PDF document
  6440. 2025-07-19 23:04:30,933 - __main__ - INFO - Loading file at tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf as PDF document
  6441. 2025-07-19 23:04:30,933 - __main__ - INFO - Loading file at tests/gnarly_pdfs/skinnypage.pdf as PDF document
  6442. 2025-07-19 23:04:30,933 - __main__ - INFO - Loading file at tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf as PDF document
  6443. 2025-07-19 23:04:30,934 - __main__ - INFO - Loading file at tests/gnarly_pdfs/slideshow_mostly_images.pdf as PDF document
  6444. 2025-07-19 23:04:30,934 - __main__ - INFO - Loading file at tests/gnarly_pdfs/small_page_size.pdf as PDF document
  6445. 2025-07-19 23:04:30,934 - __main__ - INFO - Loading file at tests/gnarly_pdfs/some_ocr1.pdf as PDF document
  6446. 2025-07-19 23:04:30,934 - __main__ - INFO - Loading file at tests/gnarly_pdfs/ti89_guidebook_programming.pdf as PDF document
  6447. 2025-07-19 23:04:30,935 - __main__ - INFO - Loading file at tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf as PDF document
  6448. 2025-07-19 23:04:30,935 - __main__ - INFO - Found 36 total pdf paths to add
  6449. 2025-07-19 23:04:31,500 - __main__ - INFO - Calculated items_per_group: 32 based on average pages per PDF: 15.42
  6450. 2025-07-19 23:04:31,714 - __main__ - INFO - Starting pipeline with PID 551214
  6451. 2025-07-19 23:04:31,714 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  6452. 2025-07-19 23:04:37,334 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  6453. 2025-07-19 23:04:38,367 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  6454. 2025-07-19 23:04:39,407 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  6455. 2025-07-19 23:04:40,465 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  6456. 2025-07-19 23:04:41,519 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  6457. 2025-07-19 23:04:42,584 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  6458. 2025-07-19 23:04:43,649 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  6459. 2025-07-19 23:04:44,718 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  6460. 2025-07-19 23:04:45,779 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  6461. 2025-07-19 23:04:46,844 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  6462. 2025-07-19 23:04:47,273 - sglang - INFO - [2025-07-19 23:04:47] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=83915160, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  6463. 2025-07-19 23:04:47,273 - __main__ - INFO - [2025-07-19 23:04:47] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=83915160, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  6464. 2025-07-19 23:04:47,912 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  6465. 2025-07-19 23:04:48,984 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  6466. 2025-07-19 23:04:50,054 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  6467. 2025-07-19 23:04:51,122 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  6468. 2025-07-19 23:04:51,816 - sglang - INFO - [2025-07-19 23:04:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
  6469. 2025-07-19 23:04:51,816 - __main__ - INFO - [2025-07-19 23:04:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
  6470. 2025-07-19 23:04:52,199 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  6471. 2025-07-19 23:04:53,256 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  6472. 2025-07-19 23:04:54,312 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  6473. 2025-07-19 23:04:55,353 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  6474. 2025-07-19 23:04:56,400 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  6475. 2025-07-19 23:04:57,453 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  6476. 2025-07-19 23:04:57,680 - sglang - INFO - [2025-07-19 23:04:57 TP0] Overlap scheduler is disabled for multimodal models.
  6477. 2025-07-19 23:04:57,681 - __main__ - INFO - [2025-07-19 23:04:57 TP0] Overlap scheduler is disabled for multimodal models.
  6478. 2025-07-19 23:04:58,369 - sglang - INFO - [2025-07-19 23:04:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  6479. 2025-07-19 23:04:58,369 - __main__ - INFO - [2025-07-19 23:04:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  6480. 2025-07-19 23:04:58,369 - sglang - INFO - [2025-07-19 23:04:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  6481. 2025-07-19 23:04:58,369 - __main__ - INFO - [2025-07-19 23:04:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  6482. 2025-07-19 23:04:58,369 - sglang - INFO - [2025-07-19 23:04:58 TP0] Init torch distributed begin.
  6483. 2025-07-19 23:04:58,369 - __main__ - INFO - [2025-07-19 23:04:58 TP0] Init torch distributed begin.
  6484. 2025-07-19 23:04:58,530 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  6485. 2025-07-19 23:04:59,597 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  6486. 2025-07-19 23:05:00,668 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  6487. 2025-07-19 23:05:01,736 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  6488. 2025-07-19 23:05:02,803 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  6489. 2025-07-19 23:05:03,788 - sglang - INFO - [2025-07-19 23:05:03 TP0] Load weight begin. avail mem=23.33 GB
  6490. 2025-07-19 23:05:03,788 - __main__ - INFO - [2025-07-19 23:05:03 TP0] Load weight begin. avail mem=23.33 GB
  6491. 2025-07-19 23:05:03,879 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  6492. 2025-07-19 23:05:04,948 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  6493. 2025-07-19 23:05:05,134 - sglang - INFO - [2025-07-19 23:05:05 TP0] Using model weights format ['*.safetensors']
  6494. 2025-07-19 23:05:05,134 - __main__ - INFO - [2025-07-19 23:05:05 TP0] Using model weights format ['*.safetensors']
  6495. 2025-07-19 23:05:05,873 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  6496. 2025-07-19 23:05:05,874 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  6497. 2025-07-19 23:05:06,027 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  6498. 2025-07-19 23:05:07,097 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  6499. 2025-07-19 23:05:08,166 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  6500. 2025-07-19 23:05:09,236 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  6501. 2025-07-19 23:05:10,303 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  6502. 2025-07-19 23:05:10,368 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.49s/it]
  6503. 2025-07-19 23:05:10,369 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.49s/it]
  6504. 2025-07-19 23:05:11,382 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  6505. 2025-07-19 23:05:12,447 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  6506. 2025-07-19 23:05:13,501 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  6507. 2025-07-19 23:05:14,571 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  6508. 2025-07-19 23:05:15,639 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  6509. 2025-07-19 23:05:16,707 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  6510. 2025-07-19 23:05:17,777 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  6511. 2025-07-19 23:05:18,847 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  6512. 2025-07-19 23:05:19,917 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  6513. 2025-07-19 23:05:20,987 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  6514. 2025-07-19 23:05:22,053 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  6515. 2025-07-19 23:05:23,123 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  6516. 2025-07-19 23:05:23,658 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:17<00:19, 9.67s/it]
  6517. 2025-07-19 23:05:23,658 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:17<00:19, 9.67s/it]
  6518. 2025-07-19 23:05:24,202 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  6519. 2025-07-19 23:05:25,271 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  6520. 2025-07-19 23:05:26,341 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  6521. 2025-07-19 23:05:27,407 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  6522. 2025-07-19 23:05:28,473 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  6523. 2025-07-19 23:05:29,527 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  6524. 2025-07-19 23:05:30,594 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  6525. 2025-07-19 23:05:31,664 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  6526. 2025-07-19 23:05:32,733 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  6527. 2025-07-19 23:05:33,804 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  6528. 2025-07-19 23:05:34,869 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  6529. 2025-07-19 23:05:35,940 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  6530. 2025-07-19 23:05:36,659 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:30<00:11, 11.19s/it]
  6531. 2025-07-19 23:05:36,659 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:30<00:11, 11.19s/it]
  6532. 2025-07-19 23:05:37,020 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  6533. 2025-07-19 23:05:38,087 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  6534. 2025-07-19 23:05:39,157 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  6535. 2025-07-19 23:05:40,223 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  6536. 2025-07-19 23:05:41,299 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  6537. 2025-07-19 23:05:42,363 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  6538. 2025-07-19 23:05:43,430 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  6539. 2025-07-19 23:05:44,496 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  6540. 2025-07-19 23:05:45,550 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  6541. 2025-07-19 23:05:46,615 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  6542. 2025-07-19 23:05:47,684 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
  6543. 2025-07-19 23:05:48,752 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
  6544. 2025-07-19 23:05:49,817 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
  6545. 2025-07-19 23:05:50,037 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 12.05s/it]
  6546. 2025-07-19 23:05:50,037 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 12.05s/it]
  6547. 2025-07-19 23:05:50,038 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 11.04s/it]
  6548. 2025-07-19 23:05:50,038 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 11.04s/it]
  6549. 2025-07-19 23:05:50,038 - sglang - INFO -
  6550. 2025-07-19 23:05:50,038 - __main__ - INFO -
  6551. 2025-07-19 23:05:50,305 - sglang - INFO - [2025-07-19 23:05:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  6552. 2025-07-19 23:05:50,305 - __main__ - INFO - [2025-07-19 23:05:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  6553. 2025-07-19 23:05:50,319 - sglang - INFO - [2025-07-19 23:05:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  6554. 2025-07-19 23:05:50,319 - __main__ - INFO - [2025-07-19 23:05:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  6555. 2025-07-19 23:05:50,319 - sglang - INFO - [2025-07-19 23:05:50 TP0] Memory pool end. avail mem=5.30 GB
  6556. 2025-07-19 23:05:50,319 - __main__ - INFO - [2025-07-19 23:05:50 TP0] Memory pool end. avail mem=5.30 GB
  6557. 2025-07-19 23:05:50,611 - sglang - INFO - [2025-07-19 23:05:50 TP0] Capture cuda graph begin. This can take up to several minutes.
  6558. 2025-07-19 23:05:50,612 - __main__ - INFO - [2025-07-19 23:05:50 TP0] Capture cuda graph begin. This can take up to several minutes.
  6559. 2025-07-19 23:05:50,895 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
  6560. 2025-07-19 23:05:51,970 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
  6561. 2025-07-19 23:05:53,047 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
  6562. 2025-07-19 23:05:53,309 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.59s/it] 50%|█████ | 2/4 [00:01<00:01, 1.14it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.57it/s] 100%|██████████| 4/4 [00:02<00:00, 1.88it/s] 100%|██████████| 4/4 [00:02<00:00, 1.49it/s]
  6563. 2025-07-19 23:05:53,310 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.59s/it] 50%|█████ | 2/4 [00:01<00:01, 1.14it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.57it/s] 100%|██████████| 4/4 [00:02<00:00, 1.88it/s] 100%|██████████| 4/4 [00:02<00:00, 1.49it/s]
  6564. 2025-07-19 23:05:53,310 - sglang - INFO - [2025-07-19 23:05:53 TP0] Capture cuda graph end. Time elapsed: 2.70 s
  6565. 2025-07-19 23:05:53,310 - __main__ - INFO - [2025-07-19 23:05:53 TP0] Capture cuda graph end. Time elapsed: 2.70 s
  6566. 2025-07-19 23:05:54,125 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
  6567. 2025-07-19 23:05:55,193 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
  6568. 2025-07-19 23:05:56,262 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
  6569. 2025-07-19 23:05:56,889 - sglang - INFO - [2025-07-19 23:05:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  6570. 2025-07-19 23:05:56,889 - __main__ - INFO - [2025-07-19 23:05:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  6571. 2025-07-19 23:05:57,362 - __main__ - INFO - sglang server is ready.
  6572. 2025-07-19 23:05:57,363 - __main__ - INFO - Queue remaining: 2
  6573. 2025-07-19 23:05:57,363 - __main__ - INFO -
  6574. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6575. ----------------------------------------------------------------------------------
  6576. 2025-07-19 23:05:57,363 - __main__ - INFO -
  6577. Worker ID
  6578. ---------
  6579. 2025-07-19 23:05:57,363 - __main__ - INFO - Worker 0 processing work item b903c79fc04852a9f203dfa04143731928e937aa
  6580. 2025-07-19 23:05:57,365 - __main__ - INFO - Created all tasks for b903c79fc04852a9f203dfa04143731928e937aa
  6581. 2025-07-19 23:05:57,385 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/ambiguous.pdf in worker 0
  6582. 2025-07-19 23:05:57,395 - __main__ - INFO - Got 48 pages to do for tests/gnarly_pdfs/bws_book_ch2.pdf in worker 0
  6583. 2025-07-19 23:05:57,397 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/dolma-page-1.pdf in worker 0
  6584. 2025-07-19 23:05:57,403 - __main__ - INFO - Got 8 pages to do for tests/gnarly_pdfs/failing_anchor_pg4.pdf in worker 0
  6585. 2025-07-19 23:05:57,407 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/edgar.pdf in worker 0
  6586. 2025-07-19 23:05:57,412 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/failing_pdf_pg9.pdf in worker 0
  6587. 2025-07-19 23:05:57,427 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/form_on_later_pages.pdf in worker 0
  6588. 2025-07-19 23:05:57,434 - __main__ - INFO - Got 3 pages to do for tests/gnarly_pdfs/guidebook_failed_pages.pdf in worker 0
  6589. 2025-07-19 23:05:57,446 - __main__ - INFO - Got 29 pages to do for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf in worker 0
  6590. 2025-07-19 23:05:57,449 - __main__ - INFO - Got 6 pages to do for tests/gnarly_pdfs/large_prompt_hint2.pdf in worker 0
  6591. 2025-07-19 23:05:57,454 - __main__ - INFO - Got 4 pages to do for tests/gnarly_pdfs/large_prompt_hint3.pdf in worker 0
  6592. 2025-07-19 23:05:57,456 - __main__ - INFO - Got 2 pages to do for tests/gnarly_pdfs/handwriting_bad_ocr.pdf in worker 0
  6593. 2025-07-19 23:05:57,568 - __main__ - INFO - Got 27 pages to do for tests/gnarly_pdfs/large_prompt_hint1.pdf in worker 0
  6594. 2025-07-19 23:05:57,570 - __main__ - INFO - Got 6 pages to do for tests/gnarly_pdfs/lots_of_sci_tables.pdf in worker 0
  6595. 2025-07-19 23:05:57,652 - __main__ - INFO - Got 106 pages to do for tests/gnarly_pdfs/instructions_and_schematics.pdf in worker 0
  6596. 2025-07-19 23:05:57,656 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/lots_of_chem_tables.pdf in worker 0
  6597. 2025-07-19 23:05:57,660 - __main__ - INFO - Got 7 pages to do for tests/gnarly_pdfs/most_content_in_image_form.pdf in worker 0
  6598. 2025-07-19 23:05:57,666 - __main__ - INFO - Got 8 pages to do for tests/gnarly_pdfs/not_parsing.pdf in worker 0
  6599. 2025-07-19 23:05:57,668 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/newspaper.pdf in worker 0
  6600. 2025-07-19 23:05:57,672 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/map1.pdf in worker 0
  6601. 2025-07-19 23:05:57,682 - __main__ - INFO - Got 16 pages to do for tests/gnarly_pdfs/load_v_error.pdf in worker 0
  6602. 2025-07-19 23:05:57,698 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/not_parsing2.pdf in worker 0
  6603. 2025-07-19 23:05:57,710 - __main__ - INFO - Got 54 pages to do for tests/gnarly_pdfs/overrun_on_pg8.pdf in worker 0
  6604. 2025-07-19 23:05:57,714 - __main__ - INFO - Got 14 pages to do for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf in worker 0
  6605. 2025-07-19 23:05:57,715 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/olmo-page-1.pdf in worker 0
  6606. 2025-07-19 23:05:57,718 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf in worker 0
  6607. 2025-07-19 23:05:57,719 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/small_page_size.pdf in worker 0
  6608. 2025-07-19 23:05:58,342 - __main__ - INFO - Got 2 pages to do for tests/gnarly_pdfs/skinnypage.pdf in worker 0
  6609. 2025-07-19 23:05:58,344 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/some_ocr1.pdf in worker 0
  6610. 2025-07-19 23:05:58,349 - __main__ - INFO - Got 26 pages to do for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf in worker 0
  6611. 2025-07-19 23:05:58,356 - __main__ - INFO - Got 40 pages to do for tests/gnarly_pdfs/ti89_guidebook_programming.pdf in worker 0
  6612. 2025-07-19 23:05:58,434 - __main__ - INFO - Got 68 pages to do for tests/gnarly_pdfs/slideshow_mostly_images.pdf in worker 0
  6613. 2025-07-19 23:05:58,646 - sglang - INFO - [2025-07-19 23:05:58 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6614. 2025-07-19 23:05:58,646 - __main__ - INFO - [2025-07-19 23:05:58 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  6615. 2025-07-19 23:05:58,647 - __main__ - INFO - sglang running req: 0 queue req: 0
  6616. 2025-07-19 23:06:04,437 - sglang - INFO - [2025-07-19 23:06:04] The server is fired up and ready to roll!
  6617. 2025-07-19 23:06:04,437 - __main__ - INFO - [2025-07-19 23:06:04] The server is fired up and ready to roll!
  6618. 2025-07-19 23:06:07,432 - __main__ - INFO - Queue remaining: 1
  6619. 2025-07-19 23:06:07,433 - __main__ - INFO -
  6620. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6621. ----------------------------------------------------------------------------------
  6622. 2025-07-19 23:06:07,433 - __main__ - INFO -
  6623. Worker ID | started
  6624. ----------+--------
  6625. 0 | 529
  6626. 2025-07-19 23:06:13,162 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ambiguous.pdf-1
  6627. 2025-07-19 23:06:13,259 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-1
  6628. 2025-07-19 23:06:13,264 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-2
  6629. 2025-07-19 23:06:13,355 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-4
  6630. 2025-07-19 23:06:13,374 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-3
  6631. 2025-07-19 23:06:13,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-5
  6632. 2025-07-19 23:06:13,463 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-6
  6633. 2025-07-19 23:06:13,465 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-7
  6634. 2025-07-19 23:06:13,546 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-8
  6635. 2025-07-19 23:06:13,558 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-10
  6636. 2025-07-19 23:06:13,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-9
  6637. 2025-07-19 23:06:13,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-11
  6638. 2025-07-19 23:06:13,639 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-13
  6639. 2025-07-19 23:06:13,641 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-12
  6640. 2025-07-19 23:06:13,647 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-14
  6641. 2025-07-19 23:06:13,669 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-15
  6642. 2025-07-19 23:06:13,674 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-17
  6643. 2025-07-19 23:06:13,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-18
  6644. 2025-07-19 23:06:13,739 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-16
  6645. 2025-07-19 23:06:13,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-19
  6646. 2025-07-19 23:06:13,750 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-21
  6647. 2025-07-19 23:06:13,761 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-20
  6648. 2025-07-19 23:06:13,766 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-23
  6649. 2025-07-19 23:06:13,768 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-22
  6650. 2025-07-19 23:06:13,769 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-25
  6651. 2025-07-19 23:06:13,836 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-24
  6652. 2025-07-19 23:06:13,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-26
  6653. 2025-07-19 23:06:13,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-27
  6654. 2025-07-19 23:06:13,843 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-28
  6655. 2025-07-19 23:06:13,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-30
  6656. 2025-07-19 23:06:13,857 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-33
  6657. 2025-07-19 23:06:13,861 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-31
  6658. 2025-07-19 23:06:13,862 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-29
  6659. 2025-07-19 23:06:13,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-34
  6660. 2025-07-19 23:06:13,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-32
  6661. 2025-07-19 23:06:13,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-35
  6662. 2025-07-19 23:06:13,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-36
  6663. 2025-07-19 23:06:13,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-37
  6664. 2025-07-19 23:06:13,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-41
  6665. 2025-07-19 23:06:13,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-38
  6666. 2025-07-19 23:06:13,951 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-39
  6667. 2025-07-19 23:06:13,952 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-40
  6668. 2025-07-19 23:06:13,967 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-44
  6669. 2025-07-19 23:06:14,039 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-42
  6670. 2025-07-19 23:06:14,041 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-45
  6671. 2025-07-19 23:06:14,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-43
  6672. 2025-07-19 23:06:14,045 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-48
  6673. 2025-07-19 23:06:14,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-46
  6674. 2025-07-19 23:06:14,047 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-47
  6675. 2025-07-19 23:06:14,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-1
  6676. 2025-07-19 23:06:14,143 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-2
  6677. 2025-07-19 23:06:14,153 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-3
  6678. 2025-07-19 23:06:14,155 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-7
  6679. 2025-07-19 23:06:14,156 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-8
  6680. 2025-07-19 23:06:14,158 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-4
  6681. 2025-07-19 23:06:14,235 - __main__ - INFO - Built page query for tests/gnarly_pdfs/edgar.pdf-1
  6682. 2025-07-19 23:06:14,236 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-1
  6683. 2025-07-19 23:06:14,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-6
  6684. 2025-07-19 23:06:14,243 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-2
  6685. 2025-07-19 23:06:14,249 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-9
  6686. 2025-07-19 23:06:14,251 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-4
  6687. 2025-07-19 23:06:14,253 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-5
  6688. 2025-07-19 23:06:14,259 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-8
  6689. 2025-07-19 23:06:14,262 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-6
  6690. 2025-07-19 23:06:14,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-3
  6691. 2025-07-19 23:06:14,360 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-7
  6692. 2025-07-19 23:06:14,361 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-4
  6693. 2025-07-19 23:06:14,363 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-3
  6694. 2025-07-19 23:06:14,435 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-5
  6695. 2025-07-19 23:06:14,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-7
  6696. 2025-07-19 23:06:14,440 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-2
  6697. 2025-07-19 23:06:14,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-6
  6698. 2025-07-19 23:06:14,453 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-8
  6699. 2025-07-19 23:06:14,461 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-10
  6700. 2025-07-19 23:06:14,537 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-1
  6701. 2025-07-19 23:06:14,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-9
  6702. 2025-07-19 23:06:14,543 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-2
  6703. 2025-07-19 23:06:14,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-2
  6704. 2025-07-19 23:06:14,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-6
  6705. 2025-07-19 23:06:14,638 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-5
  6706. 2025-07-19 23:06:14,651 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-10
  6707. 2025-07-19 23:06:14,653 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-4
  6708. 2025-07-19 23:06:14,654 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-12
  6709. 2025-07-19 23:06:14,654 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-9
  6710. 2025-07-19 23:06:14,656 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-5
  6711. 2025-07-19 23:06:14,662 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-14
  6712. 2025-07-19 23:06:14,735 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-13
  6713. 2025-07-19 23:06:14,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-18
  6714. 2025-07-19 23:06:14,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-15
  6715. 2025-07-19 23:06:14,755 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-1
  6716. 2025-07-19 23:06:14,758 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-21
  6717. 2025-07-19 23:06:14,834 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-20
  6718. 2025-07-19 23:06:14,835 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-1
  6719. 2025-07-19 23:06:14,842 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-24
  6720. 2025-07-19 23:06:14,855 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-25
  6721. 2025-07-19 23:06:14,856 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-23
  6722. 2025-07-19 23:06:14,857 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-11
  6723. 2025-07-19 23:06:14,858 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-19
  6724. 2025-07-19 23:06:14,858 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-17
  6725. 2025-07-19 23:06:14,860 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-27
  6726. 2025-07-19 23:06:14,860 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-16
  6727. 2025-07-19 23:06:14,934 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-26
  6728. 2025-07-19 23:06:14,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-29
  6729. 2025-07-19 23:06:14,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-28
  6730. 2025-07-19 23:06:14,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-6
  6731. 2025-07-19 23:06:14,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-4
  6732. 2025-07-19 23:06:14,955 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-3
  6733. 2025-07-19 23:06:15,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-8
  6734. 2025-07-19 23:06:15,039 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-3
  6735. 2025-07-19 23:06:15,041 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-2
  6736. 2025-07-19 23:06:15,042 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-22
  6737. 2025-07-19 23:06:15,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-3
  6738. 2025-07-19 23:06:15,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-1
  6739. 2025-07-19 23:06:15,047 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-1
  6740. 2025-07-19 23:06:15,054 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-4
  6741. 2025-07-19 23:06:15,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-3
  6742. 2025-07-19 23:06:15,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-5
  6743. 2025-07-19 23:06:15,160 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-7
  6744. 2025-07-19 23:06:15,644 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-2
  6745. 2025-07-19 23:06:16,250 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-2
  6746. 2025-07-19 23:06:16,252 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-4
  6747. 2025-07-19 23:06:16,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-12
  6748. 2025-07-19 23:06:16,538 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-16
  6749. 2025-07-19 23:06:16,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-1
  6750. 2025-07-19 23:06:16,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-19
  6751. 2025-07-19 23:06:16,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-9
  6752. 2025-07-19 23:06:16,640 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-5
  6753. 2025-07-19 23:06:16,733 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-1
  6754. 2025-07-19 23:06:16,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-7
  6755. 2025-07-19 23:06:16,745 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-15
  6756. 2025-07-19 23:06:16,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-6
  6757. 2025-07-19 23:06:16,752 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-14
  6758. 2025-07-19 23:06:16,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-1
  6759. 2025-07-19 23:06:16,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-2
  6760. 2025-07-19 23:06:16,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-3
  6761. 2025-07-19 23:06:16,948 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-18
  6762. 2025-07-19 23:06:17,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-3
  6763. 2025-07-19 23:06:17,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-6
  6764. 2025-07-19 23:06:17,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-13
  6765. 2025-07-19 23:06:17,049 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-4
  6766. 2025-07-19 23:06:17,133 - __main__ - INFO - Built page query for tests/gnarly_pdfs/dolma-page-1.pdf-1
  6767. 2025-07-19 23:06:17,134 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-5
  6768. 2025-07-19 23:06:17,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-10
  6769. 2025-07-19 23:06:17,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-11
  6770. 2025-07-19 23:06:17,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-9
  6771. 2025-07-19 23:06:17,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-20
  6772. 2025-07-19 23:06:17,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-23
  6773. 2025-07-19 23:06:17,234 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-12
  6774. 2025-07-19 23:06:17,236 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-13
  6775. 2025-07-19 23:06:17,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-8
  6776. 2025-07-19 23:06:17,245 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-7
  6777. 2025-07-19 23:06:17,253 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-22
  6778. 2025-07-19 23:06:17,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-19
  6779. 2025-07-19 23:06:17,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-15
  6780. 2025-07-19 23:06:17,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-10
  6781. 2025-07-19 23:06:17,359 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-25
  6782. 2025-07-19 23:06:17,433 - __main__ - INFO - Queue remaining: 1
  6783. 2025-07-19 23:06:17,434 - __main__ - INFO -
  6784. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  6785. ----------------------------------------------------------------------------------
  6786. 2025-07-19 23:06:17,434 - __main__ - INFO -
  6787. Worker ID | started
  6788. ----------+--------
  6789. 0 | 529
  6790. 2025-07-19 23:06:17,434 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-18
  6791. 2025-07-19 23:06:17,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-17
  6792. 2025-07-19 23:06:17,442 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-2
  6793. 2025-07-19 23:06:17,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-23
  6794. 2025-07-19 23:06:17,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-22
  6795. 2025-07-19 23:06:17,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-24
  6796. 2025-07-19 23:06:17,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-6
  6797. 2025-07-19 23:06:17,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-21
  6798. 2025-07-19 23:06:17,450 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-21
  6799. 2025-07-19 23:06:17,533 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-26
  6800. 2025-07-19 23:06:17,540 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-27
  6801. 2025-07-19 23:06:17,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-25
  6802. 2025-07-19 23:06:17,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-28
  6803. 2025-07-19 23:06:17,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-14
  6804. 2025-07-19 23:06:17,658 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-3
  6805. 2025-07-19 23:06:17,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-4
  6806. 2025-07-19 23:06:17,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-35
  6807. 2025-07-19 23:06:17,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-29
  6808. 2025-07-19 23:06:17,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-31
  6809. 2025-07-19 23:06:17,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-34
  6810. 2025-07-19 23:06:17,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-32
  6811. 2025-07-19 23:06:17,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-20
  6812. 2025-07-19 23:06:17,748 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-33
  6813. 2025-07-19 23:06:17,752 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-37
  6814. 2025-07-19 23:06:17,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-41
  6815. 2025-07-19 23:06:17,834 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-16
  6816. 2025-07-19 23:06:17,835 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-26
  6817. 2025-07-19 23:06:17,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-38
  6818. 2025-07-19 23:06:17,846 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-30
  6819. 2025-07-19 23:06:17,847 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-39
  6820. 2025-07-19 23:06:17,848 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-36
  6821. 2025-07-19 23:06:17,850 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-45
  6822. 2025-07-19 23:06:17,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-42
  6823. 2025-07-19 23:06:17,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-8
  6824. 2025-07-19 23:06:17,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-43
  6825. 2025-07-19 23:06:17,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-40
  6826. 2025-07-19 23:06:17,947 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-47
  6827. 2025-07-19 23:06:18,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-27
  6828. 2025-07-19 23:06:18,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-5
  6829. 2025-07-19 23:06:18,049 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-56
  6830. 2025-07-19 23:06:18,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-50
  6831. 2025-07-19 23:06:18,052 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-51
  6832. 2025-07-19 23:06:18,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-11
  6833. 2025-07-19 23:06:18,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-58
  6834. 2025-07-19 23:06:18,234 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-52
  6835. 2025-07-19 23:06:18,240 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-17
  6836. 2025-07-19 23:06:18,336 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-49
  6837. 2025-07-19 23:06:18,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-65
  6838. 2025-07-19 23:06:18,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-62
  6839. 2025-07-19 23:06:18,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-44
  6840. 2025-07-19 23:06:18,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-59
  6841. 2025-07-19 23:06:18,344 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-67
  6842. 2025-07-19 23:06:18,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-48
  6843. 2025-07-19 23:06:18,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-70
  6844. 2025-07-19 23:06:18,349 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-24
  6845. 2025-07-19 23:06:18,351 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-61
  6846. 2025-07-19 23:06:18,353 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-54
  6847. 2025-07-19 23:06:18,433 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-68
  6848. 2025-07-19 23:06:18,440 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-64
  6849. 2025-07-19 23:06:18,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-71
  6850. 2025-07-19 23:06:18,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-63
  6851. 2025-07-19 23:06:18,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-57
  6852. 2025-07-19 23:06:18,450 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-73
  6853. 2025-07-19 23:06:18,454 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-72
  6854. 2025-07-19 23:06:18,535 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-74
  6855. 2025-07-19 23:06:18,536 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-46
  6856. 2025-07-19 23:06:18,544 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-77
  6857. 2025-07-19 23:06:18,733 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-83
  6858. 2025-07-19 23:06:18,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-66
  6859. 2025-07-19 23:06:18,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-60
  6860. 2025-07-19 23:06:18,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-79
  6861. 2025-07-19 23:06:18,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-80
  6862. 2025-07-19 23:06:18,838 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-55
  6863. 2025-07-19 23:06:18,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-1
  6864. 2025-07-19 23:06:18,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-94
  6865. 2025-07-19 23:06:18,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-89
  6866. 2025-07-19 23:06:18,846 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-53
  6867. 2025-07-19 23:06:18,848 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-87
  6868. 2025-07-19 23:06:18,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-69
  6869. 2025-07-19 23:06:18,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-2
  6870. 2025-07-19 23:06:18,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-88
  6871. 2025-07-19 23:06:18,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-3
  6872. 2025-07-19 23:06:18,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-5
  6873. 2025-07-19 23:06:18,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-98
  6874. 2025-07-19 23:06:18,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-2
  6875. 2025-07-19 23:06:18,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-1
  6876. 2025-07-19 23:06:18,949 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-5
  6877. 2025-07-19 23:06:18,951 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-85
  6878. 2025-07-19 23:06:18,951 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-4
  6879. 2025-07-19 23:06:19,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-6
  6880. 2025-07-19 23:06:19,036 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-92
  6881. 2025-07-19 23:06:19,039 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-3
  6882. 2025-07-19 23:06:19,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-7
  6883. 2025-07-19 23:06:19,044 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-4
  6884. 2025-07-19 23:06:19,045 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-9
  6885. 2025-07-19 23:06:19,045 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-101
  6886. 2025-07-19 23:06:19,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-91
  6887. 2025-07-19 23:06:19,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-95
  6888. 2025-07-19 23:06:19,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-6
  6889. 2025-07-19 23:06:19,047 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-76
  6890. 2025-07-19 23:06:19,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-99
  6891. 2025-07-19 23:06:19,152 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-2
  6892. 2025-07-19 23:06:19,234 - __main__ - INFO - Built page query for tests/gnarly_pdfs/handwriting_bad_ocr.pdf-1
  6893. 2025-07-19 23:06:19,239 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-7
  6894. 2025-07-19 23:06:19,241 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-8
  6895. 2025-07-19 23:06:19,242 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-6
  6896. 2025-07-19 23:06:19,244 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-84
  6897. 2025-07-19 23:06:19,247 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-3
  6898. 2025-07-19 23:06:19,250 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-78
  6899. 2025-07-19 23:06:19,250 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-4
  6900. 2025-07-19 23:06:19,251 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-5
  6901. 2025-07-19 23:06:19,333 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-4
  6902. 2025-07-19 23:06:19,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-3
  6903. 2025-07-19 23:06:19,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-1
  6904. 2025-07-19 23:06:19,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-8
  6905. 2025-07-19 23:06:19,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-106
  6906. 2025-07-19 23:06:19,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-86
  6907. 2025-07-19 23:06:19,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-93
  6908. 2025-07-19 23:06:19,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-81
  6909. 2025-07-19 23:06:19,347 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-1
  6910. 2025-07-19 23:06:19,349 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-14
  6911. 2025-07-19 23:06:19,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-16
  6912. 2025-07-19 23:06:19,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-2
  6913. 2025-07-19 23:06:19,433 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-15
  6914. 2025-07-19 23:06:19,434 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-6
  6915. 2025-07-19 23:06:19,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-13
  6916. 2025-07-19 23:06:19,450 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-75
  6917. 2025-07-19 23:06:19,451 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-7
  6918. 2025-07-19 23:06:19,452 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-5
  6919. 2025-07-19 23:06:19,453 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-96
  6920. 2025-07-19 23:06:19,533 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-102
  6921. 2025-07-19 23:06:19,535 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-100
  6922. 2025-07-19 23:06:19,537 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-1
  6923. 2025-07-19 23:06:19,545 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-4
  6924. 2025-07-19 23:06:19,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-10
  6925. 2025-07-19 23:06:19,635 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-5
  6926. 2025-07-19 23:06:19,645 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-3
  6927. 2025-07-19 23:06:19,647 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-7
  6928. 2025-07-19 23:06:19,649 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-9
  6929. 2025-07-19 23:06:19,652 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-8
  6930. 2025-07-19 23:06:19,733 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-3
  6931. 2025-07-19 23:06:19,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-6
  6932. 2025-07-19 23:06:19,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-103
  6933. 2025-07-19 23:06:19,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-7
  6934. 2025-07-19 23:06:19,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-12
  6935. 2025-07-19 23:06:19,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-105
  6936. 2025-07-19 23:06:19,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-10
  6937. 2025-07-19 23:06:19,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-82
  6938. 2025-07-19 23:06:19,838 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-11
  6939. 2025-07-19 23:06:19,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-21
  6940. 2025-07-19 23:06:19,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-8
  6941. 2025-07-19 23:06:19,846 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-5
  6942. 2025-07-19 23:06:19,848 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-13
  6943. 2025-07-19 23:06:19,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-16
  6944. 2025-07-19 23:06:19,934 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-15
  6945. 2025-07-19 23:06:19,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-14
  6946. 2025-07-19 23:06:19,936 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-97
  6947. 2025-07-19 23:06:19,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-104
  6948. 2025-07-19 23:06:19,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-9
  6949. 2025-07-19 23:06:19,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-22
  6950. 2025-07-19 23:06:19,942 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-24
  6951. 2025-07-19 23:06:19,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-20
  6952. 2025-07-19 23:06:19,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-17
  6953. 2025-07-19 23:06:19,948 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-18
  6954. 2025-07-19 23:06:19,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-90
  6955. 2025-07-19 23:06:19,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-2
  6956. 2025-07-19 23:06:19,953 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-11
  6957. 2025-07-19 23:06:20,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-23
  6958. 2025-07-19 23:06:20,033 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-19
  6959. 2025-07-19 23:06:20,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-28
  6960. 2025-07-19 23:06:20,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-26
  6961. 2025-07-19 23:06:20,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-25
  6962. 2025-07-19 23:06:20,049 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-4
  6963. 2025-07-19 23:06:20,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-30
  6964. 2025-07-19 23:06:20,055 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-27
  6965. 2025-07-19 23:06:20,138 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-37
  6966. 2025-07-19 23:06:20,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-31
  6967. 2025-07-19 23:06:20,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-36
  6968. 2025-07-19 23:06:20,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-32
  6969. 2025-07-19 23:06:20,145 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-41
  6970. 2025-07-19 23:06:20,150 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-6
  6971. 2025-07-19 23:06:20,152 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-39
  6972. 2025-07-19 23:06:20,159 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-29
  6973. 2025-07-19 23:06:20,235 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-3
  6974. 2025-07-19 23:06:20,236 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-40
  6975. 2025-07-19 23:06:20,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-35
  6976. 2025-07-19 23:06:20,240 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-9
  6977. 2025-07-19 23:06:20,240 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-45
  6978. 2025-07-19 23:06:20,242 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-48
  6979. 2025-07-19 23:06:20,245 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-46
  6980. 2025-07-19 23:06:20,246 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-4
  6981. 2025-07-19 23:06:20,335 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-47
  6982. 2025-07-19 23:06:20,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-43
  6983. 2025-07-19 23:06:20,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-50
  6984. 2025-07-19 23:06:20,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-52
  6985. 2025-07-19 23:06:20,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-7
  6986. 2025-07-19 23:06:20,344 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-51
  6987. 2025-07-19 23:06:20,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-42
  6988. 2025-07-19 23:06:20,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-54
  6989. 2025-07-19 23:06:20,348 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-34
  6990. 2025-07-19 23:06:20,351 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-7
  6991. 2025-07-19 23:06:20,353 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-2
  6992. 2025-07-19 23:06:20,438 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-8
  6993. 2025-07-19 23:06:20,440 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-44
  6994. 2025-07-19 23:06:20,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-12
  6995. 2025-07-19 23:06:20,546 - __main__ - INFO - Built page query for tests/gnarly_pdfs/handwriting_bad_ocr.pdf-2
  6996. 2025-07-19 23:06:20,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-49
  6997. 2025-07-19 23:06:20,550 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-38
  6998. 2025-07-19 23:06:20,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-33
  6999. 2025-07-19 23:06:20,651 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-1
  7000. 2025-07-19 23:06:20,655 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-2
  7001. 2025-07-19 23:06:20,758 - __main__ - INFO - Built page query for tests/gnarly_pdfs/newspaper.pdf-1
  7002. 2025-07-19 23:06:20,835 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-53
  7003. 2025-07-19 23:06:21,138 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-1
  7004. 2025-07-19 23:06:21,175 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-3
  7005. 2025-07-19 23:06:21,248 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-1
  7006. 2025-07-19 23:06:21,268 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-10
  7007. 2025-07-19 23:06:21,269 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-4
  7008. 2025-07-19 23:06:21,335 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
  7009. 2025-07-19 23:06:21,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-2
  7010. 2025-07-19 23:06:21,343 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-6
  7011. 2025-07-19 23:06:21,362 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-8
  7012. 2025-07-19 23:06:21,437 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-6
  7013. 2025-07-19 23:06:21,484 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-3
  7014. 2025-07-19 23:06:21,536 - __main__ - INFO - Built page query for tests/gnarly_pdfs/some_ocr1.pdf-1
  7015. 2025-07-19 23:06:21,557 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-1
  7016. 2025-07-19 23:06:21,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-14
  7017. 2025-07-19 23:06:21,635 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-14
  7018. 2025-07-19 23:06:21,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-8
  7019. 2025-07-19 23:06:21,648 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-5
  7020. 2025-07-19 23:06:21,685 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-10
  7021. 2025-07-19 23:06:21,836 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-11
  7022. 2025-07-19 23:06:21,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-7
  7023. 2025-07-19 23:06:21,880 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-12
  7024. 2025-07-19 23:06:21,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-13
  7025. 2025-07-19 23:06:21,955 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-15
  7026. 2025-07-19 23:06:21,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-21
  7027. 2025-07-19 23:06:21,960 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-4
  7028. 2025-07-19 23:06:21,961 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-5
  7029. 2025-07-19 23:06:21,972 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-22
  7030. 2025-07-19 23:06:22,065 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-16
  7031. 2025-07-19 23:06:22,069 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-13
  7032. 2025-07-19 23:06:22,072 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-17
  7033. 2025-07-19 23:06:22,074 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-12
  7034. 2025-07-19 23:06:22,086 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-26
  7035. 2025-07-19 23:06:22,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-9
  7036. 2025-07-19 23:06:22,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-5
  7037. 2025-07-19 23:06:22,149 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-1
  7038. 2025-07-19 23:06:22,167 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-8
  7039. 2025-07-19 23:06:22,176 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-11
  7040. 2025-07-19 23:06:22,237 - __main__ - INFO - Built page query for tests/gnarly_pdfs/olmo-page-1.pdf-1
  7041. 2025-07-19 23:06:22,242 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-10
  7042. 2025-07-19 23:06:22,250 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-3
  7043. 2025-07-19 23:06:22,333 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-1
  7044. 2025-07-19 23:06:22,336 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-9
  7045. 2025-07-19 23:06:22,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-7
  7046. 2025-07-19 23:06:22,342 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-18
  7047. 2025-07-19 23:06:22,349 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-6
  7048. 2025-07-19 23:06:22,359 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-2
  7049. 2025-07-19 23:06:22,383 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-6
  7050. 2025-07-19 23:06:22,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-7
  7051. 2025-07-19 23:06:22,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-20
  7052. 2025-07-19 23:06:22,451 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-8
  7053. 2025-07-19 23:06:22,475 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-24
  7054. 2025-07-19 23:06:22,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-4
  7055. 2025-07-19 23:06:22,544 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-9
  7056. 2025-07-19 23:06:22,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-16
  7057. 2025-07-19 23:06:22,553 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-12
  7058. 2025-07-19 23:06:22,556 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-5
  7059. 2025-07-19 23:06:22,559 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-2
  7060. 2025-07-19 23:06:22,572 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-17
  7061. 2025-07-19 23:06:22,634 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-19
  7062. 2025-07-19 23:06:22,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-25
  7063. 2025-07-19 23:06:22,640 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-10
  7064. 2025-07-19 23:06:22,645 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-24
  7065. 2025-07-19 23:06:22,656 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-18
  7066. 2025-07-19 23:06:22,664 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-19
  7067. 2025-07-19 23:06:22,666 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-13
  7068. 2025-07-19 23:06:22,670 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-25
  7069. 2025-07-19 23:06:22,735 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-22
  7070. 2025-07-19 23:06:22,737 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-23
  7071. 2025-07-19 23:06:22,778 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-29
  7072. 2025-07-19 23:06:22,873 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-26
  7073. 2025-07-19 23:06:22,934 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-9
  7074. 2025-07-19 23:06:22,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-32
  7075. 2025-07-19 23:06:22,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-14
  7076. 2025-07-19 23:06:22,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-20
  7077. 2025-07-19 23:06:22,955 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-15
  7078. 2025-07-19 23:06:22,957 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-37
  7079. 2025-07-19 23:06:22,965 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-36
  7080. 2025-07-19 23:06:23,037 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-1
  7081. 2025-07-19 23:06:23,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-21
  7082. 2025-07-19 23:06:23,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-3
  7083. 2025-07-19 23:06:23,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-11
  7084. 2025-07-19 23:06:23,061 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-6
  7085. 2025-07-19 23:06:23,077 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-5
  7086. 2025-07-19 23:06:23,133 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-40
  7087. 2025-07-19 23:06:23,143 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-34
  7088. 2025-07-19 23:06:23,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-39
  7089. 2025-07-19 23:06:23,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-33
  7090. 2025-07-19 23:06:23,152 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-23
  7091. 2025-07-19 23:06:23,153 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-38
  7092. 2025-07-19 23:06:23,156 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-28
  7093. 2025-07-19 23:06:23,162 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-31
  7094. 2025-07-19 23:06:23,174 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-27
  7095. 2025-07-19 23:06:23,264 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-8
  7096. 2025-07-19 23:06:23,269 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-35
  7097. 2025-07-19 23:06:23,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-30
  7098. 2025-07-19 23:06:23,373 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-4
  7099. 2025-07-19 23:06:23,535 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-35
  7100. 2025-07-19 23:06:23,539 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-18
  7101. 2025-07-19 23:06:23,545 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-7
  7102. 2025-07-19 23:06:23,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-36
  7103. 2025-07-19 23:06:23,563 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-19
  7104. 2025-07-19 23:06:23,574 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-10
  7105. 2025-07-19 23:06:23,575 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-2
  7106. 2025-07-19 23:06:23,642 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-12
  7107. 2025-07-19 23:06:23,657 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-17
  7108. 2025-07-19 23:06:23,664 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-16
  7109. 2025-07-19 23:06:23,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-31
  7110. 2025-07-19 23:06:23,739 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-27
  7111. 2025-07-19 23:06:23,777 - __main__ - INFO - Built page query for tests/gnarly_pdfs/small_page_size.pdf-1
  7112. 2025-07-19 23:06:23,946 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-9
  7113. 2025-07-19 23:06:23,948 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-22
  7114. 2025-07-19 23:06:23,962 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-48
  7115. 2025-07-19 23:06:23,974 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-42
  7116. 2025-07-19 23:06:24,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-41
  7117. 2025-07-19 23:06:24,057 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-14
  7118. 2025-07-19 23:06:24,076 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-39
  7119. 2025-07-19 23:06:24,077 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-38
  7120. 2025-07-19 23:06:24,138 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-13
  7121. 2025-07-19 23:06:24,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-43
  7122. 2025-07-19 23:06:24,161 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-29
  7123. 2025-07-19 23:06:24,167 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-40
  7124. 2025-07-19 23:06:24,170 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-24
  7125. 2025-07-19 23:06:24,176 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-46
  7126. 2025-07-19 23:06:24,246 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-37
  7127. 2025-07-19 23:06:24,269 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-23
  7128. 2025-07-19 23:06:24,276 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-20
  7129. 2025-07-19 23:06:24,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-30
  7130. 2025-07-19 23:06:24,363 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-55
  7131. 2025-07-19 23:06:24,665 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-11
  7132. 2025-07-19 23:06:24,895 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-15
  7133. 2025-07-19 23:06:24,895 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-50
  7134. 2025-07-19 23:06:24,895 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-44
  7135. 2025-07-19 23:06:24,923 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-28
  7136. 2025-07-19 23:06:24,923 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-45
  7137. 2025-07-19 23:06:24,924 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-32
  7138. 2025-07-19 23:06:24,958 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-21
  7139. 2025-07-19 23:06:24,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-49
  7140. 2025-07-19 23:06:24,960 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-26
  7141. 2025-07-19 23:06:24,960 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-47
  7142. 2025-07-19 23:06:24,961 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-68
  7143. 2025-07-19 23:06:24,961 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-33
  7144. 2025-07-19 23:06:25,005 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-25
  7145. 2025-07-19 23:06:25,005 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-53
  7146. 2025-07-19 23:06:25,005 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-54
  7147. 2025-07-19 23:06:25,006 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-61
  7148. 2025-07-19 23:06:25,006 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-67
  7149. 2025-07-19 23:06:25,006 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-57
  7150. 2025-07-19 23:06:25,006 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-34
  7151. 2025-07-19 23:06:25,007 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-56
  7152. 2025-07-19 23:06:25,007 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-62
  7153. 2025-07-19 23:06:25,007 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-51
  7154. 2025-07-19 23:06:25,008 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-52
  7155. 2025-07-19 23:06:25,021 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-60
  7156. 2025-07-19 23:06:25,022 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-59
  7157. 2025-07-19 23:06:25,022 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-64
  7158. 2025-07-19 23:06:25,022 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-66
  7159. 2025-07-19 23:06:25,022 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-58
  7160. 2025-07-19 23:06:25,023 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-65
  7161. 2025-07-19 23:06:25,023 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-63
  7162. 2025-07-19 23:06:27,435 - __main__ - INFO - Queue remaining: 1
  7163. 2025-07-19 23:06:27,435 - __main__ - INFO -
  7164. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  7165. ----------------------------------------------------------------------------------
  7166. 2025-07-19 23:06:27,436 - __main__ - INFO -
  7167. Worker ID | started
  7168. ----------+--------
  7169. 0 | 529
  7170. 2025-07-19 23:06:27,649 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
  7171. 2025-07-19 23:06:31,151 - sglang - INFO - Token indices sequence length is longer than the specified maximum sequence length for this model (78749 > 32768). Running this sequence through the model will result in indexing errors
  7172. 2025-07-19 23:06:34,451 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-26 cancelled
  7173. 2025-07-19 23:06:34,451 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-29 cancelled
  7174. 2025-07-19 23:06:34,451 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-8 cancelled
  7175. 2025-07-19 23:06:34,451 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-18 cancelled
  7176. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-9 cancelled
  7177. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-19 cancelled
  7178. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-10 cancelled
  7179. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-20 cancelled
  7180. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-11 cancelled
  7181. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-21 cancelled
  7182. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-1 cancelled
  7183. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-12 cancelled
  7184. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-22 cancelled
  7185. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-2 cancelled
  7186. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-13 cancelled
  7187. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-23 cancelled
  7188. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-3 cancelled
  7189. 2025-07-19 23:06:34,452 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-14 cancelled
  7190. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-24 cancelled
  7191. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-4 cancelled
  7192. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-15 cancelled
  7193. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-25 cancelled
  7194. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-5 cancelled
  7195. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-16 cancelled
  7196. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-27 cancelled
  7197. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-6 cancelled
  7198. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-17 cancelled
  7199. 2025-07-19 23:06:34,533 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-28 cancelled
  7200. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-7 cancelled
  7201. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-40 cancelled
  7202. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-18 cancelled
  7203. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-9 cancelled
  7204. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-30 cancelled
  7205. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-19 cancelled
  7206. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-10 cancelled
  7207. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-31 cancelled
  7208. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-20 cancelled
  7209. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-11 cancelled
  7210. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-32 cancelled
  7211. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-1 cancelled
  7212. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-21 cancelled
  7213. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-12 cancelled
  7214. 2025-07-19 23:06:34,534 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-33 cancelled
  7215. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-2 cancelled
  7216. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-22 cancelled
  7217. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-13 cancelled
  7218. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-34 cancelled
  7219. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-3 cancelled
  7220. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-23 cancelled
  7221. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-14 cancelled
  7222. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-35 cancelled
  7223. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-4 cancelled
  7224. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-24 cancelled
  7225. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-15 cancelled
  7226. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-36 cancelled
  7227. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-5 cancelled
  7228. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-25 cancelled
  7229. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-16 cancelled
  7230. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-37 cancelled
  7231. 2025-07-19 23:06:34,535 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-6 cancelled
  7232. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-27 cancelled
  7233. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-17 cancelled
  7234. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-38 cancelled
  7235. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-7 cancelled
  7236. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-28 cancelled
  7237. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-29 cancelled
  7238. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-26 cancelled
  7239. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-39 cancelled
  7240. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-8 cancelled
  7241. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-1 cancelled
  7242. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-4 cancelled
  7243. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-7 cancelled
  7244. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-2 cancelled
  7245. 2025-07-19 23:06:34,536 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-5 cancelled
  7246. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-8 cancelled
  7247. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-3 cancelled
  7248. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-6 cancelled
  7249. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-5 cancelled
  7250. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-3 cancelled
  7251. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-6 cancelled
  7252. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-1 cancelled
  7253. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-4 cancelled
  7254. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-2 cancelled
  7255. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-1 cancelled
  7256. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-4 cancelled
  7257. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-7 cancelled
  7258. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-2 cancelled
  7259. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-5 cancelled
  7260. 2025-07-19 23:06:34,537 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-8 cancelled
  7261. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-3 cancelled
  7262. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-6 cancelled
  7263. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-8 cancelled
  7264. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-3 cancelled
  7265. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-6 cancelled
  7266. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-1 cancelled
  7267. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-9 cancelled
  7268. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-4 cancelled
  7269. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-7 cancelled
  7270. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-2 cancelled
  7271. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-5 cancelled
  7272. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-5 cancelled
  7273. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-8 cancelled
  7274. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-3 cancelled
  7275. 2025-07-19 23:06:34,538 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-6 cancelled
  7276. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-1 cancelled
  7277. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-9 cancelled
  7278. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-4 cancelled
  7279. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-7 cancelled
  7280. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-2 cancelled
  7281. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/ambiguous.pdf-1 cancelled
  7282. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-26 cancelled
  7283. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-29 cancelled
  7284. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-8 cancelled
  7285. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-40 cancelled
  7286. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-18 cancelled
  7287. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-30 cancelled
  7288. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-9 cancelled
  7289. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-41 cancelled
  7290. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-19 cancelled
  7291. 2025-07-19 23:06:34,539 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-31 cancelled
  7292. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-10 cancelled
  7293. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-42 cancelled
  7294. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-20 cancelled
  7295. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-32 cancelled
  7296. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-11 cancelled
  7297. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-43 cancelled
  7298. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-21 cancelled
  7299. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-1 cancelled
  7300. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-33 cancelled
  7301. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-12 cancelled
  7302. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-44 cancelled
  7303. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-22 cancelled
  7304. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-2 cancelled
  7305. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-34 cancelled
  7306. 2025-07-19 23:06:34,540 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-13 cancelled
  7307. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-45 cancelled
  7308. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-23 cancelled
  7309. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-3 cancelled
  7310. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-35 cancelled
  7311. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-14 cancelled
  7312. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-46 cancelled
  7313. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-24 cancelled
  7314. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-4 cancelled
  7315. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-36 cancelled
  7316. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-15 cancelled
  7317. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-39 cancelled
  7318. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-47 cancelled
  7319. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-25 cancelled
  7320. 2025-07-19 23:06:34,541 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-5 cancelled
  7321. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-37 cancelled
  7322. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-16 cancelled
  7323. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-48 cancelled
  7324. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-27 cancelled
  7325. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-6 cancelled
  7326. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-38 cancelled
  7327. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-17 cancelled
  7328. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-28 cancelled
  7329. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-7 cancelled
  7330. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/edgar.pdf-1 cancelled
  7331. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/olmo-page-1.pdf-1 cancelled
  7332. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/dolma-page-1.pdf-1 cancelled
  7333. 2025-07-19 23:06:34,542 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-31 cancelled
  7334. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-10 cancelled
  7335. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-42 cancelled
  7336. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-20 cancelled
  7337. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-53 cancelled
  7338. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-32 cancelled
  7339. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-11 cancelled
  7340. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-43 cancelled
  7341. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-1 cancelled
  7342. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-21 cancelled
  7343. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-54 cancelled
  7344. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-33 cancelled
  7345. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-12 cancelled
  7346. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-44 cancelled
  7347. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-2 cancelled
  7348. 2025-07-19 23:06:34,543 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-22 cancelled
  7349. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-34 cancelled
  7350. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-13 cancelled
  7351. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-45 cancelled
  7352. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-3 cancelled
  7353. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-23 cancelled
  7354. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-35 cancelled
  7355. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-14 cancelled
  7356. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-46 cancelled
  7357. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-4 cancelled
  7358. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-24 cancelled
  7359. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-36 cancelled
  7360. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-52 cancelled
  7361. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-15 cancelled
  7362. 2025-07-19 23:06:34,544 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-47 cancelled
  7363. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-5 cancelled
  7364. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-25 cancelled
  7365. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-37 cancelled
  7366. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-16 cancelled
  7367. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-48 cancelled
  7368. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-6 cancelled
  7369. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-27 cancelled
  7370. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-38 cancelled
  7371. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-17 cancelled
  7372. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-49 cancelled
  7373. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-7 cancelled
  7374. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-28 cancelled
  7375. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-39 cancelled
  7376. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-26 cancelled
  7377. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-50 cancelled
  7378. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-8 cancelled
  7379. 2025-07-19 23:06:34,545 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-29 cancelled
  7380. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-40 cancelled
  7381. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-18 cancelled
  7382. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-51 cancelled
  7383. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-9 cancelled
  7384. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-30 cancelled
  7385. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-41 cancelled
  7386. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-19 cancelled
  7387. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-4 cancelled
  7388. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-7 cancelled
  7389. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-2 cancelled
  7390. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-5 cancelled
  7391. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-8 cancelled
  7392. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-3 cancelled
  7393. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-9 cancelled
  7394. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-6 cancelled
  7395. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-1 cancelled
  7396. 2025-07-19 23:06:34,546 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-6 cancelled
  7397. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-1 cancelled
  7398. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-4 cancelled
  7399. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-2 cancelled
  7400. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-5 cancelled
  7401. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-3 cancelled
  7402. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-1 cancelled
  7403. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-9 cancelled
  7404. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-14 cancelled
  7405. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-4 cancelled
  7406. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-12 cancelled
  7407. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-7 cancelled
  7408. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-2 cancelled
  7409. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-10 cancelled
  7410. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-11 cancelled
  7411. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-5 cancelled
  7412. 2025-07-19 23:06:34,547 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-13 cancelled
  7413. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-8 cancelled
  7414. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-3 cancelled
  7415. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-6 cancelled
  7416. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/guidebook_failed_pages.pdf-2 cancelled
  7417. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/guidebook_failed_pages.pdf-3 cancelled
  7418. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/guidebook_failed_pages.pdf-1 cancelled
  7419. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-1 cancelled
  7420. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-3 cancelled
  7421. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-2 cancelled
  7422. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-4 cancelled
  7423. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-9 cancelled
  7424. 2025-07-19 23:06:34,548 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-19 cancelled
  7425. 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-10 cancelled
  7426. 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-20 cancelled
  7427. 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-11 cancelled
  7428. 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-21 cancelled
  7429. 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-1 cancelled
  7430. 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-12 cancelled
  7431. 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-22 cancelled
  7432. 2025-07-19 23:06:34,633 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-2 cancelled
  7433. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-13 cancelled
  7434. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-23 cancelled
  7435. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-3 cancelled
  7436. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-14 cancelled
  7437. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-24 cancelled
  7438. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-4 cancelled
  7439. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-15 cancelled
  7440. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-25 cancelled
  7441. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-5 cancelled
  7442. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-16 cancelled
  7443. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-27 cancelled
  7444. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-6 cancelled
  7445. 2025-07-19 23:06:34,634 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-17 cancelled
  7446. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-7 cancelled
  7447. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-26 cancelled
  7448. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-8 cancelled
  7449. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-18 cancelled
  7450. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/skinnypage.pdf-2 cancelled
  7451. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/skinnypage.pdf-1 cancelled
  7452. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-7 cancelled
  7453. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-15 cancelled
  7454. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-12 cancelled
  7455. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-2 cancelled
  7456. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-10 cancelled
  7457. 2025-07-19 23:06:34,635 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-5 cancelled
  7458. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-13 cancelled
  7459. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-8 cancelled
  7460. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-9 cancelled
  7461. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-16 cancelled
  7462. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-3 cancelled
  7463. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-11 cancelled
  7464. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-6 cancelled
  7465. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-14 cancelled
  7466. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-1 cancelled
  7467. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-4 cancelled
  7468. 2025-07-19 23:06:34,636 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-13 cancelled
  7469. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-23 cancelled
  7470. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-3 cancelled
  7471. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-14 cancelled
  7472. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-24 cancelled
  7473. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-4 cancelled
  7474. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-15 cancelled
  7475. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-25 cancelled
  7476. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-5 cancelled
  7477. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-16 cancelled
  7478. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-6 cancelled
  7479. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-17 cancelled
  7480. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-7 cancelled
  7481. 2025-07-19 23:06:34,637 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-26 cancelled
  7482. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-8 cancelled
  7483. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-18 cancelled
  7484. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-9 cancelled
  7485. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-19 cancelled
  7486. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-10 cancelled
  7487. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-20 cancelled
  7488. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-11 cancelled
  7489. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-21 cancelled
  7490. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-1 cancelled
  7491. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-12 cancelled
  7492. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-22 cancelled
  7493. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-2 cancelled
  7494. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-4 cancelled
  7495. 2025-07-19 23:06:34,638 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-89 cancelled
  7496. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-47 cancelled
  7497. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-5 cancelled
  7498. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-90 cancelled
  7499. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-48 cancelled
  7500. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-6 cancelled
  7501. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-91 cancelled
  7502. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-49 cancelled
  7503. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-7 cancelled
  7504. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-92 cancelled
  7505. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-50 cancelled
  7506. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-8 cancelled
  7507. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-93 cancelled
  7508. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-51 cancelled
  7509. 2025-07-19 23:06:34,639 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-9 cancelled
  7510. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-94 cancelled
  7511. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-52 cancelled
  7512. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-10 cancelled
  7513. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-95 cancelled
  7514. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-53 cancelled
  7515. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-11 cancelled
  7516. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-96 cancelled
  7517. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-54 cancelled
  7518. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-12 cancelled
  7519. 2025-07-19 23:06:34,640 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-97 cancelled
  7520. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-55 cancelled
  7521. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-13 cancelled
  7522. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-98 cancelled
  7523. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-56 cancelled
  7524. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-14 cancelled
  7525. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-99 cancelled
  7526. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-57 cancelled
  7527. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-15 cancelled
  7528. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-100 cancelled
  7529. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-58 cancelled
  7530. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-16 cancelled
  7531. 2025-07-19 23:06:34,641 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-101 cancelled
  7532. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-59 cancelled
  7533. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-17 cancelled
  7534. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-102 cancelled
  7535. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-60 cancelled
  7536. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-26 cancelled
  7537. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-103 cancelled
  7538. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-61 cancelled
  7539. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-18 cancelled
  7540. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-104 cancelled
  7541. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-62 cancelled
  7542. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-19 cancelled
  7543. 2025-07-19 23:06:34,642 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-105 cancelled
  7544. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-63 cancelled
  7545. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-20 cancelled
  7546. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-106 cancelled
  7547. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-64 cancelled
  7548. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-21 cancelled
  7549. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-65 cancelled
  7550. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-22 cancelled
  7551. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-66 cancelled
  7552. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-23 cancelled
  7553. 2025-07-19 23:06:34,643 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-67 cancelled
  7554. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-24 cancelled
  7555. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-68 cancelled
  7556. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-25 cancelled
  7557. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-69 cancelled
  7558. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-27 cancelled
  7559. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-70 cancelled
  7560. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-28 cancelled
  7561. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-71 cancelled
  7562. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-29 cancelled
  7563. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-72 cancelled
  7564. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-30 cancelled
  7565. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-73 cancelled
  7566. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-31 cancelled
  7567. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-74 cancelled
  7568. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-32 cancelled
  7569. 2025-07-19 23:06:34,644 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-75 cancelled
  7570. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-33 cancelled
  7571. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-76 cancelled
  7572. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-34 cancelled
  7573. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-77 cancelled
  7574. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-35 cancelled
  7575. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-78 cancelled
  7576. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-36 cancelled
  7577. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-79 cancelled
  7578. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-37 cancelled
  7579. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-80 cancelled
  7580. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-38 cancelled
  7581. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-81 cancelled
  7582. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-39 cancelled
  7583. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-82 cancelled
  7584. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-40 cancelled
  7585. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-83 cancelled
  7586. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-41 cancelled
  7587. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-84 cancelled
  7588. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-42 cancelled
  7589. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-85 cancelled
  7590. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-43 cancelled
  7591. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-1 cancelled
  7592. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-86 cancelled
  7593. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-44 cancelled
  7594. 2025-07-19 23:06:34,645 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-2 cancelled
  7595. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-87 cancelled
  7596. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-45 cancelled
  7597. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-3 cancelled
  7598. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-88 cancelled
  7599. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-46 cancelled
  7600. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-2 cancelled
  7601. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-10 cancelled
  7602. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-5 cancelled
  7603. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-8 cancelled
  7604. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-3 cancelled
  7605. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-6 cancelled
  7606. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-1 cancelled
  7607. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-9 cancelled
  7608. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-4 cancelled
  7609. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-7 cancelled
  7610. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-32 cancelled
  7611. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-11 cancelled
  7612. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-43 cancelled
  7613. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-64 cancelled
  7614. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-21 cancelled
  7615. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-53 cancelled
  7616. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-54 cancelled
  7617. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-1 cancelled
  7618. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-33 cancelled
  7619. 2025-07-19 23:06:34,646 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-12 cancelled
  7620. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-44 cancelled
  7621. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-65 cancelled
  7622. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-22 cancelled
  7623. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-55 cancelled
  7624. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-2 cancelled
  7625. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-34 cancelled
  7626. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-13 cancelled
  7627. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-45 cancelled
  7628. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-66 cancelled
  7629. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-23 cancelled
  7630. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-56 cancelled
  7631. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-3 cancelled
  7632. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-35 cancelled
  7633. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-14 cancelled
  7634. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-46 cancelled
  7635. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-67 cancelled
  7636. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-24 cancelled
  7637. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-57 cancelled
  7638. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-4 cancelled
  7639. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-36 cancelled
  7640. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-15 cancelled
  7641. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-47 cancelled
  7642. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-68 cancelled
  7643. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-25 cancelled
  7644. 2025-07-19 23:06:34,647 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-58 cancelled
  7645. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-5 cancelled
  7646. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-37 cancelled
  7647. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-16 cancelled
  7648. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-48 cancelled
  7649. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-27 cancelled
  7650. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-59 cancelled
  7651. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-6 cancelled
  7652. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-38 cancelled
  7653. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-17 cancelled
  7654. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-49 cancelled
  7655. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-28 cancelled
  7656. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-60 cancelled
  7657. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-7 cancelled
  7658. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-39 cancelled
  7659. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-26 cancelled
  7660. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-50 cancelled
  7661. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-29 cancelled
  7662. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-61 cancelled
  7663. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-8 cancelled
  7664. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-40 cancelled
  7665. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-18 cancelled
  7666. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-51 cancelled
  7667. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-30 cancelled
  7668. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-62 cancelled
  7669. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-9 cancelled
  7670. 2025-07-19 23:06:34,648 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-41 cancelled
  7671. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-19 cancelled
  7672. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-42 cancelled
  7673. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-52 cancelled
  7674. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-31 cancelled
  7675. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-63 cancelled
  7676. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-10 cancelled
  7677. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-20 cancelled
  7678. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-7 cancelled
  7679. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-2 cancelled
  7680. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-10 cancelled
  7681. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-5 cancelled
  7682. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-8 cancelled
  7683. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-3 cancelled
  7684. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-6 cancelled
  7685. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-9 cancelled
  7686. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-1 cancelled
  7687. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-4 cancelled
  7688. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/handwriting_bad_ocr.pdf-1 cancelled
  7689. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/handwriting_bad_ocr.pdf-2 cancelled
  7690. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/map1.pdf-1 cancelled
  7691. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/small_page_size.pdf-1 cancelled
  7692. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-3 cancelled
  7693. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-6 cancelled
  7694. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-1 cancelled
  7695. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-4 cancelled
  7696. 2025-07-19 23:06:34,649 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-7 cancelled
  7697. 2025-07-19 23:06:34,650 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-2 cancelled
  7698. 2025-07-19 23:06:34,650 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-5 cancelled
  7699. 2025-07-19 23:06:34,650 - __main__ - INFO - Process page tests/gnarly_pdfs/some_ocr1.pdf-1 cancelled
  7700. 2025-07-19 23:06:34,650 - __main__ - INFO - Process page tests/gnarly_pdfs/newspaper.pdf-1 cancelled
  7701. 2025-07-19 23:06:34,650 - __main__ - INFO - Got cancellation request for SGLang server
  7702. 2025-07-19 23:07:14,182 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  7703. 2025-07-19 23:07:14,182 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017009.pdf as PDF document
  7704. 2025-07-19 23:07:14,182 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  7705. 2025-07-19 23:07:14,182 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017020.pdf as PDF document
  7706. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017028.pdf as PDF document
  7707. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017041.pdf as PDF document
  7708. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017049.pdf as PDF document
  7709. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2C47380T4440125017008 (1).pdf as PDF document
  7710. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2C47380T4440125017008.pdf as PDF document
  7711. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2C47380T4440125017023.pdf as PDF document
  7712. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125011001.pdf as PDF document
  7713. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017003.pdf as PDF document
  7714. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017006.pdf as PDF document
  7715. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017007.pdf as PDF document
  7716. 2025-07-19 23:07:14,183 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017011.pdf as PDF document
  7717. 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017023.pdf as PDF document
  7718. 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017041.pdf as PDF document
  7719. 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D06387W3440125017048.pdf as PDF document
  7720. 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D42580L4442014010000.pdf as PDF document
  7721. 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D6222364440125017008.pdf as PDF document
  7722. 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445200MB2D6222364440125017049.pdf as PDF document
  7723. 2025-07-19 23:07:14,184 - __main__ - INFO - Loading file at scripts/data/11445202592174409C4442111641000.pdf as PDF document
  7724. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202592174409C4442111667001.pdf as PDF document
  7725. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202592174409C4442111820005.pdf as PDF document
  7726. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202MB2D1177604440125017023.pdf as PDF document
  7727. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202MB2D1177604440125017027.pdf as PDF document
  7728. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202MB2D1177604440125017041.pdf as PDF document
  7729. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445202MB2D117760444212503R001.pdf as PDF document
  7730. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U4440711000000.pdf as PDF document
  7731. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U44421110A0005.pdf as PDF document
  7732. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U4442111640000.pdf as PDF document
  7733. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U4442111641000.pdf as PDF document
  7734. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203007030456U4442111667001.pdf as PDF document
  7735. 2025-07-19 23:07:14,185 - __main__ - INFO - Loading file at scripts/data/11445203707759010G4442014010000.pdf as PDF document
  7736. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445203MB2C21084N4440125017008.pdf as PDF document
  7737. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445203MB2C21084N444212503R001.pdf as PDF document
  7738. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007029500K4440711000000.pdf as PDF document
  7739. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007029500K44421110A0001.pdf as PDF document
  7740. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007029500K44421110A0005.pdf as PDF document
  7741. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007029527B4442106100010.pdf as PDF document
  7742. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445222007030157E4440149001001.pdf as PDF document
  7743. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445224007035644H4440711000000.pdf as PDF document
  7744. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445224007035644H44421110A0001.pdf as PDF document
  7745. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445224007035644H44421110A0005.pdf as PDF document
  7746. 2025-07-19 23:07:14,186 - __main__ - INFO - Loading file at scripts/data/11445224007035652C4440114020001.pdf as PDF document
  7747. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445224007035652C4442014010000.pdf as PDF document
  7748. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A4440711000000.pdf as PDF document
  7749. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A44421110A0001.pdf as PDF document
  7750. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A44421110A0005.pdf as PDF document
  7751. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A4442111641000.pdf as PDF document
  7752. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A4442111667001.pdf as PDF document
  7753. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/11445281588281455A4442111820005.pdf as PDF document
  7754. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/12445200456019383L3442111667001.pdf as PDF document
  7755. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/12445200726503846U344201405500301.pdf as PDF document
  7756. 2025-07-19 23:07:14,187 - __main__ - INFO - Loading file at scripts/data/12445200726503846U3442014055009.pdf as PDF document
  7757. 2025-07-19 23:07:14,187 - __main__ - INFO - Found 54 total pdf paths to add
  7758. 2025-07-19 23:07:14,306 - __main__ - INFO - Calculated items_per_group: 53 based on average pages per PDF: 9.35
  7759. 2025-07-19 23:07:14,512 - __main__ - INFO - Starting pipeline with PID 555339
  7760. 2025-07-19 23:07:14,512 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  7761. 2025-07-19 23:07:25,124 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  7762. 2025-07-19 23:07:27,569 - sglang - INFO - [2025-07-19 23:07:27] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=495738545, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  7763. 2025-07-19 23:07:27,569 - __main__ - INFO - [2025-07-19 23:07:27] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=495738545, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  7764. 2025-07-19 23:07:31,183 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  7765. 2025-07-19 23:07:33,574 - sglang - INFO - [2025-07-19 23:07:33] Use chat template for the OpenAI-compatible API server: qwen2-vl
  7766. 2025-07-19 23:07:33,574 - __main__ - INFO - [2025-07-19 23:07:33] Use chat template for the OpenAI-compatible API server: qwen2-vl
  7767. 2025-07-19 23:07:37,266 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  7768. 2025-07-19 23:07:43,350 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  7769. 2025-07-19 23:07:43,353 - __main__ - INFO - Got cancellation request for SGLang server
  7770. 2025-07-19 23:08:05,369 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  7771. 2025-07-19 23:08:05,369 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  7772. 2025-07-19 23:08:05,369 - __main__ - INFO - Found 1 total pdf paths to add
  7773. 2025-07-19 23:08:05,374 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  7774. 2025-07-19 23:08:05,594 - __main__ - INFO - Starting pipeline with PID 556062
  7775. 2025-07-19 23:08:05,594 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  7776. 2025-07-19 23:08:11,182 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  7777. 2025-07-19 23:08:13,754 - sglang - INFO - [2025-07-19 23:08:13] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=266199639, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  7778. 2025-07-19 23:08:13,754 - __main__ - INFO - [2025-07-19 23:08:13] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=266199639, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  7779. 2025-07-19 23:08:17,243 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  7780. 2025-07-19 23:08:23,178 - sglang - INFO - [2025-07-19 23:08:23] Use chat template for the OpenAI-compatible API server: qwen2-vl
  7781. 2025-07-19 23:08:23,178 - __main__ - INFO - [2025-07-19 23:08:23] Use chat template for the OpenAI-compatible API server: qwen2-vl
  7782. 2025-07-19 23:08:23,301 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  7783. 2025-07-19 23:08:29,383 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  7784. 2025-07-19 23:08:30,095 - sglang - INFO - [2025-07-19 23:08:30 TP0] Overlap scheduler is disabled for multimodal models.
  7785. 2025-07-19 23:08:30,095 - __main__ - INFO - [2025-07-19 23:08:30 TP0] Overlap scheduler is disabled for multimodal models.
  7786. 2025-07-19 23:08:30,759 - sglang - INFO - [2025-07-19 23:08:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  7787. 2025-07-19 23:08:30,759 - __main__ - INFO - [2025-07-19 23:08:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  7788. 2025-07-19 23:08:30,759 - sglang - INFO - [2025-07-19 23:08:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  7789. 2025-07-19 23:08:30,759 - __main__ - INFO - [2025-07-19 23:08:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  7790. 2025-07-19 23:08:30,759 - sglang - INFO - [2025-07-19 23:08:30 TP0] Init torch distributed begin.
  7791. 2025-07-19 23:08:30,759 - __main__ - INFO - [2025-07-19 23:08:30 TP0] Init torch distributed begin.
  7792. 2025-07-19 23:08:35,464 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  7793. 2025-07-19 23:08:36,190 - sglang - INFO - [2025-07-19 23:08:36 TP0] Load weight begin. avail mem=23.33 GB
  7794. 2025-07-19 23:08:36,190 - __main__ - INFO - [2025-07-19 23:08:36 TP0] Load weight begin. avail mem=23.33 GB
  7795. 2025-07-19 23:08:37,388 - sglang - INFO - [2025-07-19 23:08:37 TP0] Using model weights format ['*.safetensors']
  7796. 2025-07-19 23:08:37,388 - __main__ - INFO - [2025-07-19 23:08:37 TP0] Using model weights format ['*.safetensors']
  7797. 2025-07-19 23:08:37,969 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  7798. 2025-07-19 23:08:37,969 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  7799. 2025-07-19 23:08:38,880 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.10it/s]
  7800. 2025-07-19 23:08:38,880 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.10it/s]
  7801. 2025-07-19 23:08:39,724 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.15it/s]
  7802. 2025-07-19 23:08:39,725 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.15it/s]
  7803. 2025-07-19 23:08:40,691 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.09it/s]
  7804. 2025-07-19 23:08:40,691 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.09it/s]
  7805. 2025-07-19 23:08:41,545 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  7806. 2025-07-19 23:08:41,748 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.03it/s]
  7807. 2025-07-19 23:08:41,748 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.03it/s]
  7808. 2025-07-19 23:08:41,748 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.06it/s]
  7809. 2025-07-19 23:08:41,748 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.06it/s]
  7810. 2025-07-19 23:08:41,748 - sglang - INFO -
  7811. 2025-07-19 23:08:41,748 - __main__ - INFO -
  7812. 2025-07-19 23:08:42,117 - sglang - INFO - [2025-07-19 23:08:42 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  7813. 2025-07-19 23:08:42,117 - __main__ - INFO - [2025-07-19 23:08:42 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  7814. 2025-07-19 23:08:42,126 - sglang - INFO - [2025-07-19 23:08:42 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  7815. 2025-07-19 23:08:42,126 - __main__ - INFO - [2025-07-19 23:08:42 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  7816. 2025-07-19 23:08:42,126 - sglang - INFO - [2025-07-19 23:08:42 TP0] Memory pool end. avail mem=5.30 GB
  7817. 2025-07-19 23:08:42,126 - __main__ - INFO - [2025-07-19 23:08:42 TP0] Memory pool end. avail mem=5.30 GB
  7818. 2025-07-19 23:08:42,354 - sglang - INFO - [2025-07-19 23:08:42 TP0] Capture cuda graph begin. This can take up to several minutes.
  7819. 2025-07-19 23:08:42,354 - __main__ - INFO - [2025-07-19 23:08:42 TP0] Capture cuda graph begin. This can take up to several minutes.
  7820. 2025-07-19 23:08:44,496 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.41s/it] 50%|█████ | 2/4 [00:01<00:01, 1.38it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.99it/s] 100%|██████████| 4/4 [00:02<00:00, 2.50it/s] 100%|██████████| 4/4 [00:02<00:00, 1.87it/s]
  7821. 2025-07-19 23:08:44,496 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.41s/it] 50%|█████ | 2/4 [00:01<00:01, 1.38it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.99it/s] 100%|██████████| 4/4 [00:02<00:00, 2.50it/s] 100%|██████████| 4/4 [00:02<00:00, 1.87it/s]
  7822. 2025-07-19 23:08:44,496 - sglang - INFO - [2025-07-19 23:08:44 TP0] Capture cuda graph end. Time elapsed: 2.14 s
  7823. 2025-07-19 23:08:44,496 - __main__ - INFO - [2025-07-19 23:08:44 TP0] Capture cuda graph end. Time elapsed: 2.14 s
  7824. 2025-07-19 23:08:47,411 - sglang - INFO - [2025-07-19 23:08:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  7825. 2025-07-19 23:08:47,411 - __main__ - INFO - [2025-07-19 23:08:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  7826. 2025-07-19 23:08:47,494 - sglang - INFO - [2025-07-19 23:08:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  7827. 2025-07-19 23:08:47,494 - __main__ - INFO - [2025-07-19 23:08:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  7828. 2025-07-19 23:08:47,625 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  7829. 2025-07-19 23:08:53,705 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  7830. 2025-07-19 23:08:59,786 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  7831. 2025-07-19 23:09:05,866 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  7832. 2025-07-19 23:09:11,947 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  7833. 2025-07-19 23:09:18,027 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  7834. 2025-07-19 23:09:24,108 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  7835. 2025-07-19 23:09:30,188 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  7836. 2025-07-19 23:09:36,268 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  7837. 2025-07-19 23:09:42,348 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  7838. 2025-07-19 23:09:42,545 - __main__ - INFO - Got cancellation request for SGLang server
  7839. 2025-07-19 23:10:41,614 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  7840. 2025-07-19 23:10:41,614 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  7841. 2025-07-19 23:10:41,614 - __main__ - INFO - Found 1 total pdf paths to add
  7842. 2025-07-19 23:10:41,619 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  7843. 2025-07-19 23:10:41,861 - __main__ - INFO - Starting pipeline with PID 557098
  7844. 2025-07-19 23:10:41,861 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  7845. 2025-07-19 23:11:22,329 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  7846. 2025-07-19 23:11:22,329 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  7847. 2025-07-19 23:11:22,329 - __main__ - INFO - Found 1 total pdf paths to add
  7848. 2025-07-19 23:11:22,334 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  7849. 2025-07-19 23:11:22,524 - __main__ - INFO - Starting pipeline with PID 557180
  7850. 2025-07-19 23:11:22,524 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  7851. 2025-07-19 23:11:28,359 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  7852. 2025-07-19 23:11:30,721 - sglang - INFO - [2025-07-19 23:11:30] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=247677095, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  7853. 2025-07-19 23:11:30,722 - __main__ - INFO - [2025-07-19 23:11:30] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=247677095, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  7854. 2025-07-19 23:11:34,466 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  7855. 2025-07-19 23:11:40,546 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  7856. 2025-07-19 23:11:41,791 - sglang - INFO - [2025-07-19 23:11:41] Use chat template for the OpenAI-compatible API server: qwen2-vl
  7857. 2025-07-19 23:11:41,792 - __main__ - INFO - [2025-07-19 23:11:41] Use chat template for the OpenAI-compatible API server: qwen2-vl
  7858. 2025-07-19 23:11:46,615 - sglang - INFO - [2025-07-19 23:11:46 TP0] Overlap scheduler is disabled for multimodal models.
  7859. 2025-07-19 23:11:46,615 - __main__ - INFO - [2025-07-19 23:11:46 TP0] Overlap scheduler is disabled for multimodal models.
  7860. 2025-07-19 23:11:46,627 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  7861. 2025-07-19 23:11:47,281 - sglang - INFO - [2025-07-19 23:11:47 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  7862. 2025-07-19 23:11:47,281 - __main__ - INFO - [2025-07-19 23:11:47 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  7863. 2025-07-19 23:11:47,281 - sglang - INFO - [2025-07-19 23:11:47 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  7864. 2025-07-19 23:11:47,281 - __main__ - INFO - [2025-07-19 23:11:47 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  7865. 2025-07-19 23:11:47,281 - sglang - INFO - [2025-07-19 23:11:47 TP0] Init torch distributed begin.
  7866. 2025-07-19 23:11:47,282 - __main__ - INFO - [2025-07-19 23:11:47 TP0] Init torch distributed begin.
  7867. 2025-07-19 23:11:52,667 - sglang - INFO - [2025-07-19 23:11:52 TP0] Load weight begin. avail mem=23.33 GB
  7868. 2025-07-19 23:11:52,667 - __main__ - INFO - [2025-07-19 23:11:52 TP0] Load weight begin. avail mem=23.33 GB
  7869. 2025-07-19 23:11:52,707 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  7870. 2025-07-19 23:11:54,501 - sglang - INFO - [2025-07-19 23:11:54 TP0] Using model weights format ['*.safetensors']
  7871. 2025-07-19 23:11:54,501 - __main__ - INFO - [2025-07-19 23:11:54 TP0] Using model weights format ['*.safetensors']
  7872. 2025-07-19 23:11:55,094 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  7873. 2025-07-19 23:11:55,094 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  7874. 2025-07-19 23:11:55,375 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.56it/s]
  7875. 2025-07-19 23:11:55,375 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.56it/s]
  7876. 2025-07-19 23:11:56,196 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
  7877. 2025-07-19 23:11:56,197 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
  7878. 2025-07-19 23:11:56,989 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.45it/s]
  7879. 2025-07-19 23:11:56,989 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.45it/s]
  7880. 2025-07-19 23:11:57,877 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.30it/s]
  7881. 2025-07-19 23:11:57,877 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.30it/s]
  7882. 2025-07-19 23:11:57,877 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.44it/s]
  7883. 2025-07-19 23:11:57,877 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.44it/s]
  7884. 2025-07-19 23:11:57,877 - sglang - INFO -
  7885. 2025-07-19 23:11:57,878 - __main__ - INFO -
  7886. 2025-07-19 23:11:58,085 - sglang - INFO - [2025-07-19 23:11:58 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  7887. 2025-07-19 23:11:58,086 - __main__ - INFO - [2025-07-19 23:11:58 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  7888. 2025-07-19 23:11:58,095 - sglang - INFO - [2025-07-19 23:11:58 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  7889. 2025-07-19 23:11:58,095 - __main__ - INFO - [2025-07-19 23:11:58 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  7890. 2025-07-19 23:11:58,095 - sglang - INFO - [2025-07-19 23:11:58 TP0] Memory pool end. avail mem=5.30 GB
  7891. 2025-07-19 23:11:58,095 - __main__ - INFO - [2025-07-19 23:11:58 TP0] Memory pool end. avail mem=5.30 GB
  7892. 2025-07-19 23:11:58,326 - sglang - INFO - [2025-07-19 23:11:58 TP0] Capture cuda graph begin. This can take up to several minutes.
  7893. 2025-07-19 23:11:58,327 - __main__ - INFO - [2025-07-19 23:11:58 TP0] Capture cuda graph begin. This can take up to several minutes.
  7894. 2025-07-19 23:11:58,788 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  7895. 2025-07-19 23:12:00,152 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.09s/it] 50%|█████ | 2/4 [00:01<00:01, 1.69it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.31it/s] 100%|██████████| 4/4 [00:01<00:00, 2.79it/s] 100%|██████████| 4/4 [00:01<00:00, 2.20it/s]
  7896. 2025-07-19 23:12:00,152 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.09s/it] 50%|█████ | 2/4 [00:01<00:01, 1.69it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.31it/s] 100%|██████████| 4/4 [00:01<00:00, 2.79it/s] 100%|██████████| 4/4 [00:01<00:00, 2.20it/s]
  7897. 2025-07-19 23:12:00,153 - sglang - INFO - [2025-07-19 23:12:00 TP0] Capture cuda graph end. Time elapsed: 1.83 s
  7898. 2025-07-19 23:12:00,153 - __main__ - INFO - [2025-07-19 23:12:00 TP0] Capture cuda graph end. Time elapsed: 1.83 s
  7899. 2025-07-19 23:12:03,340 - sglang - INFO - [2025-07-19 23:12:03 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  7900. 2025-07-19 23:12:03,341 - __main__ - INFO - [2025-07-19 23:12:03 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  7901. 2025-07-19 23:12:03,421 - sglang - INFO - [2025-07-19 23:12:03] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  7902. 2025-07-19 23:12:03,422 - __main__ - INFO - [2025-07-19 23:12:03] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  7903. 2025-07-19 23:12:04,868 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  7904. 2025-07-19 23:12:10,948 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  7905. 2025-07-19 23:12:17,028 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  7906. 2025-07-19 23:12:23,107 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  7907. 2025-07-19 23:12:29,186 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  7908. 2025-07-19 23:12:35,266 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  7909. 2025-07-19 23:12:41,345 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  7910. 2025-07-19 23:12:47,425 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  7911. 2025-07-19 23:12:53,503 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  7912. 2025-07-19 23:12:55,861 - sglang - INFO - Process Process-2:
  7913. 2025-07-19 23:12:55,861 - __main__ - INFO - Process Process-2:
  7914. 2025-07-19 23:12:55,861 - sglang - INFO - Process Process-1:
  7915. 2025-07-19 23:12:55,861 - __main__ - INFO - Process Process-1:
  7916. 2025-07-19 23:12:55,862 - __main__ - INFO - Got cancellation request for SGLang server
  7917. 2025-07-19 23:44:52,028 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  7918. 2025-07-19 23:44:52,029 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  7919. 2025-07-19 23:44:52,029 - __main__ - INFO - Found 1 total pdf paths to add
  7920. 2025-07-19 23:44:52,034 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  7921. 2025-07-19 23:44:52,207 - __main__ - INFO - Starting pipeline with PID 560498
  7922. 2025-07-19 23:44:52,208 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  7923. 2025-07-19 23:45:02,904 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  7924. 2025-07-19 23:45:05,219 - sglang - INFO - [2025-07-19 23:45:05] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=279913238, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  7925. 2025-07-19 23:45:05,219 - __main__ - INFO - [2025-07-19 23:45:05] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=279913238, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  7926. 2025-07-19 23:45:08,985 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  7927. 2025-07-19 23:45:14,633 - sglang - INFO - [2025-07-19 23:45:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
  7928. 2025-07-19 23:45:14,634 - __main__ - INFO - [2025-07-19 23:45:14] Use chat template for the OpenAI-compatible API server: qwen2-vl
  7929. 2025-07-19 23:45:15,066 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  7930. 2025-07-19 23:45:21,148 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  7931. 2025-07-19 23:45:21,289 - sglang - INFO - [2025-07-19 23:45:21 TP0] Overlap scheduler is disabled for multimodal models.
  7932. 2025-07-19 23:45:21,289 - __main__ - INFO - [2025-07-19 23:45:21 TP0] Overlap scheduler is disabled for multimodal models.
  7933. 2025-07-19 23:45:22,002 - sglang - INFO - [2025-07-19 23:45:22 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  7934. 2025-07-19 23:45:22,003 - __main__ - INFO - [2025-07-19 23:45:22 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  7935. 2025-07-19 23:45:22,003 - sglang - INFO - [2025-07-19 23:45:22 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  7936. 2025-07-19 23:45:22,003 - __main__ - INFO - [2025-07-19 23:45:22 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  7937. 2025-07-19 23:45:22,003 - sglang - INFO - [2025-07-19 23:45:22 TP0] Init torch distributed begin.
  7938. 2025-07-19 23:45:22,003 - __main__ - INFO - [2025-07-19 23:45:22 TP0] Init torch distributed begin.
  7939. 2025-07-19 23:45:27,230 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  7940. 2025-07-19 23:45:27,404 - sglang - INFO - [2025-07-19 23:45:27 TP0] Load weight begin. avail mem=23.33 GB
  7941. 2025-07-19 23:45:27,404 - __main__ - INFO - [2025-07-19 23:45:27 TP0] Load weight begin. avail mem=23.33 GB
  7942. 2025-07-19 23:45:28,877 - sglang - INFO - [2025-07-19 23:45:28 TP0] Using model weights format ['*.safetensors']
  7943. 2025-07-19 23:45:28,877 - __main__ - INFO - [2025-07-19 23:45:28 TP0] Using model weights format ['*.safetensors']
  7944. 2025-07-19 23:45:29,998 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  7945. 2025-07-19 23:45:29,998 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  7946. 2025-07-19 23:45:30,295 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.37it/s]
  7947. 2025-07-19 23:45:30,295 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.37it/s]
  7948. 2025-07-19 23:45:31,105 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
  7949. 2025-07-19 23:45:31,105 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
  7950. 2025-07-19 23:45:31,891 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.46it/s]
  7951. 2025-07-19 23:45:31,891 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.46it/s]
  7952. 2025-07-19 23:45:32,771 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.31it/s]
  7953. 2025-07-19 23:45:32,771 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.31it/s]
  7954. 2025-07-19 23:45:32,771 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.44it/s]
  7955. 2025-07-19 23:45:32,771 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.44it/s]
  7956. 2025-07-19 23:45:32,771 - sglang - INFO -
  7957. 2025-07-19 23:45:32,771 - __main__ - INFO -
  7958. 2025-07-19 23:45:32,930 - sglang - INFO - [2025-07-19 23:45:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  7959. 2025-07-19 23:45:32,931 - __main__ - INFO - [2025-07-19 23:45:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  7960. 2025-07-19 23:45:32,938 - sglang - INFO - [2025-07-19 23:45:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  7961. 2025-07-19 23:45:32,938 - __main__ - INFO - [2025-07-19 23:45:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  7962. 2025-07-19 23:45:32,938 - sglang - INFO - [2025-07-19 23:45:32 TP0] Memory pool end. avail mem=5.30 GB
  7963. 2025-07-19 23:45:32,938 - __main__ - INFO - [2025-07-19 23:45:32 TP0] Memory pool end. avail mem=5.30 GB
  7964. 2025-07-19 23:45:33,126 - sglang - INFO - [2025-07-19 23:45:33 TP0] Capture cuda graph begin. This can take up to several minutes.
  7965. 2025-07-19 23:45:33,126 - __main__ - INFO - [2025-07-19 23:45:33 TP0] Capture cuda graph begin. This can take up to several minutes.
  7966. 2025-07-19 23:45:33,312 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  7967. 2025-07-19 23:45:35,023 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.16s/it] 50%|█████ | 2/4 [00:01<00:01, 1.61it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.22it/s] 100%|██████████| 4/4 [00:01<00:00, 2.72it/s] 100%|██████████| 4/4 [00:01<00:00, 2.11it/s]
  7968. 2025-07-19 23:45:35,023 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.16s/it] 50%|█████ | 2/4 [00:01<00:01, 1.61it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.22it/s] 100%|██████████| 4/4 [00:01<00:00, 2.72it/s] 100%|██████████| 4/4 [00:01<00:00, 2.11it/s]
  7969. 2025-07-19 23:45:35,024 - sglang - INFO - [2025-07-19 23:45:35 TP0] Capture cuda graph end. Time elapsed: 1.90 s
  7970. 2025-07-19 23:45:35,024 - __main__ - INFO - [2025-07-19 23:45:35 TP0] Capture cuda graph end. Time elapsed: 1.90 s
  7971. 2025-07-19 23:45:38,605 - sglang - INFO - [2025-07-19 23:45:38 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  7972. 2025-07-19 23:45:38,605 - __main__ - INFO - [2025-07-19 23:45:38 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  7973. 2025-07-19 23:45:38,692 - sglang - INFO - [2025-07-19 23:45:38] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  7974. 2025-07-19 23:45:38,693 - __main__ - INFO - [2025-07-19 23:45:38] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  7975. 2025-07-19 23:45:39,393 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  7976. 2025-07-19 23:45:45,471 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  7977. 2025-07-19 23:45:51,550 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  7978. 2025-07-19 23:45:57,627 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  7979. 2025-07-19 23:46:03,706 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  7980. 2025-07-19 23:46:09,785 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  7981. 2025-07-19 23:46:15,863 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  7982. 2025-07-19 23:46:21,942 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  7983. 2025-07-19 23:46:28,021 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  7984. 2025-07-19 23:46:34,102 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  7985. 2025-07-19 23:46:40,182 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  7986. 2025-07-19 23:46:46,261 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  7987. 2025-07-19 23:46:52,341 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  7988. 2025-07-19 23:46:58,289 - sglang - INFO - Process Process-1:
  7989. 2025-07-19 23:46:58,289 - __main__ - INFO - Process Process-1:
  7990. 2025-07-19 23:46:58,289 - sglang - INFO - Process Process-2:
  7991. 2025-07-19 23:46:58,289 - __main__ - INFO - Process Process-2:
  7992. 2025-07-19 23:46:58,290 - __main__ - INFO - Got cancellation request for SGLang server
  7993. 2025-07-19 23:47:07,596 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  7994. 2025-07-19 23:47:07,597 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  7995. 2025-07-19 23:47:07,597 - __main__ - INFO - Found 1 total pdf paths to add
  7996. 2025-07-19 23:47:07,602 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  7997. 2025-07-19 23:47:07,823 - __main__ - INFO - Starting pipeline with PID 561326
  7998. 2025-07-19 23:47:07,824 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  7999. 2025-07-19 23:47:13,807 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  8000. 2025-07-19 23:47:16,268 - sglang - INFO - [2025-07-19 23:47:16] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=716861363, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8001. 2025-07-19 23:47:16,268 - __main__ - INFO - [2025-07-19 23:47:16] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=716861363, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8002. 2025-07-19 23:47:19,909 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  8003. 2025-07-19 23:47:21,758 - sglang - INFO - [2025-07-19 23:47:21] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8004. 2025-07-19 23:47:21,758 - __main__ - INFO - [2025-07-19 23:47:21] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8005. 2025-07-19 23:47:25,990 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  8006. 2025-07-19 23:47:32,052 - sglang - INFO - [2025-07-19 23:47:32 TP0] Overlap scheduler is disabled for multimodal models.
  8007. 2025-07-19 23:47:32,052 - __main__ - INFO - [2025-07-19 23:47:32 TP0] Overlap scheduler is disabled for multimodal models.
  8008. 2025-07-19 23:47:32,070 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  8009. 2025-07-19 23:47:32,712 - sglang - INFO - [2025-07-19 23:47:32 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8010. 2025-07-19 23:47:32,712 - __main__ - INFO - [2025-07-19 23:47:32 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8011. 2025-07-19 23:47:32,712 - sglang - INFO - [2025-07-19 23:47:32 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8012. 2025-07-19 23:47:32,712 - __main__ - INFO - [2025-07-19 23:47:32 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8013. 2025-07-19 23:47:32,712 - sglang - INFO - [2025-07-19 23:47:32 TP0] Init torch distributed begin.
  8014. 2025-07-19 23:47:32,712 - __main__ - INFO - [2025-07-19 23:47:32 TP0] Init torch distributed begin.
  8015. 2025-07-19 23:47:38,110 - sglang - INFO - [2025-07-19 23:47:38 TP0] Load weight begin. avail mem=23.33 GB
  8016. 2025-07-19 23:47:38,110 - __main__ - INFO - [2025-07-19 23:47:38 TP0] Load weight begin. avail mem=23.33 GB
  8017. 2025-07-19 23:47:38,151 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  8018. 2025-07-19 23:47:39,227 - sglang - INFO - [2025-07-19 23:47:39 TP0] Using model weights format ['*.safetensors']
  8019. 2025-07-19 23:47:39,227 - __main__ - INFO - [2025-07-19 23:47:39 TP0] Using model weights format ['*.safetensors']
  8020. 2025-07-19 23:47:39,795 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8021. 2025-07-19 23:47:39,795 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8022. 2025-07-19 23:47:40,080 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.51it/s]
  8023. 2025-07-19 23:47:40,080 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.51it/s]
  8024. 2025-07-19 23:47:40,878 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.70it/s]
  8025. 2025-07-19 23:47:40,878 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.70it/s]
  8026. 2025-07-19 23:47:41,647 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.49it/s]
  8027. 2025-07-19 23:47:41,648 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.49it/s]
  8028. 2025-07-19 23:47:42,527 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.33it/s]
  8029. 2025-07-19 23:47:42,527 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.33it/s]
  8030. 2025-07-19 23:47:42,527 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.46it/s]
  8031. 2025-07-19 23:47:42,527 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.46it/s]
  8032. 2025-07-19 23:47:42,527 - sglang - INFO -
  8033. 2025-07-19 23:47:42,527 - __main__ - INFO -
  8034. 2025-07-19 23:47:42,684 - sglang - INFO - [2025-07-19 23:47:42 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8035. 2025-07-19 23:47:42,684 - __main__ - INFO - [2025-07-19 23:47:42 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8036. 2025-07-19 23:47:42,693 - sglang - INFO - [2025-07-19 23:47:42 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8037. 2025-07-19 23:47:42,693 - __main__ - INFO - [2025-07-19 23:47:42 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8038. 2025-07-19 23:47:42,693 - sglang - INFO - [2025-07-19 23:47:42 TP0] Memory pool end. avail mem=5.30 GB
  8039. 2025-07-19 23:47:42,693 - __main__ - INFO - [2025-07-19 23:47:42 TP0] Memory pool end. avail mem=5.30 GB
  8040. 2025-07-19 23:47:42,894 - sglang - INFO - [2025-07-19 23:47:42 TP0] Capture cuda graph begin. This can take up to several minutes.
  8041. 2025-07-19 23:47:42,895 - __main__ - INFO - [2025-07-19 23:47:42 TP0] Capture cuda graph begin. This can take up to several minutes.
  8042. 2025-07-19 23:47:44,231 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  8043. 2025-07-19 23:47:44,720 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.11s/it] 50%|█████ | 2/4 [00:01<00:01, 1.67it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.31it/s] 100%|██████████| 4/4 [00:01<00:00, 2.82it/s] 100%|██████████| 4/4 [00:01<00:00, 2.20it/s]
  8044. 2025-07-19 23:47:44,720 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.11s/it] 50%|█████ | 2/4 [00:01<00:01, 1.67it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.31it/s] 100%|██████████| 4/4 [00:01<00:00, 2.82it/s] 100%|██████████| 4/4 [00:01<00:00, 2.20it/s]
  8045. 2025-07-19 23:47:44,721 - sglang - INFO - [2025-07-19 23:47:44 TP0] Capture cuda graph end. Time elapsed: 1.83 s
  8046. 2025-07-19 23:47:44,721 - __main__ - INFO - [2025-07-19 23:47:44 TP0] Capture cuda graph end. Time elapsed: 1.83 s
  8047. 2025-07-19 23:47:47,910 - sglang - INFO - [2025-07-19 23:47:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8048. 2025-07-19 23:47:47,910 - __main__ - INFO - [2025-07-19 23:47:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8049. 2025-07-19 23:47:47,999 - sglang - INFO - [2025-07-19 23:47:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8050. 2025-07-19 23:47:47,999 - __main__ - INFO - [2025-07-19 23:47:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8051. 2025-07-19 23:47:50,313 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  8052. 2025-07-19 23:47:56,392 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  8053. 2025-07-19 23:48:02,471 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  8054. 2025-07-19 23:48:08,550 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  8055. 2025-07-19 23:48:14,629 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  8056. 2025-07-19 23:48:20,709 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  8057. 2025-07-19 23:48:26,788 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  8058. 2025-07-19 23:48:30,539 - sglang - INFO - Process Process-2:
  8059. 2025-07-19 23:48:30,539 - __main__ - INFO - Process Process-2:
  8060. 2025-07-19 23:48:30,540 - sglang - INFO - Process Process-1:
  8061. 2025-07-19 23:48:30,540 - __main__ - INFO - Process Process-1:
  8062. 2025-07-19 23:48:30,540 - __main__ - INFO - Got cancellation request for SGLang server
  8063. 2025-07-19 23:49:00,325 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  8064. 2025-07-19 23:49:00,325 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  8065. 2025-07-19 23:49:00,325 - __main__ - INFO - Found 1 total pdf paths to add
  8066. 2025-07-19 23:49:00,330 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  8067. 2025-07-19 23:49:00,552 - __main__ - INFO - Starting pipeline with PID 562231
  8068. 2025-07-19 23:49:00,552 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  8069. 2025-07-19 23:49:11,172 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  8070. 2025-07-19 23:49:12,801 - sglang - INFO - [2025-07-19 23:49:12] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=109481094, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8071. 2025-07-19 23:49:12,801 - __main__ - INFO - [2025-07-19 23:49:12] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=109481094, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8072. 2025-07-19 23:49:17,223 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  8073. 2025-07-19 23:49:22,212 - sglang - INFO - [2025-07-19 23:49:22] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8074. 2025-07-19 23:49:22,212 - __main__ - INFO - [2025-07-19 23:49:22] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8075. 2025-07-19 23:49:23,129 - sglang - INFO - [2025-07-19 23:49:23 TP0] Overlap scheduler is disabled for multimodal models.
  8076. 2025-07-19 23:49:23,129 - __main__ - INFO - [2025-07-19 23:49:23 TP0] Overlap scheduler is disabled for multimodal models.
  8077. 2025-07-19 23:49:23,300 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  8078. 2025-07-19 23:49:23,885 - sglang - INFO - [2025-07-19 23:49:23 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8079. 2025-07-19 23:49:23,885 - __main__ - INFO - [2025-07-19 23:49:23 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8080. 2025-07-19 23:49:23,886 - sglang - INFO - [2025-07-19 23:49:23 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8081. 2025-07-19 23:49:23,886 - __main__ - INFO - [2025-07-19 23:49:23 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8082. 2025-07-19 23:49:23,886 - sglang - INFO - [2025-07-19 23:49:23 TP0] Init torch distributed begin.
  8083. 2025-07-19 23:49:23,886 - __main__ - INFO - [2025-07-19 23:49:23 TP0] Init torch distributed begin.
  8084. 2025-07-19 23:49:29,268 - sglang - INFO - [2025-07-19 23:49:29 TP0] Load weight begin. avail mem=23.33 GB
  8085. 2025-07-19 23:49:29,268 - __main__ - INFO - [2025-07-19 23:49:29 TP0] Load weight begin. avail mem=23.33 GB
  8086. 2025-07-19 23:49:29,378 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  8087. 2025-07-19 23:49:31,052 - sglang - INFO - [2025-07-19 23:49:31 TP0] Using model weights format ['*.safetensors']
  8088. 2025-07-19 23:49:31,052 - __main__ - INFO - [2025-07-19 23:49:31 TP0] Using model weights format ['*.safetensors']
  8089. 2025-07-19 23:49:31,692 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8090. 2025-07-19 23:49:31,693 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8091. 2025-07-19 23:49:31,977 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.51it/s]
  8092. 2025-07-19 23:49:31,978 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.51it/s]
  8093. 2025-07-19 23:49:32,770 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.71it/s]
  8094. 2025-07-19 23:49:32,771 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.71it/s]
  8095. 2025-07-19 23:49:33,535 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.50it/s]
  8096. 2025-07-19 23:49:33,535 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.50it/s]
  8097. 2025-07-19 23:49:34,409 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.34it/s]
  8098. 2025-07-19 23:49:34,409 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.34it/s]
  8099. 2025-07-19 23:49:34,409 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.47it/s]
  8100. 2025-07-19 23:49:34,409 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.47it/s]
  8101. 2025-07-19 23:49:34,410 - sglang - INFO -
  8102. 2025-07-19 23:49:34,410 - __main__ - INFO -
  8103. 2025-07-19 23:49:34,567 - sglang - INFO - [2025-07-19 23:49:34 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8104. 2025-07-19 23:49:34,567 - __main__ - INFO - [2025-07-19 23:49:34 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8105. 2025-07-19 23:49:34,574 - sglang - INFO - [2025-07-19 23:49:34 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8106. 2025-07-19 23:49:34,574 - __main__ - INFO - [2025-07-19 23:49:34 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8107. 2025-07-19 23:49:34,575 - sglang - INFO - [2025-07-19 23:49:34 TP0] Memory pool end. avail mem=5.30 GB
  8108. 2025-07-19 23:49:34,575 - __main__ - INFO - [2025-07-19 23:49:34 TP0] Memory pool end. avail mem=5.30 GB
  8109. 2025-07-19 23:49:34,768 - sglang - INFO - [2025-07-19 23:49:34 TP0] Capture cuda graph begin. This can take up to several minutes.
  8110. 2025-07-19 23:49:34,768 - __main__ - INFO - [2025-07-19 23:49:34 TP0] Capture cuda graph begin. This can take up to several minutes.
  8111. 2025-07-19 23:49:35,456 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  8112. 2025-07-19 23:49:36,569 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.08s/it] 50%|█████ | 2/4 [00:01<00:01, 1.70it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.34it/s] 100%|██████████| 4/4 [00:01<00:00, 2.84it/s] 100%|██████████| 4/4 [00:01<00:00, 2.23it/s]
  8113. 2025-07-19 23:49:36,569 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.08s/it] 50%|█████ | 2/4 [00:01<00:01, 1.70it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.34it/s] 100%|██████████| 4/4 [00:01<00:00, 2.84it/s] 100%|██████████| 4/4 [00:01<00:00, 2.23it/s]
  8114. 2025-07-19 23:49:36,570 - sglang - INFO - [2025-07-19 23:49:36 TP0] Capture cuda graph end. Time elapsed: 1.80 s
  8115. 2025-07-19 23:49:36,570 - __main__ - INFO - [2025-07-19 23:49:36 TP0] Capture cuda graph end. Time elapsed: 1.80 s
  8116. 2025-07-19 23:49:41,048 - sglang - INFO - [2025-07-19 23:49:41 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8117. 2025-07-19 23:49:41,049 - __main__ - INFO - [2025-07-19 23:49:41 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8118. 2025-07-19 23:49:41,131 - sglang - INFO - [2025-07-19 23:49:41] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8119. 2025-07-19 23:49:41,131 - __main__ - INFO - [2025-07-19 23:49:41] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8120. 2025-07-19 23:49:41,536 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  8121. 2025-07-19 23:49:47,615 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  8122. 2025-07-19 23:49:53,695 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  8123. 2025-07-19 23:49:59,774 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  8124. 2025-07-19 23:50:05,853 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  8125. 2025-07-19 23:50:11,933 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  8126. 2025-07-19 23:50:18,013 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  8127. 2025-07-19 23:50:24,091 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  8128. 2025-07-19 23:50:30,170 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  8129. 2025-07-19 23:50:36,249 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  8130. 2025-07-19 23:50:42,329 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  8131. 2025-07-19 23:50:48,408 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  8132. 2025-07-19 23:50:54,488 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  8133. 2025-07-19 23:51:00,567 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  8134. 2025-07-19 23:51:06,647 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  8135. 2025-07-19 23:51:12,727 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  8136. 2025-07-19 23:51:18,807 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  8137. 2025-07-19 23:51:24,887 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  8138. 2025-07-19 23:51:30,967 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  8139. 2025-07-19 23:51:37,046 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  8140. 2025-07-19 23:51:43,126 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  8141. 2025-07-19 23:51:49,206 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  8142. 2025-07-19 23:51:55,286 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  8143. 2025-07-19 23:52:01,366 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  8144. 2025-07-19 23:52:07,446 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  8145. 2025-07-19 23:52:13,524 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  8146. 2025-07-19 23:52:19,604 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  8147. 2025-07-19 23:52:25,683 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  8148. 2025-07-19 23:52:31,764 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  8149. 2025-07-19 23:52:37,842 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  8150. 2025-07-19 23:52:43,923 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  8151. 2025-07-19 23:52:50,002 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  8152. 2025-07-19 23:52:56,083 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  8153. 2025-07-19 23:53:02,162 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  8154. 2025-07-19 23:53:08,241 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  8155. 2025-07-19 23:53:14,321 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  8156. 2025-07-19 23:53:20,400 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  8157. 2025-07-19 23:53:26,478 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  8158. 2025-07-19 23:53:32,558 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  8159. 2025-07-19 23:53:38,637 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  8160. 2025-07-19 23:53:44,718 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  8161. 2025-07-19 23:53:50,798 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  8162. 2025-07-19 23:53:56,878 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  8163. 2025-07-19 23:54:02,958 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  8164. 2025-07-19 23:54:09,037 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  8165. 2025-07-19 23:54:15,115 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  8166. 2025-07-19 23:54:21,195 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  8167. 2025-07-19 23:54:27,275 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  8168. 2025-07-19 23:54:33,355 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  8169. 2025-07-19 23:54:39,435 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  8170. 2025-07-19 23:54:45,515 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  8171. 2025-07-19 23:54:50,335 - sglang - INFO - Process Process-2:
  8172. 2025-07-19 23:54:50,335 - __main__ - INFO - Process Process-2:
  8173. 2025-07-19 23:54:50,336 - sglang - INFO - Process Process-1:
  8174. 2025-07-19 23:54:50,336 - __main__ - INFO - Process Process-1:
  8175. 2025-07-19 23:54:50,336 - sglang - INFO - Traceback (most recent call last):
  8176. 2025-07-19 23:54:50,336 - __main__ - INFO - Traceback (most recent call last):
  8177. 2025-07-19 23:54:50,336 - sglang - INFO - Traceback (most recent call last):
  8178. 2025-07-19 23:54:50,336 - __main__ - INFO - Traceback (most recent call last):
  8179. 2025-07-19 23:54:50,336 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
  8180. 2025-07-19 23:54:50,336 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
  8181. 2025-07-19 23:54:50,336 - sglang - INFO - self.run()
  8182. 2025-07-19 23:54:50,337 - __main__ - INFO - self.run()
  8183. 2025-07-19 23:54:50,337 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 108, in run
  8184. 2025-07-19 23:54:50,337 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 108, in run
  8185. 2025-07-19 23:54:50,337 - sglang - INFO - self._target(*self._args, **self._kwargs)
  8186. 2025-07-19 23:54:50,337 - __main__ - INFO - self._target(*self._args, **self._kwargs)
  8187. 2025-07-19 23:54:50,337 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1784, in run_scheduler_process
  8188. 2025-07-19 23:54:50,337 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1784, in run_scheduler_process
  8189. 2025-07-19 23:54:50,337 - sglang - INFO - scheduler.event_loop_normal()
  8190. 2025-07-19 23:54:50,337 - __main__ - INFO - scheduler.event_loop_normal()
  8191. 2025-07-19 23:54:50,337 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
  8192. 2025-07-19 23:54:50,337 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
  8193. 2025-07-19 23:54:50,337 - sglang - INFO - return func(*args, **kwargs)
  8194. 2025-07-19 23:54:50,337 - __main__ - INFO - return func(*args, **kwargs)
  8195. 2025-07-19 23:54:50,338 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^
  8196. 2025-07-19 23:54:50,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^
  8197. 2025-07-19 23:54:50,338 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 473, in event_loop_normal
  8198. 2025-07-19 23:54:50,338 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 473, in event_loop_normal
  8199. 2025-07-19 23:54:50,338 - sglang - INFO - batch = self.get_next_batch_to_run()
  8200. 2025-07-19 23:54:50,338 - __main__ - INFO - batch = self.get_next_batch_to_run()
  8201. 2025-07-19 23:54:50,338 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8202. 2025-07-19 23:54:50,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8203. 2025-07-19 23:54:50,338 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 870, in get_next_batch_to_run
  8204. 2025-07-19 23:54:50,338 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 870, in get_next_batch_to_run
  8205. 2025-07-19 23:54:50,338 - sglang - INFO - new_batch = self.get_new_batch_prefill()
  8206. 2025-07-19 23:54:50,338 - __main__ - INFO - new_batch = self.get_new_batch_prefill()
  8207. 2025-07-19 23:54:50,338 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8208. 2025-07-19 23:54:50,339 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8209. 2025-07-19 23:54:50,339 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 888, in get_new_batch_prefill
  8210. 2025-07-19 23:54:50,339 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 888, in get_new_batch_prefill
  8211. 2025-07-19 23:54:50,339 - sglang - INFO - def get_new_batch_prefill(self) -> Optional[ScheduleBatch]:
  8212. 2025-07-19 23:54:50,339 - __main__ - INFO - def get_new_batch_prefill(self) -> Optional[ScheduleBatch]:
  8213. 2025-07-19 23:54:50,339 - sglang - INFO -
  8214. 2025-07-19 23:54:50,339 - __main__ - INFO -
  8215. 2025-07-19 23:54:50,339 - sglang - INFO - KeyboardInterrupt
  8216. 2025-07-19 23:54:50,339 - __main__ - INFO - KeyboardInterrupt
  8217. 2025-07-19 23:54:50,340 - __main__ - INFO - Got cancellation request for SGLang server
  8218. 2025-07-19 23:57:11,442 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  8219. 2025-07-19 23:57:11,442 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  8220. 2025-07-19 23:57:11,442 - __main__ - INFO - Found 1 total pdf paths to add
  8221. 2025-07-19 23:57:11,447 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  8222. 2025-07-19 23:57:11,683 - __main__ - INFO - Starting pipeline with PID 563187
  8223. 2025-07-19 23:57:11,683 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  8224. 2025-07-19 23:57:17,476 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  8225. 2025-07-19 23:57:19,571 - sglang - INFO - [2025-07-19 23:57:19] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=729530513, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8226. 2025-07-19 23:57:19,572 - __main__ - INFO - [2025-07-19 23:57:19] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=729530513, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8227. 2025-07-19 23:57:23,609 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  8228. 2025-07-19 23:57:28,966 - sglang - INFO - [2025-07-19 23:57:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8229. 2025-07-19 23:57:28,966 - __main__ - INFO - [2025-07-19 23:57:28] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8230. 2025-07-19 23:57:29,709 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  8231. 2025-07-19 23:57:30,229 - sglang - INFO - [2025-07-19 23:57:30 TP0] Overlap scheduler is disabled for multimodal models.
  8232. 2025-07-19 23:57:30,229 - __main__ - INFO - [2025-07-19 23:57:30 TP0] Overlap scheduler is disabled for multimodal models.
  8233. 2025-07-19 23:57:30,904 - sglang - INFO - [2025-07-19 23:57:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8234. 2025-07-19 23:57:30,904 - __main__ - INFO - [2025-07-19 23:57:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8235. 2025-07-19 23:57:30,904 - sglang - INFO - [2025-07-19 23:57:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8236. 2025-07-19 23:57:30,904 - __main__ - INFO - [2025-07-19 23:57:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8237. 2025-07-19 23:57:30,904 - sglang - INFO - [2025-07-19 23:57:30 TP0] Init torch distributed begin.
  8238. 2025-07-19 23:57:30,905 - __main__ - INFO - [2025-07-19 23:57:30 TP0] Init torch distributed begin.
  8239. 2025-07-19 23:57:35,789 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  8240. 2025-07-19 23:57:36,344 - sglang - INFO - [2025-07-19 23:57:36 TP0] Load weight begin. avail mem=23.33 GB
  8241. 2025-07-19 23:57:36,344 - __main__ - INFO - [2025-07-19 23:57:36 TP0] Load weight begin. avail mem=23.33 GB
  8242. 2025-07-19 23:57:37,496 - sglang - INFO - [2025-07-19 23:57:37 TP0] Using model weights format ['*.safetensors']
  8243. 2025-07-19 23:57:37,496 - __main__ - INFO - [2025-07-19 23:57:37 TP0] Using model weights format ['*.safetensors']
  8244. 2025-07-19 23:57:39,017 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8245. 2025-07-19 23:57:39,018 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8246. 2025-07-19 23:57:39,310 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.42it/s]
  8247. 2025-07-19 23:57:39,311 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.42it/s]
  8248. 2025-07-19 23:57:40,121 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
  8249. 2025-07-19 23:57:40,121 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.67it/s]
  8250. 2025-07-19 23:57:40,903 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
  8251. 2025-07-19 23:57:40,903 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
  8252. 2025-07-19 23:57:41,781 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.32it/s]
  8253. 2025-07-19 23:57:41,781 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.32it/s]
  8254. 2025-07-19 23:57:41,781 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.45it/s]
  8255. 2025-07-19 23:57:41,781 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.45it/s]
  8256. 2025-07-19 23:57:41,781 - sglang - INFO -
  8257. 2025-07-19 23:57:41,781 - __main__ - INFO -
  8258. 2025-07-19 23:57:41,869 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  8259. 2025-07-19 23:57:41,937 - sglang - INFO - [2025-07-19 23:57:41 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8260. 2025-07-19 23:57:41,937 - __main__ - INFO - [2025-07-19 23:57:41 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8261. 2025-07-19 23:57:41,944 - sglang - INFO - [2025-07-19 23:57:41 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8262. 2025-07-19 23:57:41,944 - __main__ - INFO - [2025-07-19 23:57:41 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8263. 2025-07-19 23:57:41,944 - sglang - INFO - [2025-07-19 23:57:41 TP0] Memory pool end. avail mem=5.30 GB
  8264. 2025-07-19 23:57:41,944 - __main__ - INFO - [2025-07-19 23:57:41 TP0] Memory pool end. avail mem=5.30 GB
  8265. 2025-07-19 23:57:42,127 - sglang - INFO - [2025-07-19 23:57:42 TP0] Capture cuda graph begin. This can take up to several minutes.
  8266. 2025-07-19 23:57:42,127 - __main__ - INFO - [2025-07-19 23:57:42 TP0] Capture cuda graph begin. This can take up to several minutes.
  8267. 2025-07-19 23:57:44,022 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.17s/it] 50%|█████ | 2/4 [00:01<00:01, 1.61it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.23it/s] 100%|██████████| 4/4 [00:01<00:00, 2.73it/s] 100%|██████████| 4/4 [00:01<00:00, 2.11it/s]
  8268. 2025-07-19 23:57:44,022 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.17s/it] 50%|█████ | 2/4 [00:01<00:01, 1.61it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.23it/s] 100%|██████████| 4/4 [00:01<00:00, 2.73it/s] 100%|██████████| 4/4 [00:01<00:00, 2.11it/s]
  8269. 2025-07-19 23:57:44,023 - sglang - INFO - [2025-07-19 23:57:44 TP0] Capture cuda graph end. Time elapsed: 1.90 s
  8270. 2025-07-19 23:57:44,023 - __main__ - INFO - [2025-07-19 23:57:44 TP0] Capture cuda graph end. Time elapsed: 1.90 s
  8271. 2025-07-19 23:57:47,287 - sglang - INFO - [2025-07-19 23:57:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8272. 2025-07-19 23:57:47,288 - __main__ - INFO - [2025-07-19 23:57:47 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8273. 2025-07-19 23:57:47,371 - sglang - INFO - [2025-07-19 23:57:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8274. 2025-07-19 23:57:47,371 - __main__ - INFO - [2025-07-19 23:57:47] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8275. 2025-07-19 23:57:47,958 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  8276. 2025-07-19 23:57:54,039 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  8277. 2025-07-19 23:58:00,119 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  8278. 2025-07-19 23:58:06,199 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  8279. 2025-07-19 23:58:12,278 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  8280. 2025-07-19 23:58:18,359 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  8281. 2025-07-19 23:58:24,437 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  8282. 2025-07-19 23:58:30,517 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  8283. 2025-07-19 23:58:36,596 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  8284. 2025-07-19 23:58:42,675 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  8285. 2025-07-19 23:58:48,756 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  8286. 2025-07-19 23:58:54,835 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  8287. 2025-07-19 23:59:00,915 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  8288. 2025-07-19 23:59:06,995 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  8289. 2025-07-19 23:59:13,075 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  8290. 2025-07-19 23:59:19,155 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  8291. 2025-07-19 23:59:25,235 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  8292. 2025-07-19 23:59:31,315 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  8293. 2025-07-19 23:59:37,395 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  8294. 2025-07-19 23:59:43,475 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  8295. 2025-07-19 23:59:49,555 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  8296. 2025-07-19 23:59:55,634 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  8297. 2025-07-20 00:00:01,715 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  8298. 2025-07-20 00:00:07,785 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  8299. 2025-07-20 00:00:13,864 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  8300. 2025-07-20 00:00:19,944 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  8301. 2025-07-20 00:00:26,023 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  8302. 2025-07-20 00:00:32,104 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  8303. 2025-07-20 00:00:38,183 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  8304. 2025-07-20 00:00:44,263 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  8305. 2025-07-20 00:00:48,050 - sglang - INFO - Process Process-2:
  8306. 2025-07-20 00:00:48,050 - __main__ - INFO - Process Process-2:
  8307. 2025-07-20 00:00:48,050 - sglang - INFO - Process Process-1:
  8308. 2025-07-20 00:00:48,050 - __main__ - INFO - Process Process-1:
  8309. 2025-07-20 00:00:48,050 - __main__ - INFO - Got cancellation request for SGLang server
  8310. 2025-07-20 00:02:33,950 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  8311. 2025-07-20 00:02:33,950 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  8312. 2025-07-20 00:02:33,950 - __main__ - INFO - Found 1 total pdf paths to add
  8313. 2025-07-20 00:02:33,955 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  8314. 2025-07-20 00:02:34,199 - __main__ - INFO - Starting pipeline with PID 564088
  8315. 2025-07-20 00:02:34,199 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  8316. 2025-07-20 00:02:40,299 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  8317. 2025-07-20 00:02:42,770 - sglang - INFO - [2025-07-20 00:02:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=169654265, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8318. 2025-07-20 00:02:42,771 - __main__ - INFO - [2025-07-20 00:02:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=169654265, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8319. 2025-07-20 00:02:46,501 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  8320. 2025-07-20 00:02:51,927 - sglang - INFO - [2025-07-20 00:02:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8321. 2025-07-20 00:02:51,927 - __main__ - INFO - [2025-07-20 00:02:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8322. 2025-07-20 00:02:52,559 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  8323. 2025-07-20 00:02:58,161 - sglang - INFO - [2025-07-20 00:02:58 TP0] Overlap scheduler is disabled for multimodal models.
  8324. 2025-07-20 00:02:58,161 - __main__ - INFO - [2025-07-20 00:02:58 TP0] Overlap scheduler is disabled for multimodal models.
  8325. 2025-07-20 00:02:58,643 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  8326. 2025-07-20 00:02:58,824 - sglang - INFO - [2025-07-20 00:02:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8327. 2025-07-20 00:02:58,824 - __main__ - INFO - [2025-07-20 00:02:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8328. 2025-07-20 00:02:58,824 - sglang - INFO - [2025-07-20 00:02:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8329. 2025-07-20 00:02:58,825 - __main__ - INFO - [2025-07-20 00:02:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8330. 2025-07-20 00:02:58,825 - sglang - INFO - [2025-07-20 00:02:58 TP0] Init torch distributed begin.
  8331. 2025-07-20 00:02:58,825 - __main__ - INFO - [2025-07-20 00:02:58 TP0] Init torch distributed begin.
  8332. 2025-07-20 00:03:04,227 - sglang - INFO - [2025-07-20 00:03:04 TP0] Load weight begin. avail mem=23.33 GB
  8333. 2025-07-20 00:03:04,228 - __main__ - INFO - [2025-07-20 00:03:04 TP0] Load weight begin. avail mem=23.33 GB
  8334. 2025-07-20 00:03:04,726 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  8335. 2025-07-20 00:03:05,805 - sglang - INFO - [2025-07-20 00:03:05 TP0] Using model weights format ['*.safetensors']
  8336. 2025-07-20 00:03:05,805 - __main__ - INFO - [2025-07-20 00:03:05 TP0] Using model weights format ['*.safetensors']
  8337. 2025-07-20 00:03:06,331 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8338. 2025-07-20 00:03:06,331 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8339. 2025-07-20 00:03:06,620 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.47it/s]
  8340. 2025-07-20 00:03:06,620 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.47it/s]
  8341. 2025-07-20 00:03:07,428 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.68it/s]
  8342. 2025-07-20 00:03:07,428 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.68it/s]
  8343. 2025-07-20 00:03:08,211 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
  8344. 2025-07-20 00:03:08,211 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
  8345. 2025-07-20 00:03:09,089 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.32it/s]
  8346. 2025-07-20 00:03:09,089 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.32it/s]
  8347. 2025-07-20 00:03:09,089 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.45it/s]
  8348. 2025-07-20 00:03:09,089 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.45it/s]
  8349. 2025-07-20 00:03:09,089 - sglang - INFO -
  8350. 2025-07-20 00:03:09,090 - __main__ - INFO -
  8351. 2025-07-20 00:03:09,248 - sglang - INFO - [2025-07-20 00:03:09 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8352. 2025-07-20 00:03:09,248 - __main__ - INFO - [2025-07-20 00:03:09 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8353. 2025-07-20 00:03:09,255 - sglang - INFO - [2025-07-20 00:03:09 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8354. 2025-07-20 00:03:09,255 - __main__ - INFO - [2025-07-20 00:03:09 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8355. 2025-07-20 00:03:09,255 - sglang - INFO - [2025-07-20 00:03:09 TP0] Memory pool end. avail mem=5.30 GB
  8356. 2025-07-20 00:03:09,256 - __main__ - INFO - [2025-07-20 00:03:09 TP0] Memory pool end. avail mem=5.30 GB
  8357. 2025-07-20 00:03:09,439 - sglang - INFO - [2025-07-20 00:03:09 TP0] Capture cuda graph begin. This can take up to several minutes.
  8358. 2025-07-20 00:03:09,439 - __main__ - INFO - [2025-07-20 00:03:09 TP0] Capture cuda graph begin. This can take up to several minutes.
  8359. 2025-07-20 00:03:10,809 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  8360. 2025-07-20 00:03:11,352 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.18s/it] 50%|█████ | 2/4 [00:01<00:01, 1.59it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.21it/s] 100%|██████████| 4/4 [00:01<00:00, 2.70it/s] 100%|██████████| 4/4 [00:01<00:00, 2.09it/s]
  8361. 2025-07-20 00:03:11,352 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.18s/it] 50%|█████ | 2/4 [00:01<00:01, 1.59it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.21it/s] 100%|██████████| 4/4 [00:01<00:00, 2.70it/s] 100%|██████████| 4/4 [00:01<00:00, 2.09it/s]
  8362. 2025-07-20 00:03:11,353 - sglang - INFO - [2025-07-20 00:03:11 TP0] Capture cuda graph end. Time elapsed: 1.91 s
  8363. 2025-07-20 00:03:11,353 - __main__ - INFO - [2025-07-20 00:03:11 TP0] Capture cuda graph end. Time elapsed: 1.91 s
  8364. 2025-07-20 00:03:14,481 - sglang - INFO - [2025-07-20 00:03:14 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8365. 2025-07-20 00:03:14,481 - __main__ - INFO - [2025-07-20 00:03:14 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8366. 2025-07-20 00:03:14,563 - sglang - INFO - [2025-07-20 00:03:14] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8367. 2025-07-20 00:03:14,563 - __main__ - INFO - [2025-07-20 00:03:14] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8368. 2025-07-20 00:03:16,892 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  8369. 2025-07-20 00:03:22,972 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  8370. 2025-07-20 00:03:29,043 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  8371. 2025-07-20 00:03:35,124 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  8372. 2025-07-20 00:03:41,169 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  8373. 2025-07-20 00:03:47,215 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  8374. 2025-07-20 00:03:53,260 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  8375. 2025-07-20 00:03:59,305 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  8376. 2025-07-20 00:04:05,350 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  8377. 2025-07-20 00:04:11,394 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  8378. 2025-07-20 00:04:17,440 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  8379. 2025-07-20 00:04:23,486 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  8380. 2025-07-20 00:04:29,567 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  8381. 2025-07-20 00:04:35,649 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  8382. 2025-07-20 00:04:41,729 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  8383. 2025-07-20 00:04:47,812 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  8384. 2025-07-20 00:04:53,893 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  8385. 2025-07-20 00:04:59,974 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  8386. 2025-07-20 00:05:06,055 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  8387. 2025-07-20 00:05:07,000 - __main__ - INFO - Got cancellation request for SGLang server
  8388. 2025-07-20 00:07:13,942 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  8389. 2025-07-20 00:07:13,942 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  8390. 2025-07-20 00:07:13,942 - __main__ - INFO - Found 1 total pdf paths to add
  8391. 2025-07-20 00:07:13,947 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  8392. 2025-07-20 00:07:14,179 - __main__ - INFO - Starting pipeline with PID 565190
  8393. 2025-07-20 00:07:14,179 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  8394. 2025-07-20 00:07:26,746 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  8395. 2025-07-20 00:07:29,417 - sglang - INFO - [2025-07-20 00:07:29] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=578772715, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8396. 2025-07-20 00:07:29,417 - __main__ - INFO - [2025-07-20 00:07:29] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=578772715, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8397. 2025-07-20 00:07:32,827 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  8398. 2025-07-20 00:07:38,849 - sglang - INFO - [2025-07-20 00:07:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8399. 2025-07-20 00:07:38,850 - __main__ - INFO - [2025-07-20 00:07:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8400. 2025-07-20 00:07:38,906 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  8401. 2025-07-20 00:07:39,117 - sglang - INFO - [2025-07-20 00:07:39 TP0] Overlap scheduler is disabled for multimodal models.
  8402. 2025-07-20 00:07:39,117 - __main__ - INFO - [2025-07-20 00:07:39 TP0] Overlap scheduler is disabled for multimodal models.
  8403. 2025-07-20 00:07:40,082 - sglang - INFO - [2025-07-20 00:07:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8404. 2025-07-20 00:07:40,082 - __main__ - INFO - [2025-07-20 00:07:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8405. 2025-07-20 00:07:40,083 - sglang - INFO - [2025-07-20 00:07:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8406. 2025-07-20 00:07:40,083 - __main__ - INFO - [2025-07-20 00:07:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8407. 2025-07-20 00:07:40,083 - sglang - INFO - [2025-07-20 00:07:40 TP0] Init torch distributed begin.
  8408. 2025-07-20 00:07:40,083 - __main__ - INFO - [2025-07-20 00:07:40 TP0] Init torch distributed begin.
  8409. 2025-07-20 00:07:44,988 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  8410. 2025-07-20 00:07:45,468 - sglang - INFO - [2025-07-20 00:07:45 TP0] Load weight begin. avail mem=23.33 GB
  8411. 2025-07-20 00:07:45,469 - __main__ - INFO - [2025-07-20 00:07:45 TP0] Load weight begin. avail mem=23.33 GB
  8412. 2025-07-20 00:07:46,609 - sglang - INFO - [2025-07-20 00:07:46 TP0] Using model weights format ['*.safetensors']
  8413. 2025-07-20 00:07:46,609 - __main__ - INFO - [2025-07-20 00:07:46 TP0] Using model weights format ['*.safetensors']
  8414. 2025-07-20 00:07:47,619 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8415. 2025-07-20 00:07:47,619 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8416. 2025-07-20 00:07:47,947 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.05it/s]
  8417. 2025-07-20 00:07:47,947 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.05it/s]
  8418. 2025-07-20 00:07:48,860 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
  8419. 2025-07-20 00:07:48,860 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.49it/s]
  8420. 2025-07-20 00:07:49,735 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.31it/s]
  8421. 2025-07-20 00:07:49,735 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.31it/s]
  8422. 2025-07-20 00:07:50,729 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.17it/s]
  8423. 2025-07-20 00:07:50,729 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.17it/s]
  8424. 2025-07-20 00:07:50,729 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.29it/s]
  8425. 2025-07-20 00:07:50,729 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.29it/s]
  8426. 2025-07-20 00:07:50,730 - sglang - INFO -
  8427. 2025-07-20 00:07:50,730 - __main__ - INFO -
  8428. 2025-07-20 00:07:50,902 - sglang - INFO - [2025-07-20 00:07:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8429. 2025-07-20 00:07:50,902 - __main__ - INFO - [2025-07-20 00:07:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8430. 2025-07-20 00:07:50,910 - sglang - INFO - [2025-07-20 00:07:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8431. 2025-07-20 00:07:50,910 - __main__ - INFO - [2025-07-20 00:07:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8432. 2025-07-20 00:07:50,910 - sglang - INFO - [2025-07-20 00:07:50 TP0] Memory pool end. avail mem=5.30 GB
  8433. 2025-07-20 00:07:50,910 - __main__ - INFO - [2025-07-20 00:07:50 TP0] Memory pool end. avail mem=5.30 GB
  8434. 2025-07-20 00:07:51,069 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  8435. 2025-07-20 00:07:51,129 - sglang - INFO - [2025-07-20 00:07:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  8436. 2025-07-20 00:07:51,129 - __main__ - INFO - [2025-07-20 00:07:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  8437. 2025-07-20 00:07:52,868 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.02s/it] 50%|█████ | 2/4 [00:01<00:01, 1.78it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.42it/s] 100%|██████████| 4/4 [00:01<00:00, 2.91it/s] 100%|██████████| 4/4 [00:01<00:00, 2.31it/s]
  8438. 2025-07-20 00:07:52,868 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.02s/it] 50%|█████ | 2/4 [00:01<00:01, 1.78it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.42it/s] 100%|██████████| 4/4 [00:01<00:00, 2.91it/s] 100%|██████████| 4/4 [00:01<00:00, 2.31it/s]
  8439. 2025-07-20 00:07:52,868 - sglang - INFO - [2025-07-20 00:07:52 TP0] Capture cuda graph end. Time elapsed: 1.74 s
  8440. 2025-07-20 00:07:52,869 - __main__ - INFO - [2025-07-20 00:07:52 TP0] Capture cuda graph end. Time elapsed: 1.74 s
  8441. 2025-07-20 00:07:56,046 - sglang - INFO - [2025-07-20 00:07:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8442. 2025-07-20 00:07:56,046 - __main__ - INFO - [2025-07-20 00:07:56 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8443. 2025-07-20 00:07:56,132 - sglang - INFO - [2025-07-20 00:07:56] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8444. 2025-07-20 00:07:56,132 - __main__ - INFO - [2025-07-20 00:07:56] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8445. 2025-07-20 00:07:57,149 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  8446. 2025-07-20 00:08:03,230 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  8447. 2025-07-20 00:08:08,259 - sglang - INFO - Process Process-2:
  8448. 2025-07-20 00:08:08,259 - __main__ - INFO - Process Process-2:
  8449. 2025-07-20 00:08:08,259 - sglang - INFO - Process Process-1:
  8450. 2025-07-20 00:08:08,260 - __main__ - INFO - Process Process-1:
  8451. 2025-07-20 00:08:08,260 - __main__ - INFO - Got cancellation request for SGLang server
  8452. 2025-07-20 00:08:47,553 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  8453. 2025-07-20 00:08:47,553 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  8454. 2025-07-20 00:08:47,553 - __main__ - INFO - Found 1 total pdf paths to add
  8455. 2025-07-20 00:08:47,558 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  8456. 2025-07-20 00:08:47,756 - __main__ - INFO - Starting pipeline with PID 566220
  8457. 2025-07-20 00:08:47,756 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  8458. 2025-07-20 00:10:15,905 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  8459. 2025-07-20 00:10:15,905 - __main__ - INFO - Loading file at scripts/data/11440000MB2D0234372440125017014.pdf as PDF document
  8460. 2025-07-20 00:10:15,906 - __main__ - INFO - Found 1 total pdf paths to add
  8461. 2025-07-20 00:10:15,911 - __main__ - INFO - Calculated items_per_group: 27 based on average pages per PDF: 18.00
  8462. 2025-07-20 00:10:16,145 - __main__ - INFO - Starting pipeline with PID 566313
  8463. 2025-07-20 00:10:16,145 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  8464. 2025-07-20 00:10:21,832 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  8465. 2025-07-20 00:10:23,926 - sglang - INFO - [2025-07-20 00:10:23] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=825752942, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8466. 2025-07-20 00:10:23,926 - __main__ - INFO - [2025-07-20 00:10:23] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=825752942, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8467. 2025-07-20 00:10:27,966 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  8468. 2025-07-20 00:10:33,565 - sglang - INFO - [2025-07-20 00:10:33] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8469. 2025-07-20 00:10:33,565 - __main__ - INFO - [2025-07-20 00:10:33] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8470. 2025-07-20 00:10:34,041 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  8471. 2025-07-20 00:10:39,849 - sglang - INFO - [2025-07-20 00:10:39 TP0] Overlap scheduler is disabled for multimodal models.
  8472. 2025-07-20 00:10:39,850 - __main__ - INFO - [2025-07-20 00:10:39 TP0] Overlap scheduler is disabled for multimodal models.
  8473. 2025-07-20 00:10:40,120 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  8474. 2025-07-20 00:10:40,562 - sglang - INFO - [2025-07-20 00:10:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8475. 2025-07-20 00:10:40,563 - __main__ - INFO - [2025-07-20 00:10:40 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8476. 2025-07-20 00:10:40,563 - sglang - INFO - [2025-07-20 00:10:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8477. 2025-07-20 00:10:40,563 - __main__ - INFO - [2025-07-20 00:10:40 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8478. 2025-07-20 00:10:40,563 - sglang - INFO - [2025-07-20 00:10:40 TP0] Init torch distributed begin.
  8479. 2025-07-20 00:10:40,563 - __main__ - INFO - [2025-07-20 00:10:40 TP0] Init torch distributed begin.
  8480. 2025-07-20 00:10:45,965 - sglang - INFO - [2025-07-20 00:10:45 TP0] Load weight begin. avail mem=23.33 GB
  8481. 2025-07-20 00:10:45,966 - __main__ - INFO - [2025-07-20 00:10:45 TP0] Load weight begin. avail mem=23.33 GB
  8482. 2025-07-20 00:10:46,201 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  8483. 2025-07-20 00:10:47,090 - sglang - INFO - [2025-07-20 00:10:47 TP0] Using model weights format ['*.safetensors']
  8484. 2025-07-20 00:10:47,090 - __main__ - INFO - [2025-07-20 00:10:47 TP0] Using model weights format ['*.safetensors']
  8485. 2025-07-20 00:10:47,688 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8486. 2025-07-20 00:10:47,688 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  8487. 2025-07-20 00:10:47,975 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.48it/s]
  8488. 2025-07-20 00:10:47,976 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 3.48it/s]
  8489. 2025-07-20 00:10:48,780 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.69it/s]
  8490. 2025-07-20 00:10:48,780 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.69it/s]
  8491. 2025-07-20 00:10:49,562 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
  8492. 2025-07-20 00:10:49,562 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.47it/s]
  8493. 2025-07-20 00:10:50,428 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.33it/s]
  8494. 2025-07-20 00:10:50,428 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.33it/s]
  8495. 2025-07-20 00:10:50,428 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.46it/s]
  8496. 2025-07-20 00:10:50,428 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.46it/s]
  8497. 2025-07-20 00:10:50,429 - sglang - INFO -
  8498. 2025-07-20 00:10:50,429 - __main__ - INFO -
  8499. 2025-07-20 00:10:50,585 - sglang - INFO - [2025-07-20 00:10:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8500. 2025-07-20 00:10:50,585 - __main__ - INFO - [2025-07-20 00:10:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  8501. 2025-07-20 00:10:50,592 - sglang - INFO - [2025-07-20 00:10:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8502. 2025-07-20 00:10:50,592 - __main__ - INFO - [2025-07-20 00:10:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  8503. 2025-07-20 00:10:50,592 - sglang - INFO - [2025-07-20 00:10:50 TP0] Memory pool end. avail mem=5.30 GB
  8504. 2025-07-20 00:10:50,592 - __main__ - INFO - [2025-07-20 00:10:50 TP0] Memory pool end. avail mem=5.30 GB
  8505. 2025-07-20 00:10:50,776 - sglang - INFO - [2025-07-20 00:10:50 TP0] Capture cuda graph begin. This can take up to several minutes.
  8506. 2025-07-20 00:10:50,776 - __main__ - INFO - [2025-07-20 00:10:50 TP0] Capture cuda graph begin. This can take up to several minutes.
  8507. 2025-07-20 00:10:52,282 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  8508. 2025-07-20 00:10:52,700 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.21s/it] 50%|█████ | 2/4 [00:01<00:01, 1.57it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.20it/s] 100%|██████████| 4/4 [00:01<00:00, 2.71it/s] 100%|██████████| 4/4 [00:01<00:00, 2.08it/s]
  8509. 2025-07-20 00:10:52,701 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.21s/it] 50%|█████ | 2/4 [00:01<00:01, 1.57it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.20it/s] 100%|██████████| 4/4 [00:01<00:00, 2.71it/s] 100%|██████████| 4/4 [00:01<00:00, 2.08it/s]
  8510. 2025-07-20 00:10:52,701 - sglang - INFO - [2025-07-20 00:10:52 TP0] Capture cuda graph end. Time elapsed: 1.92 s
  8511. 2025-07-20 00:10:52,701 - __main__ - INFO - [2025-07-20 00:10:52 TP0] Capture cuda graph end. Time elapsed: 1.92 s
  8512. 2025-07-20 00:10:55,599 - sglang - INFO - [2025-07-20 00:10:55 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8513. 2025-07-20 00:10:55,599 - __main__ - INFO - [2025-07-20 00:10:55 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  8514. 2025-07-20 00:10:55,680 - sglang - INFO - [2025-07-20 00:10:55] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8515. 2025-07-20 00:10:55,680 - __main__ - INFO - [2025-07-20 00:10:55] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  8516. 2025-07-20 00:10:58,361 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  8517. 2025-07-20 00:11:04,440 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  8518. 2025-07-20 00:11:10,519 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  8519. 2025-07-20 00:11:12,074 - sglang - INFO - Process Process-1:
  8520. 2025-07-20 00:11:12,074 - __main__ - INFO - Process Process-1:
  8521. 2025-07-20 00:11:12,074 - sglang - INFO - Process Process-2:
  8522. 2025-07-20 00:11:12,074 - __main__ - INFO - Process Process-2:
  8523. 2025-07-20 00:11:12,075 - __main__ - INFO - Got cancellation request for SGLang server
  8524. 2025-07-20 11:07:46,350 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  8525. 2025-07-20 11:07:46,350 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  8526. 2025-07-20 11:07:46,350 - __main__ - INFO - Found 1 total pdf paths to add
  8527. 2025-07-20 11:07:46,354 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  8528. 2025-07-20 11:07:46,542 - __main__ - INFO - Starting pipeline with PID 578329
  8529. 2025-07-20 11:07:46,542 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  8530. 2025-07-20 11:10:01,984 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  8531. 2025-07-20 11:10:04,397 - sglang - INFO - [2025-07-20 11:10:04] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=270783148, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8532. 2025-07-20 11:10:04,398 - __main__ - INFO - [2025-07-20 11:10:04] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=270783148, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8533. 2025-07-20 11:10:08,112 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  8534. 2025-07-20 11:10:14,191 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  8535. 2025-07-20 11:10:20,271 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  8536. 2025-07-20 11:10:26,353 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  8537. 2025-07-20 11:10:32,433 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  8538. 2025-07-20 11:10:38,514 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  8539. 2025-07-20 11:10:44,595 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  8540. 2025-07-20 11:10:50,676 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  8541. 2025-07-20 11:10:54,998 - sglang - INFO - Process Process-2:
  8542. 2025-07-20 11:10:54,999 - __main__ - INFO - Process Process-2:
  8543. 2025-07-20 11:10:54,999 - sglang - INFO - Process Process-1:
  8544. 2025-07-20 11:10:54,999 - __main__ - INFO - Process Process-1:
  8545. 2025-07-20 11:10:54,999 - __main__ - INFO - Got cancellation request for SGLang server
  8546. 2025-07-20 11:11:06,277 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  8547. 2025-07-20 11:11:06,278 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  8548. 2025-07-20 11:11:06,278 - __main__ - INFO - Found 1 total pdf paths to add
  8549. 2025-07-20 11:11:06,281 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  8550. 2025-07-20 11:11:06,492 - __main__ - INFO - Starting pipeline with PID 579071
  8551. 2025-07-20 11:11:06,492 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview'
  8552. 2025-07-20 11:13:22,698 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  8553. 2025-07-20 11:13:23,990 - sglang - INFO - [2025-07-20 11:13:23] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=381064224, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8554. 2025-07-20 11:13:23,990 - __main__ - INFO - [2025-07-20 11:13:23] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=381064224, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8555. 2025-07-20 11:13:28,777 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  8556. 2025-07-20 11:13:34,835 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  8557. 2025-07-20 11:13:40,915 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  8558. 2025-07-20 11:13:46,995 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  8559. 2025-07-20 11:13:53,076 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  8560. 2025-07-20 11:13:59,157 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  8561. 2025-07-20 11:14:05,239 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  8562. 2025-07-20 11:14:11,320 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  8563. 2025-07-20 11:14:17,401 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  8564. 2025-07-20 11:14:23,482 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  8565. 2025-07-20 11:14:29,564 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  8566. 2025-07-20 11:14:35,645 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  8567. 2025-07-20 11:14:41,725 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  8568. 2025-07-20 11:14:47,807 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  8569. 2025-07-20 11:14:53,888 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  8570. 2025-07-20 11:14:59,964 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  8571. 2025-07-20 11:15:00,760 - sglang - INFO - [2025-07-20 11:15:00] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8572. 2025-07-20 11:15:00,760 - __main__ - INFO - [2025-07-20 11:15:00] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8573. 2025-07-20 11:15:05,976 - sglang - INFO - [2025-07-20 11:15:05 TP0] Overlap scheduler is disabled for multimodal models.
  8574. 2025-07-20 11:15:05,976 - __main__ - INFO - [2025-07-20 11:15:05 TP0] Overlap scheduler is disabled for multimodal models.
  8575. 2025-07-20 11:15:06,043 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  8576. 2025-07-20 11:15:12,125 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  8577. 2025-07-20 11:15:18,205 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  8578. 2025-07-20 11:15:24,285 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  8579. 2025-07-20 11:15:26,126 - sglang - INFO - [2025-07-20 11:15:26 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8580. 2025-07-20 11:15:26,126 - __main__ - INFO - [2025-07-20 11:15:26 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8581. 2025-07-20 11:15:26,127 - sglang - INFO - [2025-07-20 11:15:26 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8582. 2025-07-20 11:15:26,127 - __main__ - INFO - [2025-07-20 11:15:26 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8583. 2025-07-20 11:15:26,127 - sglang - INFO - [2025-07-20 11:15:26 TP0] Init torch distributed begin.
  8584. 2025-07-20 11:15:26,127 - __main__ - INFO - [2025-07-20 11:15:26 TP0] Init torch distributed begin.
  8585. 2025-07-20 11:15:30,366 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  8586. 2025-07-20 11:15:31,530 - sglang - INFO - [2025-07-20 11:15:31 TP0] Load weight begin. avail mem=23.33 GB
  8587. 2025-07-20 11:15:31,530 - __main__ - INFO - [2025-07-20 11:15:31 TP0] Load weight begin. avail mem=23.33 GB
  8588. 2025-07-20 11:15:36,448 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  8589. 2025-07-20 11:15:42,229 - sglang - INFO - [2025-07-20 11:15:42 TP0] Scheduler hit an exception: Traceback (most recent call last):
  8590. 2025-07-20 11:15:42,230 - __main__ - INFO - [2025-07-20 11:15:42 TP0] Scheduler hit an exception: Traceback (most recent call last):
  8591. 2025-07-20 11:15:42,230 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  8592. 2025-07-20 11:15:42,230 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  8593. 2025-07-20 11:15:42,230 - sglang - INFO - sock = connection.create_connection(
  8594. 2025-07-20 11:15:42,230 - __main__ - INFO - sock = connection.create_connection(
  8595. 2025-07-20 11:15:42,230 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8596. 2025-07-20 11:15:42,230 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8597. 2025-07-20 11:15:42,230 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  8598. 2025-07-20 11:15:42,230 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  8599. 2025-07-20 11:15:42,230 - sglang - INFO - raise err
  8600. 2025-07-20 11:15:42,230 - __main__ - INFO - raise err
  8601. 2025-07-20 11:15:42,231 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  8602. 2025-07-20 11:15:42,231 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  8603. 2025-07-20 11:15:42,231 - sglang - INFO - sock.connect(sa)
  8604. 2025-07-20 11:15:42,231 - __main__ - INFO - sock.connect(sa)
  8605. 2025-07-20 11:15:42,231 - sglang - INFO - OSError: [Errno 101] Network is unreachable
  8606. 2025-07-20 11:15:42,231 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
  8607. 2025-07-20 11:15:42,231 - sglang - INFO -
  8608. 2025-07-20 11:15:42,231 - __main__ - INFO -
  8609. 2025-07-20 11:15:42,231 - sglang - INFO - The above exception was the direct cause of the following exception:
  8610. 2025-07-20 11:15:42,231 - __main__ - INFO - The above exception was the direct cause of the following exception:
  8611. 2025-07-20 11:15:42,231 - sglang - INFO -
  8612. 2025-07-20 11:15:42,231 - __main__ - INFO -
  8613. 2025-07-20 11:15:42,231 - sglang - INFO - Traceback (most recent call last):
  8614. 2025-07-20 11:15:42,232 - __main__ - INFO - Traceback (most recent call last):
  8615. 2025-07-20 11:15:42,232 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  8616. 2025-07-20 11:15:42,232 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  8617. 2025-07-20 11:15:42,232 - sglang - INFO - response = self._make_request(
  8618. 2025-07-20 11:15:42,232 - __main__ - INFO - response = self._make_request(
  8619. 2025-07-20 11:15:42,232 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  8620. 2025-07-20 11:15:42,232 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  8621. 2025-07-20 11:15:42,232 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  8622. 2025-07-20 11:15:42,232 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  8623. 2025-07-20 11:15:42,232 - sglang - INFO - raise new_e
  8624. 2025-07-20 11:15:42,232 - __main__ - INFO - raise new_e
  8625. 2025-07-20 11:15:42,232 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  8626. 2025-07-20 11:15:42,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  8627. 2025-07-20 11:15:42,233 - sglang - INFO - self._validate_conn(conn)
  8628. 2025-07-20 11:15:42,233 - __main__ - INFO - self._validate_conn(conn)
  8629. 2025-07-20 11:15:42,233 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  8630. 2025-07-20 11:15:42,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  8631. 2025-07-20 11:15:42,233 - sglang - INFO - conn.connect()
  8632. 2025-07-20 11:15:42,233 - __main__ - INFO - conn.connect()
  8633. 2025-07-20 11:15:42,233 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  8634. 2025-07-20 11:15:42,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  8635. 2025-07-20 11:15:42,233 - sglang - INFO - self.sock = sock = self._new_conn()
  8636. 2025-07-20 11:15:42,233 - __main__ - INFO - self.sock = sock = self._new_conn()
  8637. 2025-07-20 11:15:42,233 - sglang - INFO - ^^^^^^^^^^^^^^^^
  8638. 2025-07-20 11:15:42,233 - __main__ - INFO - ^^^^^^^^^^^^^^^^
  8639. 2025-07-20 11:15:42,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  8640. 2025-07-20 11:15:42,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  8641. 2025-07-20 11:15:42,234 - sglang - INFO - raise NewConnectionError(
  8642. 2025-07-20 11:15:42,234 - __main__ - INFO - raise NewConnectionError(
  8643. 2025-07-20 11:15:42,234 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable
  8644. 2025-07-20 11:15:42,234 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable
  8645. 2025-07-20 11:15:42,234 - sglang - INFO -
  8646. 2025-07-20 11:15:42,234 - __main__ - INFO -
  8647. 2025-07-20 11:15:42,234 - sglang - INFO - The above exception was the direct cause of the following exception:
  8648. 2025-07-20 11:15:42,234 - __main__ - INFO - The above exception was the direct cause of the following exception:
  8649. 2025-07-20 11:15:42,234 - sglang - INFO -
  8650. 2025-07-20 11:15:42,234 - __main__ - INFO -
  8651. 2025-07-20 11:15:42,234 - sglang - INFO - Traceback (most recent call last):
  8652. 2025-07-20 11:15:42,235 - __main__ - INFO - Traceback (most recent call last):
  8653. 2025-07-20 11:15:42,235 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  8654. 2025-07-20 11:15:42,235 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  8655. 2025-07-20 11:15:42,235 - sglang - INFO - resp = conn.urlopen(
  8656. 2025-07-20 11:15:42,235 - __main__ - INFO - resp = conn.urlopen(
  8657. 2025-07-20 11:15:42,235 - sglang - INFO - ^^^^^^^^^^^^^
  8658. 2025-07-20 11:15:42,235 - __main__ - INFO - ^^^^^^^^^^^^^
  8659. 2025-07-20 11:15:42,235 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  8660. 2025-07-20 11:15:42,235 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  8661. 2025-07-20 11:15:42,235 - sglang - INFO - retries = retries.increment(
  8662. 2025-07-20 11:15:42,235 - __main__ - INFO - retries = retries.increment(
  8663. 2025-07-20 11:15:42,235 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  8664. 2025-07-20 11:15:42,235 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  8665. 2025-07-20 11:15:42,235 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  8666. 2025-07-20 11:15:42,236 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  8667. 2025-07-20 11:15:42,236 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  8668. 2025-07-20 11:15:42,236 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  8669. 2025-07-20 11:15:42,236 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8670. 2025-07-20 11:15:42,236 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8671. 2025-07-20 11:15:42,236 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  8672. 2025-07-20 11:15:42,236 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  8673. 2025-07-20 11:15:42,236 - sglang - INFO -
  8674. 2025-07-20 11:15:42,236 - __main__ - INFO -
  8675. 2025-07-20 11:15:42,236 - sglang - INFO - During handling of the above exception, another exception occurred:
  8676. 2025-07-20 11:15:42,236 - __main__ - INFO - During handling of the above exception, another exception occurred:
  8677. 2025-07-20 11:15:42,236 - sglang - INFO -
  8678. 2025-07-20 11:15:42,236 - __main__ - INFO -
  8679. 2025-07-20 11:15:42,237 - sglang - INFO - Traceback (most recent call last):
  8680. 2025-07-20 11:15:42,237 - __main__ - INFO - Traceback (most recent call last):
  8681. 2025-07-20 11:15:42,237 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  8682. 2025-07-20 11:15:42,237 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  8683. 2025-07-20 11:15:42,237 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  8684. 2025-07-20 11:15:42,237 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  8685. 2025-07-20 11:15:42,237 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8686. 2025-07-20 11:15:42,237 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8687. 2025-07-20 11:15:42,237 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  8688. 2025-07-20 11:15:42,237 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  8689. 2025-07-20 11:15:42,237 - sglang - INFO - self.tp_worker = TpWorkerClass(
  8690. 2025-07-20 11:15:42,237 - __main__ - INFO - self.tp_worker = TpWorkerClass(
  8691. 2025-07-20 11:15:42,237 - sglang - INFO - ^^^^^^^^^^^^^^
  8692. 2025-07-20 11:15:42,238 - __main__ - INFO - ^^^^^^^^^^^^^^
  8693. 2025-07-20 11:15:42,238 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  8694. 2025-07-20 11:15:42,238 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  8695. 2025-07-20 11:15:42,238 - sglang - INFO - self.model_runner = ModelRunner(
  8696. 2025-07-20 11:15:42,238 - __main__ - INFO - self.model_runner = ModelRunner(
  8697. 2025-07-20 11:15:42,238 - sglang - INFO - ^^^^^^^^^^^^
  8698. 2025-07-20 11:15:42,238 - __main__ - INFO - ^^^^^^^^^^^^
  8699. 2025-07-20 11:15:42,238 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  8700. 2025-07-20 11:15:42,238 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  8701. 2025-07-20 11:15:42,238 - sglang - INFO - self.load_model()
  8702. 2025-07-20 11:15:42,238 - __main__ - INFO - self.load_model()
  8703. 2025-07-20 11:15:42,238 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  8704. 2025-07-20 11:15:42,238 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  8705. 2025-07-20 11:15:42,239 - sglang - INFO - self.model = get_model(
  8706. 2025-07-20 11:15:42,239 - __main__ - INFO - self.model = get_model(
  8707. 2025-07-20 11:15:42,239 - sglang - INFO - ^^^^^^^^^^
  8708. 2025-07-20 11:15:42,239 - __main__ - INFO - ^^^^^^^^^^
  8709. 2025-07-20 11:15:42,239 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  8710. 2025-07-20 11:15:42,239 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  8711. 2025-07-20 11:15:42,239 - sglang - INFO - return loader.load_model(
  8712. 2025-07-20 11:15:42,239 - __main__ - INFO - return loader.load_model(
  8713. 2025-07-20 11:15:42,239 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  8714. 2025-07-20 11:15:42,239 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  8715. 2025-07-20 11:15:42,239 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  8716. 2025-07-20 11:15:42,239 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  8717. 2025-07-20 11:15:42,239 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
  8718. 2025-07-20 11:15:42,239 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
  8719. 2025-07-20 11:15:42,240 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  8720. 2025-07-20 11:15:42,240 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  8721. 2025-07-20 11:15:42,240 - sglang - INFO - for name, loaded_weight in weights:
  8722. 2025-07-20 11:15:42,240 - __main__ - INFO - for name, loaded_weight in weights:
  8723. 2025-07-20 11:15:42,240 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  8724. 2025-07-20 11:15:42,240 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  8725. 2025-07-20 11:15:42,240 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
  8726. 2025-07-20 11:15:42,240 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
  8727. 2025-07-20 11:15:42,240 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8728. 2025-07-20 11:15:42,240 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8729. 2025-07-20 11:15:42,240 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  8730. 2025-07-20 11:15:42,240 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  8731. 2025-07-20 11:15:42,241 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  8732. 2025-07-20 11:15:42,241 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  8733. 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  8734. 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  8735. 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  8736. 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  8737. 2025-07-20 11:15:42,241 - sglang - INFO - hf_folder = download_weights_from_hf(
  8738. 2025-07-20 11:15:42,241 - __main__ - INFO - hf_folder = download_weights_from_hf(
  8739. 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  8740. 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  8741. 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  8742. 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  8743. 2025-07-20 11:15:42,241 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  8744. 2025-07-20 11:15:42,241 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  8745. 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8746. 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8747. 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  8748. 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  8749. 2025-07-20 11:15:42,241 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
  8750. 2025-07-20 11:15:42,241 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
  8751. 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8752. 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8753. 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  8754. 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  8755. 2025-07-20 11:15:42,241 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  8756. 2025-07-20 11:15:42,241 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  8757. 2025-07-20 11:15:42,241 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8758. 2025-07-20 11:15:42,241 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8759. 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  8760. 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  8761. 2025-07-20 11:15:42,241 - sglang - INFO - self._api.repo_info(
  8762. 2025-07-20 11:15:42,241 - __main__ - INFO - self._api.repo_info(
  8763. 2025-07-20 11:15:42,241 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  8764. 2025-07-20 11:15:42,241 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  8765. 2025-07-20 11:15:42,242 - sglang - INFO - return fn(*args, **kwargs)
  8766. 2025-07-20 11:15:42,242 - __main__ - INFO - return fn(*args, **kwargs)
  8767. 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  8768. 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  8769. 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  8770. 2025-07-20 11:15:42,242 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  8771. 2025-07-20 11:15:42,242 - sglang - INFO - return method(
  8772. 2025-07-20 11:15:42,242 - __main__ - INFO - return method(
  8773. 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^
  8774. 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^
  8775. 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  8776. 2025-07-20 11:15:42,242 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  8777. 2025-07-20 11:15:42,242 - sglang - INFO - return fn(*args, **kwargs)
  8778. 2025-07-20 11:15:42,242 - __main__ - INFO - return fn(*args, **kwargs)
  8779. 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  8780. 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  8781. 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  8782. 2025-07-20 11:15:42,242 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  8783. 2025-07-20 11:15:42,242 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  8784. 2025-07-20 11:15:42,242 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  8785. 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8786. 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8787. 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  8788. 2025-07-20 11:15:42,242 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  8789. 2025-07-20 11:15:42,242 - sglang - INFO - return self.request("GET", url, **kwargs)
  8790. 2025-07-20 11:15:42,242 - __main__ - INFO - return self.request("GET", url, **kwargs)
  8791. 2025-07-20 11:15:42,242 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8792. 2025-07-20 11:15:42,242 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8793. 2025-07-20 11:15:42,242 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  8794. 2025-07-20 11:15:42,243 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  8795. 2025-07-20 11:15:42,243 - sglang - INFO - resp = self.send(prep, **send_kwargs)
  8796. 2025-07-20 11:15:42,243 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
  8797. 2025-07-20 11:15:42,243 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8798. 2025-07-20 11:15:42,243 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8799. 2025-07-20 11:15:42,243 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  8800. 2025-07-20 11:15:42,243 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  8801. 2025-07-20 11:15:42,243 - sglang - INFO - r = adapter.send(request, **kwargs)
  8802. 2025-07-20 11:15:42,243 - __main__ - INFO - r = adapter.send(request, **kwargs)
  8803. 2025-07-20 11:15:42,243 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8804. 2025-07-20 11:15:42,243 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8805. 2025-07-20 11:15:42,243 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  8806. 2025-07-20 11:15:42,243 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  8807. 2025-07-20 11:15:42,243 - sglang - INFO - return super().send(request, *args, **kwargs)
  8808. 2025-07-20 11:15:42,243 - __main__ - INFO - return super().send(request, *args, **kwargs)
  8809. 2025-07-20 11:15:42,243 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8810. 2025-07-20 11:15:42,243 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8811. 2025-07-20 11:15:42,243 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  8812. 2025-07-20 11:15:42,243 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  8813. 2025-07-20 11:15:42,243 - sglang - INFO - raise ConnectionError(e, request=request)
  8814. 2025-07-20 11:15:42,243 - __main__ - INFO - raise ConnectionError(e, request=request)
  8815. 2025-07-20 11:15:42,243 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: d7db9d3b-7988-48c6-ac50-b06366e3a9c9)')
  8816. 2025-07-20 11:15:42,243 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f2050723850>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: d7db9d3b-7988-48c6-ac50-b06366e3a9c9)')
  8817. 2025-07-20 11:15:42,243 - sglang - INFO -
  8818. 2025-07-20 11:15:42,243 - __main__ - INFO -
  8819. 2025-07-20 11:15:42,244 - sglang - INFO - [2025-07-20 11:15:42] Received sigquit from a child proces. It usually means the child failed.
  8820. 2025-07-20 11:15:42,244 - __main__ - INFO - [2025-07-20 11:15:42] Received sigquit from a child proces. It usually means the child failed.
  8821. 2025-07-20 11:15:42,475 - __main__ - WARNING - SGLang server task ended
  8822. 2025-07-20 11:15:42,528 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  8823. 2025-07-20 11:15:48,512 - sglang - INFO - [2025-07-20 11:15:48] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=442733111, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8824. 2025-07-20 11:15:48,512 - __main__ - INFO - [2025-07-20 11:15:48] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=442733111, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  8825. 2025-07-20 11:15:48,633 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  8826. 2025-07-20 11:15:54,713 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  8827. 2025-07-20 11:16:00,794 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  8828. 2025-07-20 11:16:06,873 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  8829. 2025-07-20 11:16:12,956 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  8830. 2025-07-20 11:16:19,037 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  8831. 2025-07-20 11:16:25,120 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  8832. 2025-07-20 11:16:31,201 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  8833. 2025-07-20 11:16:37,283 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  8834. 2025-07-20 11:16:43,365 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  8835. 2025-07-20 11:16:49,446 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  8836. 2025-07-20 11:16:55,529 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  8837. 2025-07-20 11:17:01,610 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  8838. 2025-07-20 11:17:07,695 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  8839. 2025-07-20 11:17:13,776 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  8840. 2025-07-20 11:17:19,857 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  8841. 2025-07-20 11:17:25,201 - sglang - INFO - [2025-07-20 11:17:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8842. 2025-07-20 11:17:25,201 - __main__ - INFO - [2025-07-20 11:17:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
  8843. 2025-07-20 11:17:25,937 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  8844. 2025-07-20 11:17:30,921 - sglang - INFO - [2025-07-20 11:17:30 TP0] Overlap scheduler is disabled for multimodal models.
  8845. 2025-07-20 11:17:30,921 - __main__ - INFO - [2025-07-20 11:17:30 TP0] Overlap scheduler is disabled for multimodal models.
  8846. 2025-07-20 11:17:32,018 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  8847. 2025-07-20 11:17:38,101 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  8848. 2025-07-20 11:17:44,181 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  8849. 2025-07-20 11:17:50,263 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  8850. 2025-07-20 11:17:51,072 - sglang - INFO - [2025-07-20 11:17:51 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8851. 2025-07-20 11:17:51,072 - __main__ - INFO - [2025-07-20 11:17:51 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  8852. 2025-07-20 11:17:51,073 - sglang - INFO - [2025-07-20 11:17:51 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8853. 2025-07-20 11:17:51,073 - __main__ - INFO - [2025-07-20 11:17:51 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  8854. 2025-07-20 11:17:51,073 - sglang - INFO - [2025-07-20 11:17:51 TP0] Init torch distributed begin.
  8855. 2025-07-20 11:17:51,073 - __main__ - INFO - [2025-07-20 11:17:51 TP0] Init torch distributed begin.
  8856. 2025-07-20 11:17:56,343 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  8857. 2025-07-20 11:17:56,482 - sglang - INFO - [2025-07-20 11:17:56 TP0] Load weight begin. avail mem=23.33 GB
  8858. 2025-07-20 11:17:56,482 - __main__ - INFO - [2025-07-20 11:17:56 TP0] Load weight begin. avail mem=23.33 GB
  8859. 2025-07-20 11:18:02,424 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  8860. 2025-07-20 11:18:07,166 - sglang - INFO - [2025-07-20 11:18:07 TP0] Scheduler hit an exception: Traceback (most recent call last):
  8861. 2025-07-20 11:18:07,166 - __main__ - INFO - [2025-07-20 11:18:07 TP0] Scheduler hit an exception: Traceback (most recent call last):
  8862. 2025-07-20 11:18:07,166 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  8863. 2025-07-20 11:18:07,166 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  8864. 2025-07-20 11:18:07,166 - sglang - INFO - sock = connection.create_connection(
  8865. 2025-07-20 11:18:07,166 - __main__ - INFO - sock = connection.create_connection(
  8866. 2025-07-20 11:18:07,167 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8867. 2025-07-20 11:18:07,167 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8868. 2025-07-20 11:18:07,167 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  8869. 2025-07-20 11:18:07,167 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  8870. 2025-07-20 11:18:07,167 - sglang - INFO - raise err
  8871. 2025-07-20 11:18:07,167 - __main__ - INFO - raise err
  8872. 2025-07-20 11:18:07,167 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  8873. 2025-07-20 11:18:07,167 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  8874. 2025-07-20 11:18:07,167 - sglang - INFO - sock.connect(sa)
  8875. 2025-07-20 11:18:07,167 - __main__ - INFO - sock.connect(sa)
  8876. 2025-07-20 11:18:07,167 - sglang - INFO - OSError: [Errno 101] Network is unreachable
  8877. 2025-07-20 11:18:07,168 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
  8878. 2025-07-20 11:18:07,168 - sglang - INFO -
  8879. 2025-07-20 11:18:07,168 - __main__ - INFO -
  8880. 2025-07-20 11:18:07,168 - sglang - INFO - The above exception was the direct cause of the following exception:
  8881. 2025-07-20 11:18:07,168 - __main__ - INFO - The above exception was the direct cause of the following exception:
  8882. 2025-07-20 11:18:07,168 - sglang - INFO -
  8883. 2025-07-20 11:18:07,168 - __main__ - INFO -
  8884. 2025-07-20 11:18:07,168 - sglang - INFO - Traceback (most recent call last):
  8885. 2025-07-20 11:18:07,168 - __main__ - INFO - Traceback (most recent call last):
  8886. 2025-07-20 11:18:07,168 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  8887. 2025-07-20 11:18:07,168 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  8888. 2025-07-20 11:18:07,168 - sglang - INFO - response = self._make_request(
  8889. 2025-07-20 11:18:07,168 - __main__ - INFO - response = self._make_request(
  8890. 2025-07-20 11:18:07,169 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  8891. 2025-07-20 11:18:07,169 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  8892. 2025-07-20 11:18:07,169 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  8893. 2025-07-20 11:18:07,169 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  8894. 2025-07-20 11:18:07,169 - sglang - INFO - raise new_e
  8895. 2025-07-20 11:18:07,169 - __main__ - INFO - raise new_e
  8896. 2025-07-20 11:18:07,169 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  8897. 2025-07-20 11:18:07,169 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  8898. 2025-07-20 11:18:07,169 - sglang - INFO - self._validate_conn(conn)
  8899. 2025-07-20 11:18:07,169 - __main__ - INFO - self._validate_conn(conn)
  8900. 2025-07-20 11:18:07,169 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  8901. 2025-07-20 11:18:07,169 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  8902. 2025-07-20 11:18:07,169 - sglang - INFO - conn.connect()
  8903. 2025-07-20 11:18:07,170 - __main__ - INFO - conn.connect()
  8904. 2025-07-20 11:18:07,170 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  8905. 2025-07-20 11:18:07,170 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  8906. 2025-07-20 11:18:07,170 - sglang - INFO - self.sock = sock = self._new_conn()
  8907. 2025-07-20 11:18:07,170 - __main__ - INFO - self.sock = sock = self._new_conn()
  8908. 2025-07-20 11:18:07,170 - sglang - INFO - ^^^^^^^^^^^^^^^^
  8909. 2025-07-20 11:18:07,170 - __main__ - INFO - ^^^^^^^^^^^^^^^^
  8910. 2025-07-20 11:18:07,170 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  8911. 2025-07-20 11:18:07,170 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  8912. 2025-07-20 11:18:07,170 - sglang - INFO - raise NewConnectionError(
  8913. 2025-07-20 11:18:07,170 - __main__ - INFO - raise NewConnectionError(
  8914. 2025-07-20 11:18:07,170 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable
  8915. 2025-07-20 11:18:07,170 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable
  8916. 2025-07-20 11:18:07,171 - sglang - INFO -
  8917. 2025-07-20 11:18:07,171 - __main__ - INFO -
  8918. 2025-07-20 11:18:07,171 - sglang - INFO - The above exception was the direct cause of the following exception:
  8919. 2025-07-20 11:18:07,171 - __main__ - INFO - The above exception was the direct cause of the following exception:
  8920. 2025-07-20 11:18:07,171 - sglang - INFO -
  8921. 2025-07-20 11:18:07,171 - __main__ - INFO -
  8922. 2025-07-20 11:18:07,171 - sglang - INFO - Traceback (most recent call last):
  8923. 2025-07-20 11:18:07,171 - __main__ - INFO - Traceback (most recent call last):
  8924. 2025-07-20 11:18:07,171 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  8925. 2025-07-20 11:18:07,171 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  8926. 2025-07-20 11:18:07,171 - sglang - INFO - resp = conn.urlopen(
  8927. 2025-07-20 11:18:07,171 - __main__ - INFO - resp = conn.urlopen(
  8928. 2025-07-20 11:18:07,171 - sglang - INFO - ^^^^^^^^^^^^^
  8929. 2025-07-20 11:18:07,171 - __main__ - INFO - ^^^^^^^^^^^^^
  8930. 2025-07-20 11:18:07,172 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  8931. 2025-07-20 11:18:07,172 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  8932. 2025-07-20 11:18:07,172 - sglang - INFO - retries = retries.increment(
  8933. 2025-07-20 11:18:07,172 - __main__ - INFO - retries = retries.increment(
  8934. 2025-07-20 11:18:07,172 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  8935. 2025-07-20 11:18:07,172 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  8936. 2025-07-20 11:18:07,172 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  8937. 2025-07-20 11:18:07,172 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  8938. 2025-07-20 11:18:07,172 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  8939. 2025-07-20 11:18:07,172 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  8940. 2025-07-20 11:18:07,172 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8941. 2025-07-20 11:18:07,172 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8942. 2025-07-20 11:18:07,173 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  8943. 2025-07-20 11:18:07,173 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  8944. 2025-07-20 11:18:07,173 - sglang - INFO -
  8945. 2025-07-20 11:18:07,173 - __main__ - INFO -
  8946. 2025-07-20 11:18:07,173 - sglang - INFO - During handling of the above exception, another exception occurred:
  8947. 2025-07-20 11:18:07,173 - __main__ - INFO - During handling of the above exception, another exception occurred:
  8948. 2025-07-20 11:18:07,173 - sglang - INFO -
  8949. 2025-07-20 11:18:07,173 - __main__ - INFO -
  8950. 2025-07-20 11:18:07,173 - sglang - INFO - Traceback (most recent call last):
  8951. 2025-07-20 11:18:07,173 - __main__ - INFO - Traceback (most recent call last):
  8952. 2025-07-20 11:18:07,173 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  8953. 2025-07-20 11:18:07,173 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  8954. 2025-07-20 11:18:07,173 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  8955. 2025-07-20 11:18:07,174 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  8956. 2025-07-20 11:18:07,174 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8957. 2025-07-20 11:18:07,174 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8958. 2025-07-20 11:18:07,174 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  8959. 2025-07-20 11:18:07,174 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  8960. 2025-07-20 11:18:07,174 - sglang - INFO - self.tp_worker = TpWorkerClass(
  8961. 2025-07-20 11:18:07,174 - __main__ - INFO - self.tp_worker = TpWorkerClass(
  8962. 2025-07-20 11:18:07,174 - sglang - INFO - ^^^^^^^^^^^^^^
  8963. 2025-07-20 11:18:07,174 - __main__ - INFO - ^^^^^^^^^^^^^^
  8964. 2025-07-20 11:18:07,174 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  8965. 2025-07-20 11:18:07,174 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  8966. 2025-07-20 11:18:07,174 - sglang - INFO - self.model_runner = ModelRunner(
  8967. 2025-07-20 11:18:07,174 - __main__ - INFO - self.model_runner = ModelRunner(
  8968. 2025-07-20 11:18:07,175 - sglang - INFO - ^^^^^^^^^^^^
  8969. 2025-07-20 11:18:07,175 - __main__ - INFO - ^^^^^^^^^^^^
  8970. 2025-07-20 11:18:07,175 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  8971. 2025-07-20 11:18:07,175 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  8972. 2025-07-20 11:18:07,175 - sglang - INFO - self.load_model()
  8973. 2025-07-20 11:18:07,175 - __main__ - INFO - self.load_model()
  8974. 2025-07-20 11:18:07,175 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  8975. 2025-07-20 11:18:07,175 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  8976. 2025-07-20 11:18:07,175 - sglang - INFO - self.model = get_model(
  8977. 2025-07-20 11:18:07,175 - __main__ - INFO - self.model = get_model(
  8978. 2025-07-20 11:18:07,175 - sglang - INFO - ^^^^^^^^^^
  8979. 2025-07-20 11:18:07,175 - __main__ - INFO - ^^^^^^^^^^
  8980. 2025-07-20 11:18:07,175 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  8981. 2025-07-20 11:18:07,175 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  8982. 2025-07-20 11:18:07,176 - sglang - INFO - return loader.load_model(
  8983. 2025-07-20 11:18:07,176 - __main__ - INFO - return loader.load_model(
  8984. 2025-07-20 11:18:07,176 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  8985. 2025-07-20 11:18:07,176 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  8986. 2025-07-20 11:18:07,176 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  8987. 2025-07-20 11:18:07,176 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  8988. 2025-07-20 11:18:07,176 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
  8989. 2025-07-20 11:18:07,176 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
  8990. 2025-07-20 11:18:07,176 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  8991. 2025-07-20 11:18:07,176 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  8992. 2025-07-20 11:18:07,176 - sglang - INFO - for name, loaded_weight in weights:
  8993. 2025-07-20 11:18:07,176 - __main__ - INFO - for name, loaded_weight in weights:
  8994. 2025-07-20 11:18:07,177 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  8995. 2025-07-20 11:18:07,177 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  8996. 2025-07-20 11:18:07,177 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
  8997. 2025-07-20 11:18:07,177 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
  8998. 2025-07-20 11:18:07,177 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  8999. 2025-07-20 11:18:07,177 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9000. 2025-07-20 11:18:07,177 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  9001. 2025-07-20 11:18:07,177 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  9002. 2025-07-20 11:18:07,177 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  9003. 2025-07-20 11:18:07,177 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  9004. 2025-07-20 11:18:07,177 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  9005. 2025-07-20 11:18:07,177 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  9006. 2025-07-20 11:18:07,177 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  9007. 2025-07-20 11:18:07,178 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  9008. 2025-07-20 11:18:07,178 - sglang - INFO - hf_folder = download_weights_from_hf(
  9009. 2025-07-20 11:18:07,178 - __main__ - INFO - hf_folder = download_weights_from_hf(
  9010. 2025-07-20 11:18:07,178 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  9011. 2025-07-20 11:18:07,178 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  9012. 2025-07-20 11:18:07,178 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  9013. 2025-07-20 11:18:07,178 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  9014. 2025-07-20 11:18:07,178 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  9015. 2025-07-20 11:18:07,178 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  9016. 2025-07-20 11:18:07,178 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9017. 2025-07-20 11:18:07,178 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9018. 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  9019. 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  9020. 2025-07-20 11:18:07,180 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
  9021. 2025-07-20 11:18:07,180 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
  9022. 2025-07-20 11:18:07,180 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9023. 2025-07-20 11:18:07,180 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9024. 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  9025. 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  9026. 2025-07-20 11:18:07,180 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  9027. 2025-07-20 11:18:07,180 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  9028. 2025-07-20 11:18:07,180 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9029. 2025-07-20 11:18:07,180 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9030. 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  9031. 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  9032. 2025-07-20 11:18:07,180 - sglang - INFO - self._api.repo_info(
  9033. 2025-07-20 11:18:07,180 - __main__ - INFO - self._api.repo_info(
  9034. 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9035. 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9036. 2025-07-20 11:18:07,180 - sglang - INFO - return fn(*args, **kwargs)
  9037. 2025-07-20 11:18:07,180 - __main__ - INFO - return fn(*args, **kwargs)
  9038. 2025-07-20 11:18:07,180 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9039. 2025-07-20 11:18:07,180 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9040. 2025-07-20 11:18:07,180 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  9041. 2025-07-20 11:18:07,180 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  9042. 2025-07-20 11:18:07,180 - sglang - INFO - return method(
  9043. 2025-07-20 11:18:07,180 - __main__ - INFO - return method(
  9044. 2025-07-20 11:18:07,180 - sglang - INFO - ^^^^^^^
  9045. 2025-07-20 11:18:07,180 - __main__ - INFO - ^^^^^^^
  9046. 2025-07-20 11:18:07,181 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9047. 2025-07-20 11:18:07,181 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9048. 2025-07-20 11:18:07,181 - sglang - INFO - return fn(*args, **kwargs)
  9049. 2025-07-20 11:18:07,233 - __main__ - INFO - return fn(*args, **kwargs)
  9050. 2025-07-20 11:18:07,233 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9051. 2025-07-20 11:18:07,233 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9052. 2025-07-20 11:18:07,233 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  9053. 2025-07-20 11:18:07,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  9054. 2025-07-20 11:18:07,233 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  9055. 2025-07-20 11:18:07,233 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  9056. 2025-07-20 11:18:07,233 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9057. 2025-07-20 11:18:07,233 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9058. 2025-07-20 11:18:07,233 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  9059. 2025-07-20 11:18:07,233 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  9060. 2025-07-20 11:18:07,234 - sglang - INFO - return self.request("GET", url, **kwargs)
  9061. 2025-07-20 11:18:07,234 - __main__ - INFO - return self.request("GET", url, **kwargs)
  9062. 2025-07-20 11:18:07,234 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9063. 2025-07-20 11:18:07,234 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9064. 2025-07-20 11:18:07,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  9065. 2025-07-20 11:18:07,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  9066. 2025-07-20 11:18:07,234 - sglang - INFO - resp = self.send(prep, **send_kwargs)
  9067. 2025-07-20 11:18:07,234 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
  9068. 2025-07-20 11:18:07,234 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9069. 2025-07-20 11:18:07,234 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9070. 2025-07-20 11:18:07,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  9071. 2025-07-20 11:18:07,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  9072. 2025-07-20 11:18:07,234 - sglang - INFO - r = adapter.send(request, **kwargs)
  9073. 2025-07-20 11:18:07,234 - __main__ - INFO - r = adapter.send(request, **kwargs)
  9074. 2025-07-20 11:18:07,234 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9075. 2025-07-20 11:18:07,234 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9076. 2025-07-20 11:18:07,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  9077. 2025-07-20 11:18:07,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  9078. 2025-07-20 11:18:07,234 - sglang - INFO - return super().send(request, *args, **kwargs)
  9079. 2025-07-20 11:18:07,234 - __main__ - INFO - return super().send(request, *args, **kwargs)
  9080. 2025-07-20 11:18:07,234 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9081. 2025-07-20 11:18:07,234 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9082. 2025-07-20 11:18:07,234 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  9083. 2025-07-20 11:18:07,234 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  9084. 2025-07-20 11:18:07,234 - sglang - INFO - raise ConnectionError(e, request=request)
  9085. 2025-07-20 11:18:07,234 - __main__ - INFO - raise ConnectionError(e, request=request)
  9086. 2025-07-20 11:18:07,234 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 0e0286ef-566f-4e0f-8c78-0db3717091a5)')
  9087. 2025-07-20 11:18:07,234 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fdad84ec210>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 0e0286ef-566f-4e0f-8c78-0db3717091a5)')
  9088. 2025-07-20 11:18:07,234 - sglang - INFO -
  9089. 2025-07-20 11:18:07,234 - __main__ - INFO -
  9090. 2025-07-20 11:18:07,235 - sglang - INFO - [2025-07-20 11:18:07] Received sigquit from a child proces. It usually means the child failed.
  9091. 2025-07-20 11:18:07,235 - __main__ - INFO - [2025-07-20 11:18:07] Received sigquit from a child proces. It usually means the child failed.
  9092. 2025-07-20 11:18:07,541 - __main__ - WARNING - SGLang server task ended
  9093. 2025-07-20 11:18:08,506 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  9094. 2025-07-20 11:18:14,589 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  9095. 2025-07-20 11:18:14,891 - sglang - INFO - [2025-07-20 11:18:14] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=543652995, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  9096. 2025-07-20 11:18:14,891 - __main__ - INFO - [2025-07-20 11:18:14] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=543652995, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  9097. 2025-07-20 11:18:20,733 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  9098. 2025-07-20 11:18:26,815 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  9099. 2025-07-20 11:18:32,896 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  9100. 2025-07-20 11:18:38,976 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  9101. 2025-07-20 11:18:45,057 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  9102. 2025-07-20 11:18:51,137 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  9103. 2025-07-20 11:18:57,220 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  9104. 2025-07-20 11:19:03,309 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  9105. 2025-07-20 11:19:09,392 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  9106. 2025-07-20 11:19:15,472 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  9107. 2025-07-20 11:19:21,553 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  9108. 2025-07-20 11:19:27,636 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  9109. 2025-07-20 11:19:33,716 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  9110. 2025-07-20 11:19:39,797 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  9111. 2025-07-20 11:19:45,873 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  9112. 2025-07-20 11:19:51,665 - sglang - INFO - [2025-07-20 11:19:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
  9113. 2025-07-20 11:19:51,665 - __main__ - INFO - [2025-07-20 11:19:51] Use chat template for the OpenAI-compatible API server: qwen2-vl
  9114. 2025-07-20 11:19:51,960 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  9115. 2025-07-20 11:19:58,041 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  9116. 2025-07-20 11:19:58,086 - sglang - INFO - [2025-07-20 11:19:58 TP0] Overlap scheduler is disabled for multimodal models.
  9117. 2025-07-20 11:19:58,087 - __main__ - INFO - [2025-07-20 11:19:58 TP0] Overlap scheduler is disabled for multimodal models.
  9118. 2025-07-20 11:20:04,124 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
  9119. 2025-07-20 11:20:10,205 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
  9120. 2025-07-20 11:20:16,285 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
  9121. 2025-07-20 11:20:18,243 - sglang - INFO - [2025-07-20 11:20:18 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  9122. 2025-07-20 11:20:18,243 - __main__ - INFO - [2025-07-20 11:20:18 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  9123. 2025-07-20 11:20:18,243 - sglang - INFO - [2025-07-20 11:20:18 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  9124. 2025-07-20 11:20:18,243 - __main__ - INFO - [2025-07-20 11:20:18 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  9125. 2025-07-20 11:20:18,243 - sglang - INFO - [2025-07-20 11:20:18 TP0] Init torch distributed begin.
  9126. 2025-07-20 11:20:18,243 - __main__ - INFO - [2025-07-20 11:20:18 TP0] Init torch distributed begin.
  9127. 2025-07-20 11:20:22,368 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
  9128. 2025-07-20 11:20:23,643 - sglang - INFO - [2025-07-20 11:20:23 TP0] Load weight begin. avail mem=23.33 GB
  9129. 2025-07-20 11:20:23,643 - __main__ - INFO - [2025-07-20 11:20:23 TP0] Load weight begin. avail mem=23.33 GB
  9130. 2025-07-20 11:20:28,449 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
  9131. 2025-07-20 11:20:34,348 - sglang - INFO - [2025-07-20 11:20:34 TP0] Scheduler hit an exception: Traceback (most recent call last):
  9132. 2025-07-20 11:20:34,349 - __main__ - INFO - [2025-07-20 11:20:34 TP0] Scheduler hit an exception: Traceback (most recent call last):
  9133. 2025-07-20 11:20:34,349 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  9134. 2025-07-20 11:20:34,349 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  9135. 2025-07-20 11:20:34,349 - sglang - INFO - sock = connection.create_connection(
  9136. 2025-07-20 11:20:34,349 - __main__ - INFO - sock = connection.create_connection(
  9137. 2025-07-20 11:20:34,349 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9138. 2025-07-20 11:20:34,349 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9139. 2025-07-20 11:20:34,349 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  9140. 2025-07-20 11:20:34,349 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  9141. 2025-07-20 11:20:34,349 - sglang - INFO - raise err
  9142. 2025-07-20 11:20:34,350 - __main__ - INFO - raise err
  9143. 2025-07-20 11:20:34,350 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  9144. 2025-07-20 11:20:34,350 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  9145. 2025-07-20 11:20:34,350 - sglang - INFO - sock.connect(sa)
  9146. 2025-07-20 11:20:34,350 - __main__ - INFO - sock.connect(sa)
  9147. 2025-07-20 11:20:34,350 - sglang - INFO - OSError: [Errno 101] Network is unreachable
  9148. 2025-07-20 11:20:34,350 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
  9149. 2025-07-20 11:20:34,350 - sglang - INFO -
  9150. 2025-07-20 11:20:34,350 - __main__ - INFO -
  9151. 2025-07-20 11:20:34,350 - sglang - INFO - The above exception was the direct cause of the following exception:
  9152. 2025-07-20 11:20:34,350 - __main__ - INFO - The above exception was the direct cause of the following exception:
  9153. 2025-07-20 11:20:34,350 - sglang - INFO -
  9154. 2025-07-20 11:20:34,350 - __main__ - INFO -
  9155. 2025-07-20 11:20:34,351 - sglang - INFO - Traceback (most recent call last):
  9156. 2025-07-20 11:20:34,351 - __main__ - INFO - Traceback (most recent call last):
  9157. 2025-07-20 11:20:34,351 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  9158. 2025-07-20 11:20:34,351 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  9159. 2025-07-20 11:20:34,351 - sglang - INFO - response = self._make_request(
  9160. 2025-07-20 11:20:34,351 - __main__ - INFO - response = self._make_request(
  9161. 2025-07-20 11:20:34,351 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9162. 2025-07-20 11:20:34,351 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9163. 2025-07-20 11:20:34,351 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  9164. 2025-07-20 11:20:34,351 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  9165. 2025-07-20 11:20:34,351 - sglang - INFO - raise new_e
  9166. 2025-07-20 11:20:34,351 - __main__ - INFO - raise new_e
  9167. 2025-07-20 11:20:34,351 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  9168. 2025-07-20 11:20:34,352 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  9169. 2025-07-20 11:20:34,352 - sglang - INFO - self._validate_conn(conn)
  9170. 2025-07-20 11:20:34,352 - __main__ - INFO - self._validate_conn(conn)
  9171. 2025-07-20 11:20:34,352 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  9172. 2025-07-20 11:20:34,352 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  9173. 2025-07-20 11:20:34,352 - sglang - INFO - conn.connect()
  9174. 2025-07-20 11:20:34,352 - __main__ - INFO - conn.connect()
  9175. 2025-07-20 11:20:34,352 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  9176. 2025-07-20 11:20:34,352 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  9177. 2025-07-20 11:20:34,352 - sglang - INFO - self.sock = sock = self._new_conn()
  9178. 2025-07-20 11:20:34,352 - __main__ - INFO - self.sock = sock = self._new_conn()
  9179. 2025-07-20 11:20:34,352 - sglang - INFO - ^^^^^^^^^^^^^^^^
  9180. 2025-07-20 11:20:34,352 - __main__ - INFO - ^^^^^^^^^^^^^^^^
  9181. 2025-07-20 11:20:34,353 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  9182. 2025-07-20 11:20:34,353 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  9183. 2025-07-20 11:20:34,353 - sglang - INFO - raise NewConnectionError(
  9184. 2025-07-20 11:20:34,353 - __main__ - INFO - raise NewConnectionError(
  9185. 2025-07-20 11:20:34,353 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable
  9186. 2025-07-20 11:20:34,353 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable
  9187. 2025-07-20 11:20:34,353 - sglang - INFO -
  9188. 2025-07-20 11:20:34,353 - __main__ - INFO -
  9189. 2025-07-20 11:20:34,353 - sglang - INFO - The above exception was the direct cause of the following exception:
  9190. 2025-07-20 11:20:34,353 - __main__ - INFO - The above exception was the direct cause of the following exception:
  9191. 2025-07-20 11:20:34,353 - sglang - INFO -
  9192. 2025-07-20 11:20:34,353 - __main__ - INFO -
  9193. 2025-07-20 11:20:34,353 - sglang - INFO - Traceback (most recent call last):
  9194. 2025-07-20 11:20:34,354 - __main__ - INFO - Traceback (most recent call last):
  9195. 2025-07-20 11:20:34,354 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  9196. 2025-07-20 11:20:34,354 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  9197. 2025-07-20 11:20:34,354 - sglang - INFO - resp = conn.urlopen(
  9198. 2025-07-20 11:20:34,354 - __main__ - INFO - resp = conn.urlopen(
  9199. 2025-07-20 11:20:34,354 - sglang - INFO - ^^^^^^^^^^^^^
  9200. 2025-07-20 11:20:34,354 - __main__ - INFO - ^^^^^^^^^^^^^
  9201. 2025-07-20 11:20:34,354 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  9202. 2025-07-20 11:20:34,354 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  9203. 2025-07-20 11:20:34,354 - sglang - INFO - retries = retries.increment(
  9204. 2025-07-20 11:20:34,354 - __main__ - INFO - retries = retries.increment(
  9205. 2025-07-20 11:20:34,354 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  9206. 2025-07-20 11:20:34,354 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  9207. 2025-07-20 11:20:34,355 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  9208. 2025-07-20 11:20:34,355 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  9209. 2025-07-20 11:20:34,355 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  9210. 2025-07-20 11:20:34,355 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  9211. 2025-07-20 11:20:34,355 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9212. 2025-07-20 11:20:34,355 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9213. 2025-07-20 11:20:34,355 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  9214. 2025-07-20 11:20:34,355 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  9215. 2025-07-20 11:20:34,355 - sglang - INFO -
  9216. 2025-07-20 11:20:34,355 - __main__ - INFO -
  9217. 2025-07-20 11:20:34,355 - sglang - INFO - During handling of the above exception, another exception occurred:
  9218. 2025-07-20 11:20:34,355 - __main__ - INFO - During handling of the above exception, another exception occurred:
  9219. 2025-07-20 11:20:34,356 - sglang - INFO -
  9220. 2025-07-20 11:20:34,356 - __main__ - INFO -
  9221. 2025-07-20 11:20:34,356 - sglang - INFO - Traceback (most recent call last):
  9222. 2025-07-20 11:20:34,356 - __main__ - INFO - Traceback (most recent call last):
  9223. 2025-07-20 11:20:34,356 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  9224. 2025-07-20 11:20:34,356 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  9225. 2025-07-20 11:20:34,356 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  9226. 2025-07-20 11:20:34,356 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  9227. 2025-07-20 11:20:34,356 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9228. 2025-07-20 11:20:34,356 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9229. 2025-07-20 11:20:34,356 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  9230. 2025-07-20 11:20:34,356 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  9231. 2025-07-20 11:20:34,356 - sglang - INFO - self.tp_worker = TpWorkerClass(
  9232. 2025-07-20 11:20:34,357 - __main__ - INFO - self.tp_worker = TpWorkerClass(
  9233. 2025-07-20 11:20:34,357 - sglang - INFO - ^^^^^^^^^^^^^^
  9234. 2025-07-20 11:20:34,357 - __main__ - INFO - ^^^^^^^^^^^^^^
  9235. 2025-07-20 11:20:34,357 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  9236. 2025-07-20 11:20:34,357 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  9237. 2025-07-20 11:20:34,357 - sglang - INFO - self.model_runner = ModelRunner(
  9238. 2025-07-20 11:20:34,357 - __main__ - INFO - self.model_runner = ModelRunner(
  9239. 2025-07-20 11:20:34,357 - sglang - INFO - ^^^^^^^^^^^^
  9240. 2025-07-20 11:20:34,357 - __main__ - INFO - ^^^^^^^^^^^^
  9241. 2025-07-20 11:20:34,357 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  9242. 2025-07-20 11:20:34,357 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  9243. 2025-07-20 11:20:34,357 - sglang - INFO - self.load_model()
  9244. 2025-07-20 11:20:34,357 - __main__ - INFO - self.load_model()
  9245. 2025-07-20 11:20:34,358 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  9246. 2025-07-20 11:20:34,358 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  9247. 2025-07-20 11:20:34,358 - sglang - INFO - self.model = get_model(
  9248. 2025-07-20 11:20:34,358 - __main__ - INFO - self.model = get_model(
  9249. 2025-07-20 11:20:34,358 - sglang - INFO - ^^^^^^^^^^
  9250. 2025-07-20 11:20:34,358 - __main__ - INFO - ^^^^^^^^^^
  9251. 2025-07-20 11:20:34,358 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  9252. 2025-07-20 11:20:34,358 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  9253. 2025-07-20 11:20:34,358 - sglang - INFO - return loader.load_model(
  9254. 2025-07-20 11:20:34,358 - __main__ - INFO - return loader.load_model(
  9255. 2025-07-20 11:20:34,358 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  9256. 2025-07-20 11:20:34,358 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  9257. 2025-07-20 11:20:34,358 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  9258. 2025-07-20 11:20:34,358 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  9259. 2025-07-20 11:20:34,359 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
  9260. 2025-07-20 11:20:34,359 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
  9261. 2025-07-20 11:20:34,359 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  9262. 2025-07-20 11:20:34,359 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  9263. 2025-07-20 11:20:34,359 - sglang - INFO - for name, loaded_weight in weights:
  9264. 2025-07-20 11:20:34,359 - __main__ - INFO - for name, loaded_weight in weights:
  9265. 2025-07-20 11:20:34,359 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  9266. 2025-07-20 11:20:34,359 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  9267. 2025-07-20 11:20:34,359 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
  9268. 2025-07-20 11:20:34,359 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
  9269. 2025-07-20 11:20:34,359 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9270. 2025-07-20 11:20:34,359 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9271. 2025-07-20 11:20:34,359 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  9272. 2025-07-20 11:20:34,360 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  9273. 2025-07-20 11:20:34,360 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  9274. 2025-07-20 11:20:34,360 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  9275. 2025-07-20 11:20:34,360 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  9276. 2025-07-20 11:20:34,360 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  9277. 2025-07-20 11:20:34,360 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  9278. 2025-07-20 11:20:34,360 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  9279. 2025-07-20 11:20:34,360 - sglang - INFO - hf_folder = download_weights_from_hf(
  9280. 2025-07-20 11:20:34,360 - __main__ - INFO - hf_folder = download_weights_from_hf(
  9281. 2025-07-20 11:20:34,360 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  9282. 2025-07-20 11:20:34,360 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  9283. 2025-07-20 11:20:34,360 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  9284. 2025-07-20 11:20:34,360 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  9285. 2025-07-20 11:20:34,361 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  9286. 2025-07-20 11:20:34,361 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  9287. 2025-07-20 11:20:34,361 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9288. 2025-07-20 11:20:34,361 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9289. 2025-07-20 11:20:34,361 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  9290. 2025-07-20 11:20:34,361 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  9291. 2025-07-20 11:20:34,361 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
  9292. 2025-07-20 11:20:34,361 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
  9293. 2025-07-20 11:20:34,361 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9294. 2025-07-20 11:20:34,361 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9295. 2025-07-20 11:20:34,361 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  9296. 2025-07-20 11:20:34,361 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  9297. 2025-07-20 11:20:34,361 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  9298. 2025-07-20 11:20:34,361 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  9299. 2025-07-20 11:20:34,361 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9300. 2025-07-20 11:20:34,361 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9301. 2025-07-20 11:20:34,361 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  9302. 2025-07-20 11:20:34,361 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  9303. 2025-07-20 11:20:34,361 - sglang - INFO - self._api.repo_info(
  9304. 2025-07-20 11:20:34,362 - __main__ - INFO - self._api.repo_info(
  9305. 2025-07-20 11:20:34,363 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9306. 2025-07-20 11:20:34,363 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9307. 2025-07-20 11:20:34,363 - sglang - INFO - return fn(*args, **kwargs)
  9308. 2025-07-20 11:20:34,363 - __main__ - INFO - return fn(*args, **kwargs)
  9309. 2025-07-20 11:20:34,363 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9310. 2025-07-20 11:20:34,363 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9311. 2025-07-20 11:20:34,363 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  9312. 2025-07-20 11:20:34,363 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  9313. 2025-07-20 11:20:34,363 - sglang - INFO - return method(
  9314. 2025-07-20 11:20:34,363 - __main__ - INFO - return method(
  9315. 2025-07-20 11:20:34,363 - sglang - INFO - ^^^^^^^
  9316. 2025-07-20 11:20:34,363 - __main__ - INFO - ^^^^^^^
  9317. 2025-07-20 11:20:34,363 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9318. 2025-07-20 11:20:34,363 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9319. 2025-07-20 11:20:34,363 - sglang - INFO - return fn(*args, **kwargs)
  9320. 2025-07-20 11:20:34,363 - __main__ - INFO - return fn(*args, **kwargs)
  9321. 2025-07-20 11:20:34,364 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9322. 2025-07-20 11:20:34,364 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9323. 2025-07-20 11:20:34,364 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  9324. 2025-07-20 11:20:34,364 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  9325. 2025-07-20 11:20:34,364 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  9326. 2025-07-20 11:20:34,364 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  9327. 2025-07-20 11:20:34,364 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9328. 2025-07-20 11:20:34,364 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9329. 2025-07-20 11:20:34,364 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  9330. 2025-07-20 11:20:34,364 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  9331. 2025-07-20 11:20:34,364 - sglang - INFO - return self.request("GET", url, **kwargs)
  9332. 2025-07-20 11:20:34,364 - __main__ - INFO - return self.request("GET", url, **kwargs)
  9333. 2025-07-20 11:20:34,364 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9334. 2025-07-20 11:20:34,364 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9335. 2025-07-20 11:20:34,364 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  9336. 2025-07-20 11:20:34,364 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  9337. 2025-07-20 11:20:34,364 - sglang - INFO - resp = self.send(prep, **send_kwargs)
  9338. 2025-07-20 11:20:34,364 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
  9339. 2025-07-20 11:20:34,364 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9340. 2025-07-20 11:20:34,364 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9341. 2025-07-20 11:20:34,364 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  9342. 2025-07-20 11:20:34,364 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  9343. 2025-07-20 11:20:34,364 - sglang - INFO - r = adapter.send(request, **kwargs)
  9344. 2025-07-20 11:20:34,364 - __main__ - INFO - r = adapter.send(request, **kwargs)
  9345. 2025-07-20 11:20:34,365 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9346. 2025-07-20 11:20:34,365 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9347. 2025-07-20 11:20:34,365 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  9348. 2025-07-20 11:20:34,365 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  9349. 2025-07-20 11:20:34,365 - sglang - INFO - return super().send(request, *args, **kwargs)
  9350. 2025-07-20 11:20:34,365 - __main__ - INFO - return super().send(request, *args, **kwargs)
  9351. 2025-07-20 11:20:34,365 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9352. 2025-07-20 11:20:34,365 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9353. 2025-07-20 11:20:34,365 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  9354. 2025-07-20 11:20:34,365 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  9355. 2025-07-20 11:20:34,365 - sglang - INFO - raise ConnectionError(e, request=request)
  9356. 2025-07-20 11:20:34,365 - __main__ - INFO - raise ConnectionError(e, request=request)
  9357. 2025-07-20 11:20:34,365 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 6a081490-df5f-4ba3-bb52-3fc81355011d)')
  9358. 2025-07-20 11:20:34,365 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd680414490>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 6a081490-df5f-4ba3-bb52-3fc81355011d)')
  9359. 2025-07-20 11:20:34,365 - sglang - INFO -
  9360. 2025-07-20 11:20:34,365 - __main__ - INFO -
  9361. 2025-07-20 11:20:34,365 - sglang - INFO - [2025-07-20 11:20:34] Received sigquit from a child proces. It usually means the child failed.
  9362. 2025-07-20 11:20:34,365 - __main__ - INFO - [2025-07-20 11:20:34] Received sigquit from a child proces. It usually means the child failed.
  9363. 2025-07-20 11:20:34,533 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
  9364. 2025-07-20 11:20:34,619 - __main__ - WARNING - SGLang server task ended
  9365. 2025-07-20 11:20:40,624 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
  9366. 2025-07-20 11:20:42,094 - sglang - INFO - [2025-07-20 11:20:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=741775413, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  9367. 2025-07-20 11:20:42,094 - __main__ - INFO - [2025-07-20 11:20:42] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=741775413, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  9368. 2025-07-20 11:20:46,686 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
  9369. 2025-07-20 11:20:52,765 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
  9370. 2025-07-20 11:20:58,847 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
  9371. 2025-07-20 11:21:04,928 - __main__ - WARNING - Attempt 77: Please wait for sglang server to become ready...
  9372. 2025-07-20 11:21:11,009 - __main__ - WARNING - Attempt 78: Please wait for sglang server to become ready...
  9373. 2025-07-20 11:21:17,099 - __main__ - WARNING - Attempt 79: Please wait for sglang server to become ready...
  9374. 2025-07-20 11:21:23,177 - __main__ - WARNING - Attempt 80: Please wait for sglang server to become ready...
  9375. 2025-07-20 11:21:29,255 - __main__ - WARNING - Attempt 81: Please wait for sglang server to become ready...
  9376. 2025-07-20 11:21:35,334 - __main__ - WARNING - Attempt 82: Please wait for sglang server to become ready...
  9377. 2025-07-20 11:21:41,415 - __main__ - WARNING - Attempt 83: Please wait for sglang server to become ready...
  9378. 2025-07-20 11:21:47,493 - __main__ - WARNING - Attempt 84: Please wait for sglang server to become ready...
  9379. 2025-07-20 11:21:53,583 - __main__ - WARNING - Attempt 85: Please wait for sglang server to become ready...
  9380. 2025-07-20 11:21:59,665 - __main__ - WARNING - Attempt 86: Please wait for sglang server to become ready...
  9381. 2025-07-20 11:22:05,746 - __main__ - WARNING - Attempt 87: Please wait for sglang server to become ready...
  9382. 2025-07-20 11:22:11,830 - __main__ - WARNING - Attempt 88: Please wait for sglang server to become ready...
  9383. 2025-07-20 11:22:17,911 - __main__ - WARNING - Attempt 89: Please wait for sglang server to become ready...
  9384. 2025-07-20 11:22:18,669 - sglang - INFO - [2025-07-20 11:22:18] Use chat template for the OpenAI-compatible API server: qwen2-vl
  9385. 2025-07-20 11:22:18,670 - __main__ - INFO - [2025-07-20 11:22:18] Use chat template for the OpenAI-compatible API server: qwen2-vl
  9386. 2025-07-20 11:22:23,992 - __main__ - WARNING - Attempt 90: Please wait for sglang server to become ready...
  9387. 2025-07-20 11:22:24,827 - sglang - INFO - [2025-07-20 11:22:24 TP0] Overlap scheduler is disabled for multimodal models.
  9388. 2025-07-20 11:22:24,827 - __main__ - INFO - [2025-07-20 11:22:24 TP0] Overlap scheduler is disabled for multimodal models.
  9389. 2025-07-20 11:22:30,073 - __main__ - WARNING - Attempt 91: Please wait for sglang server to become ready...
  9390. 2025-07-20 11:22:36,155 - __main__ - WARNING - Attempt 92: Please wait for sglang server to become ready...
  9391. 2025-07-20 11:22:42,236 - __main__ - WARNING - Attempt 93: Please wait for sglang server to become ready...
  9392. 2025-07-20 11:22:45,011 - sglang - INFO - [2025-07-20 11:22:45 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  9393. 2025-07-20 11:22:45,012 - __main__ - INFO - [2025-07-20 11:22:45 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  9394. 2025-07-20 11:22:45,012 - sglang - INFO - [2025-07-20 11:22:45 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  9395. 2025-07-20 11:22:45,012 - __main__ - INFO - [2025-07-20 11:22:45 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  9396. 2025-07-20 11:22:45,012 - sglang - INFO - [2025-07-20 11:22:45 TP0] Init torch distributed begin.
  9397. 2025-07-20 11:22:45,012 - __main__ - INFO - [2025-07-20 11:22:45 TP0] Init torch distributed begin.
  9398. 2025-07-20 11:22:48,322 - __main__ - WARNING - Attempt 94: Please wait for sglang server to become ready...
  9399. 2025-07-20 11:22:50,393 - sglang - INFO - [2025-07-20 11:22:50 TP0] Load weight begin. avail mem=23.33 GB
  9400. 2025-07-20 11:22:50,393 - __main__ - INFO - [2025-07-20 11:22:50 TP0] Load weight begin. avail mem=23.33 GB
  9401. 2025-07-20 11:22:54,404 - __main__ - WARNING - Attempt 95: Please wait for sglang server to become ready...
  9402. 2025-07-20 11:23:00,485 - __main__ - WARNING - Attempt 96: Please wait for sglang server to become ready...
  9403. 2025-07-20 11:23:01,080 - sglang - INFO - [2025-07-20 11:23:01 TP0] Scheduler hit an exception: Traceback (most recent call last):
  9404. 2025-07-20 11:23:01,080 - __main__ - INFO - [2025-07-20 11:23:01 TP0] Scheduler hit an exception: Traceback (most recent call last):
  9405. 2025-07-20 11:23:01,080 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  9406. 2025-07-20 11:23:01,080 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  9407. 2025-07-20 11:23:01,080 - sglang - INFO - sock = connection.create_connection(
  9408. 2025-07-20 11:23:01,080 - __main__ - INFO - sock = connection.create_connection(
  9409. 2025-07-20 11:23:01,081 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9410. 2025-07-20 11:23:01,081 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9411. 2025-07-20 11:23:01,081 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  9412. 2025-07-20 11:23:01,081 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  9413. 2025-07-20 11:23:01,081 - sglang - INFO - raise err
  9414. 2025-07-20 11:23:01,081 - __main__ - INFO - raise err
  9415. 2025-07-20 11:23:01,081 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  9416. 2025-07-20 11:23:01,081 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  9417. 2025-07-20 11:23:01,081 - sglang - INFO - sock.connect(sa)
  9418. 2025-07-20 11:23:01,081 - __main__ - INFO - sock.connect(sa)
  9419. 2025-07-20 11:23:01,081 - sglang - INFO - OSError: [Errno 101] Network is unreachable
  9420. 2025-07-20 11:23:01,081 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
  9421. 2025-07-20 11:23:01,081 - sglang - INFO -
  9422. 2025-07-20 11:23:01,081 - __main__ - INFO -
  9423. 2025-07-20 11:23:01,081 - sglang - INFO - The above exception was the direct cause of the following exception:
  9424. 2025-07-20 11:23:01,081 - __main__ - INFO - The above exception was the direct cause of the following exception:
  9425. 2025-07-20 11:23:01,081 - sglang - INFO -
  9426. 2025-07-20 11:23:01,081 - __main__ - INFO -
  9427. 2025-07-20 11:23:01,081 - sglang - INFO - Traceback (most recent call last):
  9428. 2025-07-20 11:23:01,082 - __main__ - INFO - Traceback (most recent call last):
  9429. 2025-07-20 11:23:01,082 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  9430. 2025-07-20 11:23:01,082 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  9431. 2025-07-20 11:23:01,082 - sglang - INFO - response = self._make_request(
  9432. 2025-07-20 11:23:01,082 - __main__ - INFO - response = self._make_request(
  9433. 2025-07-20 11:23:01,082 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9434. 2025-07-20 11:23:01,082 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9435. 2025-07-20 11:23:01,082 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  9436. 2025-07-20 11:23:01,082 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  9437. 2025-07-20 11:23:01,082 - sglang - INFO - raise new_e
  9438. 2025-07-20 11:23:01,082 - __main__ - INFO - raise new_e
  9439. 2025-07-20 11:23:01,082 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  9440. 2025-07-20 11:23:01,083 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  9441. 2025-07-20 11:23:01,083 - sglang - INFO - self._validate_conn(conn)
  9442. 2025-07-20 11:23:01,083 - __main__ - INFO - self._validate_conn(conn)
  9443. 2025-07-20 11:23:01,083 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  9444. 2025-07-20 11:23:01,083 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  9445. 2025-07-20 11:23:01,083 - sglang - INFO - conn.connect()
  9446. 2025-07-20 11:23:01,083 - __main__ - INFO - conn.connect()
  9447. 2025-07-20 11:23:01,083 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  9448. 2025-07-20 11:23:01,083 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  9449. 2025-07-20 11:23:01,083 - sglang - INFO - self.sock = sock = self._new_conn()
  9450. 2025-07-20 11:23:01,083 - __main__ - INFO - self.sock = sock = self._new_conn()
  9451. 2025-07-20 11:23:01,083 - sglang - INFO - ^^^^^^^^^^^^^^^^
  9452. 2025-07-20 11:23:01,083 - __main__ - INFO - ^^^^^^^^^^^^^^^^
  9453. 2025-07-20 11:23:01,084 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  9454. 2025-07-20 11:23:01,084 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  9455. 2025-07-20 11:23:01,084 - sglang - INFO - raise NewConnectionError(
  9456. 2025-07-20 11:23:01,084 - __main__ - INFO - raise NewConnectionError(
  9457. 2025-07-20 11:23:01,084 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
  9458. 2025-07-20 11:23:01,084 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
  9459. 2025-07-20 11:23:01,084 - sglang - INFO -
  9460. 2025-07-20 11:23:01,084 - __main__ - INFO -
  9461. 2025-07-20 11:23:01,084 - sglang - INFO - The above exception was the direct cause of the following exception:
  9462. 2025-07-20 11:23:01,084 - __main__ - INFO - The above exception was the direct cause of the following exception:
  9463. 2025-07-20 11:23:01,084 - sglang - INFO -
  9464. 2025-07-20 11:23:01,084 - __main__ - INFO -
  9465. 2025-07-20 11:23:01,084 - sglang - INFO - Traceback (most recent call last):
  9466. 2025-07-20 11:23:01,084 - __main__ - INFO - Traceback (most recent call last):
  9467. 2025-07-20 11:23:01,084 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  9468. 2025-07-20 11:23:01,085 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  9469. 2025-07-20 11:23:01,085 - sglang - INFO - resp = conn.urlopen(
  9470. 2025-07-20 11:23:01,085 - __main__ - INFO - resp = conn.urlopen(
  9471. 2025-07-20 11:23:01,085 - sglang - INFO - ^^^^^^^^^^^^^
  9472. 2025-07-20 11:23:01,085 - __main__ - INFO - ^^^^^^^^^^^^^
  9473. 2025-07-20 11:23:01,085 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  9474. 2025-07-20 11:23:01,085 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  9475. 2025-07-20 11:23:01,085 - sglang - INFO - retries = retries.increment(
  9476. 2025-07-20 11:23:01,085 - __main__ - INFO - retries = retries.increment(
  9477. 2025-07-20 11:23:01,085 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  9478. 2025-07-20 11:23:01,085 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  9479. 2025-07-20 11:23:01,085 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  9480. 2025-07-20 11:23:01,085 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  9481. 2025-07-20 11:23:01,085 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  9482. 2025-07-20 11:23:01,085 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  9483. 2025-07-20 11:23:01,085 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9484. 2025-07-20 11:23:01,085 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9485. 2025-07-20 11:23:01,085 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  9486. 2025-07-20 11:23:01,085 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  9487. 2025-07-20 11:23:01,085 - sglang - INFO -
  9488. 2025-07-20 11:23:01,085 - __main__ - INFO -
  9489. 2025-07-20 11:23:01,085 - sglang - INFO - During handling of the above exception, another exception occurred:
  9490. 2025-07-20 11:23:01,085 - __main__ - INFO - During handling of the above exception, another exception occurred:
  9491. 2025-07-20 11:23:01,085 - sglang - INFO -
  9492. 2025-07-20 11:23:01,085 - __main__ - INFO -
  9493. 2025-07-20 11:23:01,085 - sglang - INFO - Traceback (most recent call last):
  9494. 2025-07-20 11:23:01,085 - __main__ - INFO - Traceback (most recent call last):
  9495. 2025-07-20 11:23:01,085 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  9496. 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  9497. 2025-07-20 11:23:01,086 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  9498. 2025-07-20 11:23:01,086 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  9499. 2025-07-20 11:23:01,086 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9500. 2025-07-20 11:23:01,086 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9501. 2025-07-20 11:23:01,086 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  9502. 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  9503. 2025-07-20 11:23:01,086 - sglang - INFO - self.tp_worker = TpWorkerClass(
  9504. 2025-07-20 11:23:01,086 - __main__ - INFO - self.tp_worker = TpWorkerClass(
  9505. 2025-07-20 11:23:01,086 - sglang - INFO - ^^^^^^^^^^^^^^
  9506. 2025-07-20 11:23:01,086 - __main__ - INFO - ^^^^^^^^^^^^^^
  9507. 2025-07-20 11:23:01,086 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  9508. 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  9509. 2025-07-20 11:23:01,086 - sglang - INFO - self.model_runner = ModelRunner(
  9510. 2025-07-20 11:23:01,086 - __main__ - INFO - self.model_runner = ModelRunner(
  9511. 2025-07-20 11:23:01,086 - sglang - INFO - ^^^^^^^^^^^^
  9512. 2025-07-20 11:23:01,086 - __main__ - INFO - ^^^^^^^^^^^^
  9513. 2025-07-20 11:23:01,086 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  9514. 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  9515. 2025-07-20 11:23:01,086 - sglang - INFO - self.load_model()
  9516. 2025-07-20 11:23:01,086 - __main__ - INFO - self.load_model()
  9517. 2025-07-20 11:23:01,086 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  9518. 2025-07-20 11:23:01,086 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  9519. 2025-07-20 11:23:01,086 - sglang - INFO - self.model = get_model(
  9520. 2025-07-20 11:23:01,087 - __main__ - INFO - self.model = get_model(
  9521. 2025-07-20 11:23:01,087 - sglang - INFO - ^^^^^^^^^^
  9522. 2025-07-20 11:23:01,087 - __main__ - INFO - ^^^^^^^^^^
  9523. 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  9524. 2025-07-20 11:23:01,087 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  9525. 2025-07-20 11:23:01,087 - sglang - INFO - return loader.load_model(
  9526. 2025-07-20 11:23:01,087 - __main__ - INFO - return loader.load_model(
  9527. 2025-07-20 11:23:01,087 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  9528. 2025-07-20 11:23:01,087 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  9529. 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  9530. 2025-07-20 11:23:01,087 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  9531. 2025-07-20 11:23:01,087 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
  9532. 2025-07-20 11:23:01,087 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
  9533. 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  9534. 2025-07-20 11:23:01,087 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  9535. 2025-07-20 11:23:01,087 - sglang - INFO - for name, loaded_weight in weights:
  9536. 2025-07-20 11:23:01,087 - __main__ - INFO - for name, loaded_weight in weights:
  9537. 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  9538. 2025-07-20 11:23:01,087 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  9539. 2025-07-20 11:23:01,087 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
  9540. 2025-07-20 11:23:01,087 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
  9541. 2025-07-20 11:23:01,087 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9542. 2025-07-20 11:23:01,087 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9543. 2025-07-20 11:23:01,087 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  9544. 2025-07-20 11:23:01,088 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  9545. 2025-07-20 11:23:01,088 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  9546. 2025-07-20 11:23:01,088 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  9547. 2025-07-20 11:23:01,088 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  9548. 2025-07-20 11:23:01,088 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  9549. 2025-07-20 11:23:01,088 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  9550. 2025-07-20 11:23:01,088 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  9551. 2025-07-20 11:23:01,088 - sglang - INFO - hf_folder = download_weights_from_hf(
  9552. 2025-07-20 11:23:01,088 - __main__ - INFO - hf_folder = download_weights_from_hf(
  9553. 2025-07-20 11:23:01,088 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  9554. 2025-07-20 11:23:01,088 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  9555. 2025-07-20 11:23:01,088 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  9556. 2025-07-20 11:23:01,088 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  9557. 2025-07-20 11:23:01,088 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  9558. 2025-07-20 11:23:01,088 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  9559. 2025-07-20 11:23:01,088 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9560. 2025-07-20 11:23:01,088 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9561. 2025-07-20 11:23:01,088 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  9562. 2025-07-20 11:23:01,088 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  9563. 2025-07-20 11:23:01,088 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
  9564. 2025-07-20 11:23:01,088 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
  9565. 2025-07-20 11:23:01,088 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9566. 2025-07-20 11:23:01,088 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9567. 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  9568. 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  9569. 2025-07-20 11:23:01,089 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  9570. 2025-07-20 11:23:01,089 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  9571. 2025-07-20 11:23:01,089 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9572. 2025-07-20 11:23:01,089 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9573. 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  9574. 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  9575. 2025-07-20 11:23:01,089 - sglang - INFO - self._api.repo_info(
  9576. 2025-07-20 11:23:01,089 - __main__ - INFO - self._api.repo_info(
  9577. 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9578. 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9579. 2025-07-20 11:23:01,089 - sglang - INFO - return fn(*args, **kwargs)
  9580. 2025-07-20 11:23:01,089 - __main__ - INFO - return fn(*args, **kwargs)
  9581. 2025-07-20 11:23:01,089 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9582. 2025-07-20 11:23:01,089 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9583. 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  9584. 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  9585. 2025-07-20 11:23:01,089 - sglang - INFO - return method(
  9586. 2025-07-20 11:23:01,089 - __main__ - INFO - return method(
  9587. 2025-07-20 11:23:01,089 - sglang - INFO - ^^^^^^^
  9588. 2025-07-20 11:23:01,089 - __main__ - INFO - ^^^^^^^
  9589. 2025-07-20 11:23:01,089 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9590. 2025-07-20 11:23:01,089 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9591. 2025-07-20 11:23:01,090 - sglang - INFO - return fn(*args, **kwargs)
  9592. 2025-07-20 11:23:01,090 - __main__ - INFO - return fn(*args, **kwargs)
  9593. 2025-07-20 11:23:01,090 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9594. 2025-07-20 11:23:01,090 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9595. 2025-07-20 11:23:01,090 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  9596. 2025-07-20 11:23:01,090 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  9597. 2025-07-20 11:23:01,090 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  9598. 2025-07-20 11:23:01,090 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  9599. 2025-07-20 11:23:01,090 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9600. 2025-07-20 11:23:01,090 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9601. 2025-07-20 11:23:01,090 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  9602. 2025-07-20 11:23:01,090 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  9603. 2025-07-20 11:23:01,090 - sglang - INFO - return self.request("GET", url, **kwargs)
  9604. 2025-07-20 11:23:01,090 - __main__ - INFO - return self.request("GET", url, **kwargs)
  9605. 2025-07-20 11:23:01,090 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9606. 2025-07-20 11:23:01,090 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9607. 2025-07-20 11:23:01,090 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  9608. 2025-07-20 11:23:01,090 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  9609. 2025-07-20 11:23:01,090 - sglang - INFO - resp = self.send(prep, **send_kwargs)
  9610. 2025-07-20 11:23:01,090 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
  9611. 2025-07-20 11:23:01,090 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9612. 2025-07-20 11:23:01,090 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9613. 2025-07-20 11:23:01,090 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  9614. 2025-07-20 11:23:01,090 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  9615. 2025-07-20 11:23:01,091 - sglang - INFO - r = adapter.send(request, **kwargs)
  9616. 2025-07-20 11:23:01,091 - __main__ - INFO - r = adapter.send(request, **kwargs)
  9617. 2025-07-20 11:23:01,091 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9618. 2025-07-20 11:23:01,091 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9619. 2025-07-20 11:23:01,091 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  9620. 2025-07-20 11:23:01,091 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  9621. 2025-07-20 11:23:01,091 - sglang - INFO - return super().send(request, *args, **kwargs)
  9622. 2025-07-20 11:23:01,091 - __main__ - INFO - return super().send(request, *args, **kwargs)
  9623. 2025-07-20 11:23:01,091 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9624. 2025-07-20 11:23:01,091 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9625. 2025-07-20 11:23:01,091 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  9626. 2025-07-20 11:23:01,091 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  9627. 2025-07-20 11:23:01,091 - sglang - INFO - raise ConnectionError(e, request=request)
  9628. 2025-07-20 11:23:01,091 - __main__ - INFO - raise ConnectionError(e, request=request)
  9629. 2025-07-20 11:23:01,091 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 3a71c81f-a953-43be-9246-0327729c923d)')
  9630. 2025-07-20 11:23:01,091 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f48841cf7d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 3a71c81f-a953-43be-9246-0327729c923d)')
  9631. 2025-07-20 11:23:01,091 - sglang - INFO -
  9632. 2025-07-20 11:23:01,091 - __main__ - INFO -
  9633. 2025-07-20 11:23:01,091 - sglang - INFO - [2025-07-20 11:23:01] Received sigquit from a child proces. It usually means the child failed.
  9634. 2025-07-20 11:23:01,091 - __main__ - INFO - [2025-07-20 11:23:01] Received sigquit from a child proces. It usually means the child failed.
  9635. 2025-07-20 11:23:01,432 - __main__ - WARNING - SGLang server task ended
  9636. 2025-07-20 11:23:06,644 - __main__ - WARNING - Attempt 97: Please wait for sglang server to become ready...
  9637. 2025-07-20 11:23:08,746 - sglang - INFO - [2025-07-20 11:23:08] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=935034446, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  9638. 2025-07-20 11:23:08,746 - __main__ - INFO - [2025-07-20 11:23:08] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=935034446, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  9639. 2025-07-20 11:23:12,705 - __main__ - WARNING - Attempt 98: Please wait for sglang server to become ready...
  9640. 2025-07-20 11:23:18,766 - __main__ - WARNING - Attempt 99: Please wait for sglang server to become ready...
  9641. 2025-07-20 11:23:24,846 - __main__ - WARNING - Attempt 100: Please wait for sglang server to become ready...
  9642. 2025-07-20 11:23:30,927 - __main__ - WARNING - Attempt 101: Please wait for sglang server to become ready...
  9643. 2025-07-20 11:23:37,008 - __main__ - WARNING - Attempt 102: Please wait for sglang server to become ready...
  9644. 2025-07-20 11:23:43,087 - __main__ - WARNING - Attempt 103: Please wait for sglang server to become ready...
  9645. 2025-07-20 11:23:49,168 - __main__ - WARNING - Attempt 104: Please wait for sglang server to become ready...
  9646. 2025-07-20 11:23:55,248 - __main__ - WARNING - Attempt 105: Please wait for sglang server to become ready...
  9647. 2025-07-20 11:24:01,341 - __main__ - WARNING - Attempt 106: Please wait for sglang server to become ready...
  9648. 2025-07-20 11:24:07,423 - __main__ - WARNING - Attempt 107: Please wait for sglang server to become ready...
  9649. 2025-07-20 11:24:13,505 - __main__ - WARNING - Attempt 108: Please wait for sglang server to become ready...
  9650. 2025-07-20 11:24:19,585 - __main__ - WARNING - Attempt 109: Please wait for sglang server to become ready...
  9651. 2025-07-20 11:24:25,666 - __main__ - WARNING - Attempt 110: Please wait for sglang server to become ready...
  9652. 2025-07-20 11:24:31,748 - __main__ - WARNING - Attempt 111: Please wait for sglang server to become ready...
  9653. 2025-07-20 11:24:37,829 - __main__ - WARNING - Attempt 112: Please wait for sglang server to become ready...
  9654. 2025-07-20 11:24:43,910 - __main__ - WARNING - Attempt 113: Please wait for sglang server to become ready...
  9655. 2025-07-20 11:24:45,439 - sglang - INFO - [2025-07-20 11:24:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
  9656. 2025-07-20 11:24:45,439 - __main__ - INFO - [2025-07-20 11:24:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
  9657. 2025-07-20 11:24:49,991 - __main__ - WARNING - Attempt 114: Please wait for sglang server to become ready...
  9658. 2025-07-20 11:24:51,481 - sglang - INFO - [2025-07-20 11:24:51 TP0] Overlap scheduler is disabled for multimodal models.
  9659. 2025-07-20 11:24:51,481 - __main__ - INFO - [2025-07-20 11:24:51 TP0] Overlap scheduler is disabled for multimodal models.
  9660. 2025-07-20 11:24:56,075 - __main__ - WARNING - Attempt 115: Please wait for sglang server to become ready...
  9661. 2025-07-20 11:25:02,156 - __main__ - WARNING - Attempt 116: Please wait for sglang server to become ready...
  9662. 2025-07-20 11:25:08,237 - __main__ - WARNING - Attempt 117: Please wait for sglang server to become ready...
  9663. 2025-07-20 11:25:11,652 - sglang - INFO - [2025-07-20 11:25:11 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  9664. 2025-07-20 11:25:11,653 - __main__ - INFO - [2025-07-20 11:25:11 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  9665. 2025-07-20 11:25:11,653 - sglang - INFO - [2025-07-20 11:25:11 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  9666. 2025-07-20 11:25:11,653 - __main__ - INFO - [2025-07-20 11:25:11 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  9667. 2025-07-20 11:25:11,653 - sglang - INFO - [2025-07-20 11:25:11 TP0] Init torch distributed begin.
  9668. 2025-07-20 11:25:11,653 - __main__ - INFO - [2025-07-20 11:25:11 TP0] Init torch distributed begin.
  9669. 2025-07-20 11:25:14,319 - __main__ - WARNING - Attempt 118: Please wait for sglang server to become ready...
  9670. 2025-07-20 11:25:17,053 - sglang - INFO - [2025-07-20 11:25:17 TP0] Load weight begin. avail mem=23.33 GB
  9671. 2025-07-20 11:25:17,053 - __main__ - INFO - [2025-07-20 11:25:17 TP0] Load weight begin. avail mem=23.33 GB
  9672. 2025-07-20 11:25:20,401 - __main__ - WARNING - Attempt 119: Please wait for sglang server to become ready...
  9673. 2025-07-20 11:25:26,481 - __main__ - WARNING - Attempt 120: Please wait for sglang server to become ready...
  9674. 2025-07-20 11:25:27,747 - sglang - INFO - [2025-07-20 11:25:27 TP0] Scheduler hit an exception: Traceback (most recent call last):
  9675. 2025-07-20 11:25:27,748 - __main__ - INFO - [2025-07-20 11:25:27 TP0] Scheduler hit an exception: Traceback (most recent call last):
  9676. 2025-07-20 11:25:27,748 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  9677. 2025-07-20 11:25:27,748 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
  9678. 2025-07-20 11:25:27,748 - sglang - INFO - sock = connection.create_connection(
  9679. 2025-07-20 11:25:27,748 - __main__ - INFO - sock = connection.create_connection(
  9680. 2025-07-20 11:25:27,748 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9681. 2025-07-20 11:25:27,748 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9682. 2025-07-20 11:25:27,748 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  9683. 2025-07-20 11:25:27,748 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
  9684. 2025-07-20 11:25:27,748 - sglang - INFO - raise err
  9685. 2025-07-20 11:25:27,748 - __main__ - INFO - raise err
  9686. 2025-07-20 11:25:27,748 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  9687. 2025-07-20 11:25:27,748 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
  9688. 2025-07-20 11:25:27,748 - sglang - INFO - sock.connect(sa)
  9689. 2025-07-20 11:25:27,748 - __main__ - INFO - sock.connect(sa)
  9690. 2025-07-20 11:25:27,748 - sglang - INFO - OSError: [Errno 101] Network is unreachable
  9691. 2025-07-20 11:25:27,748 - __main__ - INFO - OSError: [Errno 101] Network is unreachable
  9692. 2025-07-20 11:25:27,749 - sglang - INFO -
  9693. 2025-07-20 11:25:27,749 - __main__ - INFO -
  9694. 2025-07-20 11:25:27,749 - sglang - INFO - The above exception was the direct cause of the following exception:
  9695. 2025-07-20 11:25:27,749 - __main__ - INFO - The above exception was the direct cause of the following exception:
  9696. 2025-07-20 11:25:27,749 - sglang - INFO -
  9697. 2025-07-20 11:25:27,749 - __main__ - INFO -
  9698. 2025-07-20 11:25:27,749 - sglang - INFO - Traceback (most recent call last):
  9699. 2025-07-20 11:25:27,749 - __main__ - INFO - Traceback (most recent call last):
  9700. 2025-07-20 11:25:27,749 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  9701. 2025-07-20 11:25:27,749 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
  9702. 2025-07-20 11:25:27,749 - sglang - INFO - response = self._make_request(
  9703. 2025-07-20 11:25:27,749 - __main__ - INFO - response = self._make_request(
  9704. 2025-07-20 11:25:27,749 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9705. 2025-07-20 11:25:27,749 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9706. 2025-07-20 11:25:27,749 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  9707. 2025-07-20 11:25:27,749 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 488, in _make_request
  9708. 2025-07-20 11:25:27,749 - sglang - INFO - raise new_e
  9709. 2025-07-20 11:25:27,749 - __main__ - INFO - raise new_e
  9710. 2025-07-20 11:25:27,749 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  9711. 2025-07-20 11:25:27,749 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 464, in _make_request
  9712. 2025-07-20 11:25:27,749 - sglang - INFO - self._validate_conn(conn)
  9713. 2025-07-20 11:25:27,749 - __main__ - INFO - self._validate_conn(conn)
  9714. 2025-07-20 11:25:27,749 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  9715. 2025-07-20 11:25:27,750 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
  9716. 2025-07-20 11:25:27,750 - sglang - INFO - conn.connect()
  9717. 2025-07-20 11:25:27,750 - __main__ - INFO - conn.connect()
  9718. 2025-07-20 11:25:27,750 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  9719. 2025-07-20 11:25:27,750 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 704, in connect
  9720. 2025-07-20 11:25:27,750 - sglang - INFO - self.sock = sock = self._new_conn()
  9721. 2025-07-20 11:25:27,750 - __main__ - INFO - self.sock = sock = self._new_conn()
  9722. 2025-07-20 11:25:27,750 - sglang - INFO - ^^^^^^^^^^^^^^^^
  9723. 2025-07-20 11:25:27,750 - __main__ - INFO - ^^^^^^^^^^^^^^^^
  9724. 2025-07-20 11:25:27,750 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  9725. 2025-07-20 11:25:27,750 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connection.py", line 213, in _new_conn
  9726. 2025-07-20 11:25:27,750 - sglang - INFO - raise NewConnectionError(
  9727. 2025-07-20 11:25:27,750 - __main__ - INFO - raise NewConnectionError(
  9728. 2025-07-20 11:25:27,750 - sglang - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
  9729. 2025-07-20 11:25:27,750 - __main__ - INFO - urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable
  9730. 2025-07-20 11:25:27,750 - sglang - INFO -
  9731. 2025-07-20 11:25:27,750 - __main__ - INFO -
  9732. 2025-07-20 11:25:27,750 - sglang - INFO - The above exception was the direct cause of the following exception:
  9733. 2025-07-20 11:25:27,750 - __main__ - INFO - The above exception was the direct cause of the following exception:
  9734. 2025-07-20 11:25:27,750 - sglang - INFO -
  9735. 2025-07-20 11:25:27,750 - __main__ - INFO -
  9736. 2025-07-20 11:25:27,750 - sglang - INFO - Traceback (most recent call last):
  9737. 2025-07-20 11:25:27,750 - __main__ - INFO - Traceback (most recent call last):
  9738. 2025-07-20 11:25:27,750 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  9739. 2025-07-20 11:25:27,751 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 667, in send
  9740. 2025-07-20 11:25:27,751 - sglang - INFO - resp = conn.urlopen(
  9741. 2025-07-20 11:25:27,751 - __main__ - INFO - resp = conn.urlopen(
  9742. 2025-07-20 11:25:27,751 - sglang - INFO - ^^^^^^^^^^^^^
  9743. 2025-07-20 11:25:27,751 - __main__ - INFO - ^^^^^^^^^^^^^
  9744. 2025-07-20 11:25:27,751 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  9745. 2025-07-20 11:25:27,751 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/connectionpool.py", line 841, in urlopen
  9746. 2025-07-20 11:25:27,751 - sglang - INFO - retries = retries.increment(
  9747. 2025-07-20 11:25:27,751 - __main__ - INFO - retries = retries.increment(
  9748. 2025-07-20 11:25:27,751 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  9749. 2025-07-20 11:25:27,751 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  9750. 2025-07-20 11:25:27,751 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  9751. 2025-07-20 11:25:27,751 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/urllib3/util/retry.py", line 519, in increment
  9752. 2025-07-20 11:25:27,751 - sglang - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  9753. 2025-07-20 11:25:27,751 - __main__ - INFO - raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
  9754. 2025-07-20 11:25:27,751 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9755. 2025-07-20 11:25:27,751 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9756. 2025-07-20 11:25:27,751 - sglang - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  9757. 2025-07-20 11:25:27,751 - __main__ - INFO - urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))
  9758. 2025-07-20 11:25:27,751 - sglang - INFO -
  9759. 2025-07-20 11:25:27,751 - __main__ - INFO -
  9760. 2025-07-20 11:25:27,751 - sglang - INFO - During handling of the above exception, another exception occurred:
  9761. 2025-07-20 11:25:27,752 - __main__ - INFO - During handling of the above exception, another exception occurred:
  9762. 2025-07-20 11:25:27,752 - sglang - INFO -
  9763. 2025-07-20 11:25:27,752 - __main__ - INFO -
  9764. 2025-07-20 11:25:27,752 - sglang - INFO - Traceback (most recent call last):
  9765. 2025-07-20 11:25:27,752 - __main__ - INFO - Traceback (most recent call last):
  9766. 2025-07-20 11:25:27,752 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  9767. 2025-07-20 11:25:27,752 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process
  9768. 2025-07-20 11:25:27,752 - sglang - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  9769. 2025-07-20 11:25:27,752 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
  9770. 2025-07-20 11:25:27,752 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9771. 2025-07-20 11:25:27,752 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9772. 2025-07-20 11:25:27,752 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  9773. 2025-07-20 11:25:27,752 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 239, in __init__
  9774. 2025-07-20 11:25:27,752 - sglang - INFO - self.tp_worker = TpWorkerClass(
  9775. 2025-07-20 11:25:27,752 - __main__ - INFO - self.tp_worker = TpWorkerClass(
  9776. 2025-07-20 11:25:27,752 - sglang - INFO - ^^^^^^^^^^^^^^
  9777. 2025-07-20 11:25:27,752 - __main__ - INFO - ^^^^^^^^^^^^^^
  9778. 2025-07-20 11:25:27,752 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  9779. 2025-07-20 11:25:27,752 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__
  9780. 2025-07-20 11:25:27,752 - sglang - INFO - self.model_runner = ModelRunner(
  9781. 2025-07-20 11:25:27,752 - __main__ - INFO - self.model_runner = ModelRunner(
  9782. 2025-07-20 11:25:27,752 - sglang - INFO - ^^^^^^^^^^^^
  9783. 2025-07-20 11:25:27,752 - __main__ - INFO - ^^^^^^^^^^^^
  9784. 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  9785. 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__
  9786. 2025-07-20 11:25:27,753 - sglang - INFO - self.load_model()
  9787. 2025-07-20 11:25:27,753 - __main__ - INFO - self.load_model()
  9788. 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  9789. 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model
  9790. 2025-07-20 11:25:27,753 - sglang - INFO - self.model = get_model(
  9791. 2025-07-20 11:25:27,753 - __main__ - INFO - self.model = get_model(
  9792. 2025-07-20 11:25:27,753 - sglang - INFO - ^^^^^^^^^^
  9793. 2025-07-20 11:25:27,753 - __main__ - INFO - ^^^^^^^^^^
  9794. 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  9795. 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
  9796. 2025-07-20 11:25:27,753 - sglang - INFO - return loader.load_model(
  9797. 2025-07-20 11:25:27,753 - __main__ - INFO - return loader.load_model(
  9798. 2025-07-20 11:25:27,753 - sglang - INFO - ^^^^^^^^^^^^^^^^^^
  9799. 2025-07-20 11:25:27,753 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^
  9800. 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  9801. 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 362, in load_model
  9802. 2025-07-20 11:25:27,753 - sglang - INFO - model.load_weights(self._get_all_weights(model_config, model))
  9803. 2025-07-20 11:25:27,753 - __main__ - INFO - model.load_weights(self._get_all_weights(model_config, model))
  9804. 2025-07-20 11:25:27,753 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  9805. 2025-07-20 11:25:27,753 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/models/qwen2_vl.py", line 576, in load_weights
  9806. 2025-07-20 11:25:27,754 - sglang - INFO - for name, loaded_weight in weights:
  9807. 2025-07-20 11:25:27,754 - __main__ - INFO - for name, loaded_weight in weights:
  9808. 2025-07-20 11:25:27,754 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  9809. 2025-07-20 11:25:27,754 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 335, in _get_all_weights
  9810. 2025-07-20 11:25:27,754 - sglang - INFO - yield from self._get_weights_iterator(primary_weights)
  9811. 2025-07-20 11:25:27,754 - __main__ - INFO - yield from self._get_weights_iterator(primary_weights)
  9812. 2025-07-20 11:25:27,754 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9813. 2025-07-20 11:25:27,754 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9814. 2025-07-20 11:25:27,754 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  9815. 2025-07-20 11:25:27,754 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 303, in _get_weights_iterator
  9816. 2025-07-20 11:25:27,754 - sglang - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  9817. 2025-07-20 11:25:27,754 - __main__ - INFO - hf_folder, hf_weights_files, use_safetensors = self._prepare_weights(
  9818. 2025-07-20 11:25:27,754 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  9819. 2025-07-20 11:25:27,754 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  9820. 2025-07-20 11:25:27,754 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  9821. 2025-07-20 11:25:27,754 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/loader.py", line 255, in _prepare_weights
  9822. 2025-07-20 11:25:27,754 - sglang - INFO - hf_folder = download_weights_from_hf(
  9823. 2025-07-20 11:25:27,754 - __main__ - INFO - hf_folder = download_weights_from_hf(
  9824. 2025-07-20 11:25:27,754 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  9825. 2025-07-20 11:25:27,754 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^
  9826. 2025-07-20 11:25:27,754 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  9827. 2025-07-20 11:25:27,754 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/model_loader/weight_utils.py", line 246, in download_weights_from_hf
  9828. 2025-07-20 11:25:27,754 - sglang - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  9829. 2025-07-20 11:25:27,755 - __main__ - INFO - file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
  9830. 2025-07-20 11:25:27,755 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9831. 2025-07-20 11:25:27,755 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9832. 2025-07-20 11:25:27,755 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  9833. 2025-07-20 11:25:27,755 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 368, in ls
  9834. 2025-07-20 11:25:27,755 - sglang - INFO - resolved_path = self.resolve_path(path, revision=revision)
  9835. 2025-07-20 11:25:27,755 - __main__ - INFO - resolved_path = self.resolve_path(path, revision=revision)
  9836. 2025-07-20 11:25:27,755 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9837. 2025-07-20 11:25:27,755 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9838. 2025-07-20 11:25:27,755 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  9839. 2025-07-20 11:25:27,755 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 209, in resolve_path
  9840. 2025-07-20 11:25:27,755 - sglang - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  9841. 2025-07-20 11:25:27,755 - __main__ - INFO - repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
  9842. 2025-07-20 11:25:27,755 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9843. 2025-07-20 11:25:27,755 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9844. 2025-07-20 11:25:27,755 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  9845. 2025-07-20 11:25:27,755 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py", line 125, in _repo_and_revision_exist
  9846. 2025-07-20 11:25:27,755 - sglang - INFO - self._api.repo_info(
  9847. 2025-07-20 11:25:27,755 - __main__ - INFO - self._api.repo_info(
  9848. 2025-07-20 11:25:27,755 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9849. 2025-07-20 11:25:27,755 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9850. 2025-07-20 11:25:27,755 - sglang - INFO - return fn(*args, **kwargs)
  9851. 2025-07-20 11:25:27,756 - __main__ - INFO - return fn(*args, **kwargs)
  9852. 2025-07-20 11:25:27,756 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9853. 2025-07-20 11:25:27,756 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9854. 2025-07-20 11:25:27,756 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  9855. 2025-07-20 11:25:27,756 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2816, in repo_info
  9856. 2025-07-20 11:25:27,756 - sglang - INFO - return method(
  9857. 2025-07-20 11:25:27,756 - __main__ - INFO - return method(
  9858. 2025-07-20 11:25:27,756 - sglang - INFO - ^^^^^^^
  9859. 2025-07-20 11:25:27,756 - __main__ - INFO - ^^^^^^^
  9860. 2025-07-20 11:25:27,756 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9861. 2025-07-20 11:25:27,756 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  9862. 2025-07-20 11:25:27,756 - sglang - INFO - return fn(*args, **kwargs)
  9863. 2025-07-20 11:25:27,756 - __main__ - INFO - return fn(*args, **kwargs)
  9864. 2025-07-20 11:25:27,756 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^
  9865. 2025-07-20 11:25:27,756 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^
  9866. 2025-07-20 11:25:27,756 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  9867. 2025-07-20 11:25:27,756 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/hf_api.py", line 2600, in model_info
  9868. 2025-07-20 11:25:27,756 - sglang - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  9869. 2025-07-20 11:25:27,756 - __main__ - INFO - r = get_session().get(path, headers=headers, timeout=timeout, params=params)
  9870. 2025-07-20 11:25:27,756 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9871. 2025-07-20 11:25:27,756 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9872. 2025-07-20 11:25:27,756 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  9873. 2025-07-20 11:25:27,757 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 602, in get
  9874. 2025-07-20 11:25:27,757 - sglang - INFO - return self.request("GET", url, **kwargs)
  9875. 2025-07-20 11:25:27,757 - __main__ - INFO - return self.request("GET", url, **kwargs)
  9876. 2025-07-20 11:25:27,757 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9877. 2025-07-20 11:25:27,757 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9878. 2025-07-20 11:25:27,757 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  9879. 2025-07-20 11:25:27,757 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 589, in request
  9880. 2025-07-20 11:25:27,757 - sglang - INFO - resp = self.send(prep, **send_kwargs)
  9881. 2025-07-20 11:25:27,757 - __main__ - INFO - resp = self.send(prep, **send_kwargs)
  9882. 2025-07-20 11:25:27,757 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9883. 2025-07-20 11:25:27,757 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9884. 2025-07-20 11:25:27,757 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  9885. 2025-07-20 11:25:27,757 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/sessions.py", line 703, in send
  9886. 2025-07-20 11:25:27,757 - sglang - INFO - r = adapter.send(request, **kwargs)
  9887. 2025-07-20 11:25:27,757 - __main__ - INFO - r = adapter.send(request, **kwargs)
  9888. 2025-07-20 11:25:27,757 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9889. 2025-07-20 11:25:27,757 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9890. 2025-07-20 11:25:27,757 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  9891. 2025-07-20 11:25:27,757 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send
  9892. 2025-07-20 11:25:27,757 - sglang - INFO - return super().send(request, *args, **kwargs)
  9893. 2025-07-20 11:25:27,757 - __main__ - INFO - return super().send(request, *args, **kwargs)
  9894. 2025-07-20 11:25:27,757 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9895. 2025-07-20 11:25:27,757 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  9896. 2025-07-20 11:25:27,758 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  9897. 2025-07-20 11:25:27,758 - __main__ - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/requests/adapters.py", line 700, in send
  9898. 2025-07-20 11:25:27,758 - sglang - INFO - raise ConnectionError(e, request=request)
  9899. 2025-07-20 11:25:27,758 - __main__ - INFO - raise ConnectionError(e, request=request)
  9900. 2025-07-20 11:25:27,758 - sglang - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 9331f4da-f005-4022-a34a-4bb0423deb4d)')
  9901. 2025-07-20 11:25:27,758 - __main__ - INFO - requests.exceptions.ConnectionError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /api/models/allenai/olmOCR-7B-0225-preview (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f28c06cc1d0>: Failed to establish a new connection: [Errno 101] Network is unreachable'))"), '(Request ID: 9331f4da-f005-4022-a34a-4bb0423deb4d)')
  9902. 2025-07-20 11:25:27,758 - sglang - INFO -
  9903. 2025-07-20 11:25:27,758 - __main__ - INFO -
  9904. 2025-07-20 11:25:27,758 - sglang - INFO - [2025-07-20 11:25:27] Received sigquit from a child proces. It usually means the child failed.
  9905. 2025-07-20 11:25:27,758 - __main__ - INFO - [2025-07-20 11:25:27] Received sigquit from a child proces. It usually means the child failed.
  9906. 2025-07-20 11:25:28,039 - __main__ - WARNING - SGLang server task ended
  9907. 2025-07-20 11:25:28,040 - __main__ - ERROR - Ended up starting the sglang server more than 5 times, cancelling pipeline
  9908. 2025-07-20 11:25:28,040 - __main__ - ERROR -
  9909. 2025-07-20 11:25:28,040 - __main__ - ERROR - Please make sure sglang is installed according to the latest instructions here: https://docs.sglang.ai/start/install.html
  9910. 2025-07-20 15:06:33,448 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  9911. 2025-07-20 15:06:33,448 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  9912. 2025-07-20 15:06:33,448 - __main__ - INFO - Found 1 total pdf paths to add
  9913. 2025-07-20 15:06:33,451 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  9914. 2025-07-20 15:06:33,678 - __main__ - INFO - Starting pipeline with PID 589922
  9915. 2025-07-20 15:06:33,679 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  9916. 2025-07-20 15:06:38,788 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  9917. 2025-07-20 15:06:40,602 - sglang - INFO - [2025-07-20 15:06:40] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=555501304, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  9918. 2025-07-20 15:06:40,603 - __main__ - INFO - [2025-07-20 15:06:40] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=555501304, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  9919. 2025-07-20 15:06:41,702 - sglang - INFO - [2025-07-20 15:06:41] Use chat template for the OpenAI-compatible API server: qwen2-vl
  9920. 2025-07-20 15:06:41,702 - __main__ - INFO - [2025-07-20 15:06:41] Use chat template for the OpenAI-compatible API server: qwen2-vl
  9921. 2025-07-20 15:06:44,868 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  9922. 2025-07-20 15:06:47,936 - sglang - INFO - [2025-07-20 15:06:47 TP0] Overlap scheduler is disabled for multimodal models.
  9923. 2025-07-20 15:06:47,936 - __main__ - INFO - [2025-07-20 15:06:47 TP0] Overlap scheduler is disabled for multimodal models.
  9924. 2025-07-20 15:06:47,938 - sglang - INFO - [2025-07-20 15:06:47 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  9925. 2025-07-20 15:06:47,938 - __main__ - INFO - [2025-07-20 15:06:47 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  9926. 2025-07-20 15:06:47,938 - sglang - INFO - [2025-07-20 15:06:47 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  9927. 2025-07-20 15:06:47,938 - __main__ - INFO - [2025-07-20 15:06:47 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  9928. 2025-07-20 15:06:47,939 - sglang - INFO - [2025-07-20 15:06:47 TP0] Init torch distributed begin.
  9929. 2025-07-20 15:06:47,939 - __main__ - INFO - [2025-07-20 15:06:47 TP0] Init torch distributed begin.
  9930. 2025-07-20 15:06:50,947 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  9931. 2025-07-20 15:06:53,328 - sglang - INFO - [2025-07-20 15:06:53 TP0] Load weight begin. avail mem=23.33 GB
  9932. 2025-07-20 15:06:53,328 - __main__ - INFO - [2025-07-20 15:06:53 TP0] Load weight begin. avail mem=23.33 GB
  9933. 2025-07-20 15:06:53,875 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  9934. 2025-07-20 15:06:53,875 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  9935. 2025-07-20 15:06:54,879 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.00s/it]
  9936. 2025-07-20 15:06:54,879 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.00s/it]
  9937. 2025-07-20 15:06:56,046 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.10s/it]
  9938. 2025-07-20 15:06:56,046 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.10s/it]
  9939. 2025-07-20 15:06:57,027 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  9940. 2025-07-20 15:06:57,169 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.11s/it]
  9941. 2025-07-20 15:06:57,170 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.11s/it]
  9942. 2025-07-20 15:06:57,692 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
  9943. 2025-07-20 15:06:57,693 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
  9944. 2025-07-20 15:06:57,693 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05it/s]
  9945. 2025-07-20 15:06:57,693 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.05it/s]
  9946. 2025-07-20 15:06:57,693 - sglang - INFO -
  9947. 2025-07-20 15:06:57,693 - __main__ - INFO -
  9948. 2025-07-20 15:06:57,772 - sglang - INFO - [2025-07-20 15:06:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  9949. 2025-07-20 15:06:57,773 - __main__ - INFO - [2025-07-20 15:06:57 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  9950. 2025-07-20 15:06:57,778 - sglang - INFO - [2025-07-20 15:06:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  9951. 2025-07-20 15:06:57,778 - __main__ - INFO - [2025-07-20 15:06:57 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  9952. 2025-07-20 15:06:57,779 - sglang - INFO - [2025-07-20 15:06:57 TP0] Memory pool end. avail mem=5.30 GB
  9953. 2025-07-20 15:06:57,779 - __main__ - INFO - [2025-07-20 15:06:57 TP0] Memory pool end. avail mem=5.30 GB
  9954. 2025-07-20 15:06:57,924 - sglang - INFO - [2025-07-20 15:06:57 TP0] Capture cuda graph begin. This can take up to several minutes.
  9955. 2025-07-20 15:06:57,924 - __main__ - INFO - [2025-07-20 15:06:57 TP0] Capture cuda graph begin. This can take up to several minutes.
  9956. 2025-07-20 15:06:59,713 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.79it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.34it/s] 100%|██████████| 4/4 [00:01<00:00, 2.73it/s] 100%|██████████| 4/4 [00:01<00:00, 2.24it/s]
  9957. 2025-07-20 15:06:59,714 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.79it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.34it/s] 100%|██████████| 4/4 [00:01<00:00, 2.73it/s] 100%|██████████| 4/4 [00:01<00:00, 2.24it/s]
  9958. 2025-07-20 15:06:59,714 - sglang - INFO - [2025-07-20 15:06:59 TP0] Capture cuda graph end. Time elapsed: 1.79 s
  9959. 2025-07-20 15:06:59,714 - __main__ - INFO - [2025-07-20 15:06:59 TP0] Capture cuda graph end. Time elapsed: 1.79 s
  9960. 2025-07-20 15:07:00,413 - sglang - INFO - [2025-07-20 15:07:00 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  9961. 2025-07-20 15:07:00,413 - __main__ - INFO - [2025-07-20 15:07:00 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  9962. 2025-07-20 15:07:00,503 - sglang - INFO - [2025-07-20 15:07:00] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  9963. 2025-07-20 15:07:00,503 - __main__ - INFO - [2025-07-20 15:07:00] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  9964. 2025-07-20 15:07:03,108 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  9965. 2025-07-20 15:07:09,179 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  9966. 2025-07-20 15:07:15,259 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  9967. 2025-07-20 15:07:21,339 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  9968. 2025-07-20 15:07:27,419 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  9969. 2025-07-20 15:07:33,500 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  9970. 2025-07-20 15:07:39,592 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  9971. 2025-07-20 15:07:45,671 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  9972. 2025-07-20 15:07:51,751 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  9973. 2025-07-20 15:07:57,832 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  9974. 2025-07-20 15:08:03,913 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  9975. 2025-07-20 15:08:09,987 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  9976. 2025-07-20 15:08:16,068 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  9977. 2025-07-20 15:08:22,150 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  9978. 2025-07-20 15:08:28,231 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  9979. 2025-07-20 15:08:34,312 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  9980. 2025-07-20 15:08:40,392 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  9981. 2025-07-20 15:08:46,472 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  9982. 2025-07-20 15:08:52,553 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  9983. 2025-07-20 15:08:58,634 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  9984. 2025-07-20 15:09:04,715 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  9985. 2025-07-20 15:09:10,797 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  9986. 2025-07-20 15:09:16,859 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  9987. 2025-07-20 15:09:22,940 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  9988. 2025-07-20 15:09:29,020 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  9989. 2025-07-20 15:09:35,079 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  9990. 2025-07-20 15:09:41,158 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  9991. 2025-07-20 15:09:47,206 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  9992. 2025-07-20 15:09:53,285 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  9993. 2025-07-20 15:09:59,363 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  9994. 2025-07-20 15:10:05,451 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  9995. 2025-07-20 15:10:11,532 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  9996. 2025-07-20 15:10:17,623 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  9997. 2025-07-20 15:10:23,705 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  9998. 2025-07-20 15:10:29,787 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  9999. 2025-07-20 15:10:35,867 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  10000. 2025-07-20 15:10:41,949 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  10001. 2025-07-20 15:10:48,029 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  10002. 2025-07-20 15:10:54,111 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  10003. 2025-07-20 15:11:00,192 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  10004. 2025-07-20 15:11:06,272 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  10005. 2025-07-20 15:11:12,353 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  10006. 2025-07-20 15:11:18,433 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  10007. 2025-07-20 15:11:24,513 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  10008. 2025-07-20 15:11:30,551 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  10009. 2025-07-20 15:11:36,632 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  10010. 2025-07-20 15:11:42,713 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  10011. 2025-07-20 15:11:48,795 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  10012. 2025-07-20 15:11:54,875 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  10013. 2025-07-20 15:12:00,956 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  10014. 2025-07-20 15:12:07,037 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  10015. 2025-07-20 15:12:13,120 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  10016. 2025-07-20 15:12:19,201 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  10017. 2025-07-20 15:12:25,290 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  10018. 2025-07-20 15:12:31,371 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  10019. 2025-07-20 15:12:37,454 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  10020. 2025-07-20 15:12:43,536 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  10021. 2025-07-20 15:12:49,616 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  10022. 2025-07-20 15:12:55,697 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  10023. 2025-07-20 15:13:01,777 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  10024. 2025-07-20 15:13:07,857 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  10025. 2025-07-20 15:13:13,947 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  10026. 2025-07-20 15:13:20,040 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
  10027. 2025-07-20 15:13:26,127 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
  10028. 2025-07-20 15:13:32,208 - __main__ - WARNING - Attempt 69: Please wait for sglang server to become ready...
  10029. 2025-07-20 15:13:38,288 - __main__ - WARNING - Attempt 70: Please wait for sglang server to become ready...
  10030. 2025-07-20 15:13:44,368 - __main__ - WARNING - Attempt 71: Please wait for sglang server to become ready...
  10031. 2025-07-20 15:13:50,448 - __main__ - WARNING - Attempt 72: Please wait for sglang server to become ready...
  10032. 2025-07-20 15:13:56,528 - __main__ - WARNING - Attempt 73: Please wait for sglang server to become ready...
  10033. 2025-07-20 15:14:02,607 - __main__ - WARNING - Attempt 74: Please wait for sglang server to become ready...
  10034. 2025-07-20 15:14:08,688 - __main__ - WARNING - Attempt 75: Please wait for sglang server to become ready...
  10035. 2025-07-20 15:14:14,769 - __main__ - WARNING - Attempt 76: Please wait for sglang server to become ready...
  10036. 2025-07-20 15:14:20,807 - __main__ - WARNING - Attempt 77: Please wait for sglang server to become ready...
  10037. 2025-07-20 15:14:26,849 - __main__ - WARNING - Attempt 78: Please wait for sglang server to become ready...
  10038. 2025-07-20 15:14:32,930 - __main__ - WARNING - Attempt 79: Please wait for sglang server to become ready...
  10039. 2025-07-20 15:14:39,011 - __main__ - WARNING - Attempt 80: Please wait for sglang server to become ready...
  10040. 2025-07-20 15:14:45,084 - __main__ - WARNING - Attempt 81: Please wait for sglang server to become ready...
  10041. 2025-07-20 15:14:51,164 - __main__ - WARNING - Attempt 82: Please wait for sglang server to become ready...
  10042. 2025-07-20 15:14:57,217 - __main__ - WARNING - Attempt 83: Please wait for sglang server to become ready...
  10043. 2025-07-20 15:15:03,297 - __main__ - WARNING - Attempt 84: Please wait for sglang server to become ready...
  10044. 2025-07-20 15:15:09,377 - __main__ - WARNING - Attempt 85: Please wait for sglang server to become ready...
  10045. 2025-07-20 15:15:15,457 - __main__ - WARNING - Attempt 86: Please wait for sglang server to become ready...
  10046. 2025-07-20 15:15:21,546 - __main__ - WARNING - Attempt 87: Please wait for sglang server to become ready...
  10047. 2025-07-20 15:15:27,625 - __main__ - WARNING - Attempt 88: Please wait for sglang server to become ready...
  10048. 2025-07-20 15:15:33,704 - __main__ - WARNING - Attempt 89: Please wait for sglang server to become ready...
  10049. 2025-07-20 15:15:39,741 - __main__ - WARNING - Attempt 90: Please wait for sglang server to become ready...
  10050. 2025-07-20 15:15:45,819 - __main__ - WARNING - Attempt 91: Please wait for sglang server to become ready...
  10051. 2025-07-20 15:15:51,900 - __main__ - WARNING - Attempt 92: Please wait for sglang server to become ready...
  10052. 2025-07-20 15:15:57,980 - __main__ - WARNING - Attempt 93: Please wait for sglang server to become ready...
  10053. 2025-07-20 15:16:04,060 - __main__ - WARNING - Attempt 94: Please wait for sglang server to become ready...
  10054. 2025-07-20 15:16:10,141 - __main__ - WARNING - Attempt 95: Please wait for sglang server to become ready...
  10055. 2025-07-20 15:16:16,222 - __main__ - WARNING - Attempt 96: Please wait for sglang server to become ready...
  10056. 2025-07-20 15:16:22,302 - __main__ - WARNING - Attempt 97: Please wait for sglang server to become ready...
  10057. 2025-07-20 15:16:28,382 - __main__ - WARNING - Attempt 98: Please wait for sglang server to become ready...
  10058. 2025-07-20 15:16:34,462 - __main__ - WARNING - Attempt 99: Please wait for sglang server to become ready...
  10059. 2025-07-20 15:16:40,544 - __main__ - WARNING - Attempt 100: Please wait for sglang server to become ready...
  10060. 2025-07-20 15:16:46,624 - __main__ - WARNING - Attempt 101: Please wait for sglang server to become ready...
  10061. 2025-07-20 15:16:52,704 - __main__ - WARNING - Attempt 102: Please wait for sglang server to become ready...
  10062. 2025-07-20 15:16:58,785 - __main__ - WARNING - Attempt 103: Please wait for sglang server to become ready...
  10063. 2025-07-20 15:17:04,865 - __main__ - WARNING - Attempt 104: Please wait for sglang server to become ready...
  10064. 2025-07-20 15:17:10,945 - __main__ - WARNING - Attempt 105: Please wait for sglang server to become ready...
  10065. 2025-07-20 15:17:17,028 - __main__ - WARNING - Attempt 106: Please wait for sglang server to become ready...
  10066. 2025-07-20 15:17:23,107 - __main__ - WARNING - Attempt 107: Please wait for sglang server to become ready...
  10067. 2025-07-20 15:17:29,187 - __main__ - WARNING - Attempt 108: Please wait for sglang server to become ready...
  10068. 2025-07-20 15:17:35,268 - __main__ - WARNING - Attempt 109: Please wait for sglang server to become ready...
  10069. 2025-07-20 15:17:41,347 - __main__ - WARNING - Attempt 110: Please wait for sglang server to become ready...
  10070. 2025-07-20 15:17:47,428 - __main__ - WARNING - Attempt 111: Please wait for sglang server to become ready...
  10071. 2025-07-20 15:17:53,508 - __main__ - WARNING - Attempt 112: Please wait for sglang server to become ready...
  10072. 2025-07-20 15:17:59,588 - __main__ - WARNING - Attempt 113: Please wait for sglang server to become ready...
  10073. 2025-07-20 15:18:00,129 - __main__ - INFO - Got cancellation request for SGLang server
  10074. 2025-07-20 15:19:30,848 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  10075. 2025-07-20 15:19:30,848 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  10076. 2025-07-20 15:19:30,848 - __main__ - INFO - Found 1 total pdf paths to add
  10077. 2025-07-20 15:19:30,851 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  10078. 2025-07-20 15:19:31,040 - __main__ - INFO - Starting pipeline with PID 591212
  10079. 2025-07-20 15:19:31,040 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  10080. 2025-07-20 15:19:36,112 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  10081. 2025-07-20 15:19:37,649 - sglang - INFO - [2025-07-20 15:19:37] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=862406034, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  10082. 2025-07-20 15:19:37,649 - __main__ - INFO - [2025-07-20 15:19:37] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=862406034, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  10083. 2025-07-20 15:19:38,651 - sglang - INFO - [2025-07-20 15:19:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
  10084. 2025-07-20 15:19:38,651 - __main__ - INFO - [2025-07-20 15:19:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
  10085. 2025-07-20 15:19:42,272 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  10086. 2025-07-20 15:19:44,865 - sglang - INFO - [2025-07-20 15:19:44 TP0] Overlap scheduler is disabled for multimodal models.
  10087. 2025-07-20 15:19:44,865 - __main__ - INFO - [2025-07-20 15:19:44 TP0] Overlap scheduler is disabled for multimodal models.
  10088. 2025-07-20 15:19:44,867 - sglang - INFO - [2025-07-20 15:19:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  10089. 2025-07-20 15:19:44,867 - __main__ - INFO - [2025-07-20 15:19:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  10090. 2025-07-20 15:19:44,867 - sglang - INFO - [2025-07-20 15:19:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  10091. 2025-07-20 15:19:44,867 - __main__ - INFO - [2025-07-20 15:19:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  10092. 2025-07-20 15:19:44,867 - sglang - INFO - [2025-07-20 15:19:44 TP0] Init torch distributed begin.
  10093. 2025-07-20 15:19:44,867 - __main__ - INFO - [2025-07-20 15:19:44 TP0] Init torch distributed begin.
  10094. 2025-07-20 15:19:48,346 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  10095. 2025-07-20 15:19:50,479 - sglang - INFO - [2025-07-20 15:19:50 TP0] Load weight begin. avail mem=23.33 GB
  10096. 2025-07-20 15:19:50,479 - __main__ - INFO - [2025-07-20 15:19:50 TP0] Load weight begin. avail mem=23.33 GB
  10097. 2025-07-20 15:19:50,998 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  10098. 2025-07-20 15:19:50,998 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  10099. 2025-07-20 15:19:52,034 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.04s/it]
  10100. 2025-07-20 15:19:52,035 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.04s/it]
  10101. 2025-07-20 15:19:53,425 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.24s/it]
  10102. 2025-07-20 15:19:53,425 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.24s/it]
  10103. 2025-07-20 15:19:54,424 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  10104. 2025-07-20 15:19:54,682 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.25s/it]
  10105. 2025-07-20 15:19:54,682 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.25s/it]
  10106. 2025-07-20 15:19:55,303 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
  10107. 2025-07-20 15:19:55,303 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
  10108. 2025-07-20 15:19:55,303 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.08s/it]
  10109. 2025-07-20 15:19:55,303 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.08s/it]
  10110. 2025-07-20 15:19:55,304 - sglang - INFO -
  10111. 2025-07-20 15:19:55,304 - __main__ - INFO -
  10112. 2025-07-20 15:19:55,418 - sglang - INFO - [2025-07-20 15:19:55 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  10113. 2025-07-20 15:19:55,418 - __main__ - INFO - [2025-07-20 15:19:55 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  10114. 2025-07-20 15:19:55,424 - sglang - INFO - [2025-07-20 15:19:55 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  10115. 2025-07-20 15:19:55,424 - __main__ - INFO - [2025-07-20 15:19:55 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  10116. 2025-07-20 15:19:55,425 - sglang - INFO - [2025-07-20 15:19:55 TP0] Memory pool end. avail mem=5.30 GB
  10117. 2025-07-20 15:19:55,425 - __main__ - INFO - [2025-07-20 15:19:55 TP0] Memory pool end. avail mem=5.30 GB
  10118. 2025-07-20 15:19:55,574 - sglang - INFO - [2025-07-20 15:19:55 TP0] Capture cuda graph begin. This can take up to several minutes.
  10119. 2025-07-20 15:19:55,574 - __main__ - INFO - [2025-07-20 15:19:55 TP0] Capture cuda graph begin. This can take up to several minutes.
  10120. 2025-07-20 15:19:57,491 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.04s/it] 50%|█████ | 2/4 [00:01<00:01, 1.65it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.16it/s] 100%|██████████| 4/4 [00:01<00:00, 2.56it/s] 100%|██████████| 4/4 [00:01<00:00, 2.09it/s]
  10121. 2025-07-20 15:19:57,492 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.04s/it] 50%|█████ | 2/4 [00:01<00:01, 1.65it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.16it/s] 100%|██████████| 4/4 [00:01<00:00, 2.56it/s] 100%|██████████| 4/4 [00:01<00:00, 2.09it/s]
  10122. 2025-07-20 15:19:57,492 - sglang - INFO - [2025-07-20 15:19:57 TP0] Capture cuda graph end. Time elapsed: 1.92 s
  10123. 2025-07-20 15:19:57,492 - __main__ - INFO - [2025-07-20 15:19:57 TP0] Capture cuda graph end. Time elapsed: 1.92 s
  10124. 2025-07-20 15:19:58,175 - sglang - INFO - [2025-07-20 15:19:58 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  10125. 2025-07-20 15:19:58,175 - __main__ - INFO - [2025-07-20 15:19:58 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  10126. 2025-07-20 15:19:58,276 - sglang - INFO - [2025-07-20 15:19:58] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  10127. 2025-07-20 15:19:58,277 - __main__ - INFO - [2025-07-20 15:19:58] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  10128. 2025-07-20 15:20:00,503 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  10129. 2025-07-20 15:20:06,583 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  10130. 2025-07-20 15:20:12,663 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  10131. 2025-07-20 15:20:18,755 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  10132. 2025-07-20 15:20:24,835 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  10133. 2025-07-20 15:20:30,915 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  10134. 2025-07-20 15:20:36,995 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  10135. 2025-07-20 15:20:43,070 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  10136. 2025-07-20 15:20:49,149 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  10137. 2025-07-20 15:20:55,239 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  10138. 2025-07-20 15:21:01,331 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  10139. 2025-07-20 15:21:07,412 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  10140. 2025-07-20 15:21:13,491 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  10141. 2025-07-20 15:21:19,572 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  10142. 2025-07-20 15:21:25,652 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  10143. 2025-07-20 15:21:31,733 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  10144. 2025-07-20 15:21:37,771 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  10145. 2025-07-20 15:21:43,885 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  10146. 2025-07-20 15:21:49,967 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  10147. 2025-07-20 15:21:54,867 - sglang - INFO - Process Process-2:
  10148. 2025-07-20 15:21:54,867 - __main__ - INFO - Process Process-2:
  10149. 2025-07-20 15:21:54,867 - sglang - INFO - Process Process-1:
  10150. 2025-07-20 15:21:54,867 - __main__ - INFO - Process Process-1:
  10151. 2025-07-20 15:21:54,868 - __main__ - INFO - Got cancellation request for SGLang server
  10152. 2025-07-20 15:22:05,628 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  10153. 2025-07-20 15:22:05,628 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  10154. 2025-07-20 15:22:05,628 - __main__ - INFO - Found 1 total pdf paths to add
  10155. 2025-07-20 15:22:05,631 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  10156. 2025-07-20 15:22:05,832 - __main__ - INFO - Starting pipeline with PID 592226
  10157. 2025-07-20 15:22:05,832 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  10158. 2025-07-20 15:22:10,904 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  10159. 2025-07-20 15:22:12,064 - sglang - INFO - [2025-07-20 15:22:12] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=958917757, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  10160. 2025-07-20 15:22:12,065 - __main__ - INFO - [2025-07-20 15:22:12] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=958917757, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  10161. 2025-07-20 15:22:13,046 - sglang - INFO - [2025-07-20 15:22:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
  10162. 2025-07-20 15:22:13,046 - __main__ - INFO - [2025-07-20 15:22:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
  10163. 2025-07-20 15:22:16,954 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  10164. 2025-07-20 15:22:19,436 - sglang - INFO - [2025-07-20 15:22:19 TP0] Overlap scheduler is disabled for multimodal models.
  10165. 2025-07-20 15:22:19,436 - __main__ - INFO - [2025-07-20 15:22:19 TP0] Overlap scheduler is disabled for multimodal models.
  10166. 2025-07-20 15:22:19,438 - sglang - INFO - [2025-07-20 15:22:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  10167. 2025-07-20 15:22:19,438 - __main__ - INFO - [2025-07-20 15:22:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  10168. 2025-07-20 15:22:19,438 - sglang - INFO - [2025-07-20 15:22:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  10169. 2025-07-20 15:22:19,438 - __main__ - INFO - [2025-07-20 15:22:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  10170. 2025-07-20 15:22:19,439 - sglang - INFO - [2025-07-20 15:22:19 TP0] Init torch distributed begin.
  10171. 2025-07-20 15:22:19,439 - __main__ - INFO - [2025-07-20 15:22:19 TP0] Init torch distributed begin.
  10172. 2025-07-20 15:22:23,038 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  10173. 2025-07-20 15:22:24,881 - sglang - INFO - [2025-07-20 15:22:24 TP0] Load weight begin. avail mem=23.33 GB
  10174. 2025-07-20 15:22:24,881 - __main__ - INFO - [2025-07-20 15:22:24 TP0] Load weight begin. avail mem=23.33 GB
  10175. 2025-07-20 15:22:25,353 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  10176. 2025-07-20 15:22:25,353 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  10177. 2025-07-20 15:22:26,399 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.05s/it]
  10178. 2025-07-20 15:22:26,399 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.05s/it]
  10179. 2025-07-20 15:22:27,615 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.15s/it]
  10180. 2025-07-20 15:22:27,615 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.15s/it]
  10181. 2025-07-20 15:22:28,814 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.17s/it]
  10182. 2025-07-20 15:22:28,814 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.17s/it]
  10183. 2025-07-20 15:22:29,124 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  10184. 2025-07-20 15:22:29,363 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.08it/s]
  10185. 2025-07-20 15:22:29,363 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.08it/s]
  10186. 2025-07-20 15:22:29,363 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
  10187. 2025-07-20 15:22:29,363 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:04<00:00, 1.00s/it]
  10188. 2025-07-20 15:22:29,363 - sglang - INFO -
  10189. 2025-07-20 15:22:29,363 - __main__ - INFO -
  10190. 2025-07-20 15:22:29,442 - sglang - INFO - [2025-07-20 15:22:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  10191. 2025-07-20 15:22:29,442 - __main__ - INFO - [2025-07-20 15:22:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  10192. 2025-07-20 15:22:29,448 - sglang - INFO - [2025-07-20 15:22:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  10193. 2025-07-20 15:22:29,448 - __main__ - INFO - [2025-07-20 15:22:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  10194. 2025-07-20 15:22:29,448 - sglang - INFO - [2025-07-20 15:22:29 TP0] Memory pool end. avail mem=5.30 GB
  10195. 2025-07-20 15:22:29,448 - __main__ - INFO - [2025-07-20 15:22:29 TP0] Memory pool end. avail mem=5.30 GB
  10196. 2025-07-20 15:22:29,613 - sglang - INFO - [2025-07-20 15:22:29 TP0] Capture cuda graph begin. This can take up to several minutes.
  10197. 2025-07-20 15:22:29,613 - __main__ - INFO - [2025-07-20 15:22:29 TP0] Capture cuda graph begin. This can take up to several minutes.
  10198. 2025-07-20 15:22:31,572 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.06s/it] 50%|█████ | 2/4 [00:01<00:01, 1.63it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.12it/s] 100%|██████████| 4/4 [00:01<00:00, 2.50it/s] 100%|██████████| 4/4 [00:01<00:00, 2.05it/s]
  10199. 2025-07-20 15:22:31,572 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.06s/it] 50%|█████ | 2/4 [00:01<00:01, 1.63it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.12it/s] 100%|██████████| 4/4 [00:01<00:00, 2.50it/s] 100%|██████████| 4/4 [00:01<00:00, 2.05it/s]
  10200. 2025-07-20 15:22:31,573 - sglang - INFO - [2025-07-20 15:22:31 TP0] Capture cuda graph end. Time elapsed: 1.96 s
  10201. 2025-07-20 15:22:31,573 - __main__ - INFO - [2025-07-20 15:22:31 TP0] Capture cuda graph end. Time elapsed: 1.96 s
  10202. 2025-07-20 15:22:32,277 - sglang - INFO - [2025-07-20 15:22:32 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  10203. 2025-07-20 15:22:32,277 - __main__ - INFO - [2025-07-20 15:22:32 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  10204. 2025-07-20 15:22:32,383 - sglang - INFO - [2025-07-20 15:22:32] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  10205. 2025-07-20 15:22:32,384 - __main__ - INFO - [2025-07-20 15:22:32] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  10206. 2025-07-20 15:22:35,205 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  10207. 2025-07-20 15:22:41,287 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  10208. 2025-07-20 15:22:41,728 - __main__ - INFO - Got cancellation request for SGLang server
  10209. 2025-07-20 15:23:40,632 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  10210. 2025-07-20 15:23:40,632 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  10211. 2025-07-20 15:23:40,632 - __main__ - INFO - Found 1 total pdf paths to add
  10212. 2025-07-20 15:23:40,635 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  10213. 2025-07-20 15:23:40,835 - __main__ - INFO - Starting pipeline with PID 593043
  10214. 2025-07-20 15:23:40,835 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  10215. 2025-07-20 15:23:45,912 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  10216. 2025-07-20 15:23:47,694 - sglang - INFO - [2025-07-20 15:23:47] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=371273265, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  10217. 2025-07-20 15:23:47,694 - __main__ - INFO - [2025-07-20 15:23:47] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=371273265, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  10218. 2025-07-20 15:23:48,988 - sglang - INFO - [2025-07-20 15:23:48] Use chat template for the OpenAI-compatible API server: qwen2-vl
  10219. 2025-07-20 15:23:48,988 - __main__ - INFO - [2025-07-20 15:23:48] Use chat template for the OpenAI-compatible API server: qwen2-vl
  10220. 2025-07-20 15:23:51,975 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  10221. 2025-07-20 15:23:55,321 - sglang - INFO - [2025-07-20 15:23:55 TP0] Overlap scheduler is disabled for multimodal models.
  10222. 2025-07-20 15:23:55,321 - __main__ - INFO - [2025-07-20 15:23:55 TP0] Overlap scheduler is disabled for multimodal models.
  10223. 2025-07-20 15:23:55,324 - sglang - INFO - [2025-07-20 15:23:55 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  10224. 2025-07-20 15:23:55,324 - __main__ - INFO - [2025-07-20 15:23:55 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  10225. 2025-07-20 15:23:55,324 - sglang - INFO - [2025-07-20 15:23:55 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  10226. 2025-07-20 15:23:55,324 - __main__ - INFO - [2025-07-20 15:23:55 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  10227. 2025-07-20 15:23:55,332 - sglang - INFO - [2025-07-20 15:23:55 TP0] Init torch distributed begin.
  10228. 2025-07-20 15:23:55,332 - __main__ - INFO - [2025-07-20 15:23:55 TP0] Init torch distributed begin.
  10229. 2025-07-20 15:23:58,074 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  10230. 2025-07-20 15:24:00,896 - sglang - INFO - [2025-07-20 15:24:00 TP0] Load weight begin. avail mem=23.33 GB
  10231. 2025-07-20 15:24:00,897 - __main__ - INFO - [2025-07-20 15:24:00 TP0] Load weight begin. avail mem=23.33 GB
  10232. 2025-07-20 15:24:01,406 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  10233. 2025-07-20 15:24:01,406 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  10234. 2025-07-20 15:24:02,529 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.12s/it]
  10235. 2025-07-20 15:24:02,529 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:01<00:03, 1.12s/it]
  10236. 2025-07-20 15:24:03,697 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.15s/it]
  10237. 2025-07-20 15:24:03,697 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.15s/it]
  10238. 2025-07-20 15:24:04,130 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  10239. 2025-07-20 15:24:04,880 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.16s/it]
  10240. 2025-07-20 15:24:04,880 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.16s/it]
  10241. 2025-07-20 15:24:05,350 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
  10242. 2025-07-20 15:24:05,350 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.12it/s]
  10243. 2025-07-20 15:24:05,350 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.01it/s]
  10244. 2025-07-20 15:24:05,350 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.01it/s]
  10245. 2025-07-20 15:24:05,351 - sglang - INFO -
  10246. 2025-07-20 15:24:05,351 - __main__ - INFO -
  10247. 2025-07-20 15:24:05,411 - sglang - INFO - [2025-07-20 15:24:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  10248. 2025-07-20 15:24:05,412 - __main__ - INFO - [2025-07-20 15:24:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  10249. 2025-07-20 15:24:05,418 - sglang - INFO - [2025-07-20 15:24:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  10250. 2025-07-20 15:24:05,418 - __main__ - INFO - [2025-07-20 15:24:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  10251. 2025-07-20 15:24:05,418 - sglang - INFO - [2025-07-20 15:24:05 TP0] Memory pool end. avail mem=5.30 GB
  10252. 2025-07-20 15:24:05,418 - __main__ - INFO - [2025-07-20 15:24:05 TP0] Memory pool end. avail mem=5.30 GB
  10253. 2025-07-20 15:24:05,574 - sglang - INFO - [2025-07-20 15:24:05 TP0] Capture cuda graph begin. This can take up to several minutes.
  10254. 2025-07-20 15:24:05,574 - __main__ - INFO - [2025-07-20 15:24:05 TP0] Capture cuda graph begin. This can take up to several minutes.
  10255. 2025-07-20 15:24:07,535 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.72it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.18it/s] 100%|██████████| 4/4 [00:01<00:00, 2.38it/s] 100%|██████████| 4/4 [00:01<00:00, 2.04it/s]
  10256. 2025-07-20 15:24:07,535 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.72it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.18it/s] 100%|██████████| 4/4 [00:01<00:00, 2.38it/s] 100%|██████████| 4/4 [00:01<00:00, 2.04it/s]
  10257. 2025-07-20 15:24:07,535 - sglang - INFO - [2025-07-20 15:24:07 TP0] Capture cuda graph end. Time elapsed: 1.96 s
  10258. 2025-07-20 15:24:07,536 - __main__ - INFO - [2025-07-20 15:24:07 TP0] Capture cuda graph end. Time elapsed: 1.96 s
  10259. 2025-07-20 15:24:08,225 - sglang - INFO - [2025-07-20 15:24:08 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  10260. 2025-07-20 15:24:08,225 - __main__ - INFO - [2025-07-20 15:24:08 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  10261. 2025-07-20 15:24:08,323 - sglang - INFO - [2025-07-20 15:24:08] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  10262. 2025-07-20 15:24:08,323 - __main__ - INFO - [2025-07-20 15:24:08] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30024): address already in use
  10263. 2025-07-20 15:24:10,210 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  10264. 2025-07-20 15:24:16,291 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  10265. 2025-07-20 15:24:22,394 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  10266. 2025-07-20 15:24:28,475 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  10267. 2025-07-20 15:24:34,556 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  10268. 2025-07-20 15:24:40,636 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  10269. 2025-07-20 15:24:44,804 - __main__ - INFO - Got cancellation request for SGLang server
  10270. 2025-07-20 15:24:53,840 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  10271. 2025-07-20 15:24:53,840 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  10272. 2025-07-20 15:24:53,840 - __main__ - INFO - Found 1 total pdf paths to add
  10273. 2025-07-20 15:24:53,843 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  10274. 2025-07-20 15:24:54,044 - __main__ - INFO - Starting pipeline with PID 593960
  10275. 2025-07-20 15:24:54,044 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  10276. 2025-07-20 15:24:54,112 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  10277. 2025-07-20 15:24:55,142 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  10278. 2025-07-20 15:24:56,188 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  10279. 2025-07-20 15:24:57,251 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  10280. 2025-07-20 15:24:58,318 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  10281. 2025-07-20 15:24:59,476 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  10282. 2025-07-20 15:25:00,384 - sglang - INFO - [2025-07-20 15:25:00] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30025, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=501574558, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  10283. 2025-07-20 15:25:00,384 - __main__ - INFO - [2025-07-20 15:25:00] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30025, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=501574558, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  10284. 2025-07-20 15:25:00,682 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  10285. 2025-07-20 15:25:01,384 - sglang - INFO - [2025-07-20 15:25:01] Use chat template for the OpenAI-compatible API server: qwen2-vl
  10286. 2025-07-20 15:25:01,384 - __main__ - INFO - [2025-07-20 15:25:01] Use chat template for the OpenAI-compatible API server: qwen2-vl
  10287. 2025-07-20 15:25:01,764 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  10288. 2025-07-20 15:25:02,832 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  10289. 2025-07-20 15:25:03,900 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  10290. 2025-07-20 15:25:04,968 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  10291. 2025-07-20 15:25:06,142 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  10292. 2025-07-20 15:25:07,214 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  10293. 2025-07-20 15:25:07,905 - sglang - INFO - [2025-07-20 15:25:07 TP0] Overlap scheduler is disabled for multimodal models.
  10294. 2025-07-20 15:25:07,905 - __main__ - INFO - [2025-07-20 15:25:07 TP0] Overlap scheduler is disabled for multimodal models.
  10295. 2025-07-20 15:25:07,908 - sglang - INFO - [2025-07-20 15:25:07 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  10296. 2025-07-20 15:25:07,908 - __main__ - INFO - [2025-07-20 15:25:07 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  10297. 2025-07-20 15:25:07,908 - sglang - INFO - [2025-07-20 15:25:07 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  10298. 2025-07-20 15:25:07,908 - __main__ - INFO - [2025-07-20 15:25:07 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  10299. 2025-07-20 15:25:07,908 - sglang - INFO - [2025-07-20 15:25:07 TP0] Init torch distributed begin.
  10300. 2025-07-20 15:25:07,908 - __main__ - INFO - [2025-07-20 15:25:07 TP0] Init torch distributed begin.
  10301. 2025-07-20 15:25:08,289 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  10302. 2025-07-20 15:25:09,356 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  10303. 2025-07-20 15:25:10,431 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  10304. 2025-07-20 15:25:11,506 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  10305. 2025-07-20 15:25:12,581 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  10306. 2025-07-20 15:25:13,489 - sglang - INFO - [2025-07-20 15:25:13 TP0] Load weight begin. avail mem=23.33 GB
  10307. 2025-07-20 15:25:13,489 - __main__ - INFO - [2025-07-20 15:25:13 TP0] Load weight begin. avail mem=23.33 GB
  10308. 2025-07-20 15:25:13,649 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  10309. 2025-07-20 15:25:14,006 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  10310. 2025-07-20 15:25:14,006 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  10311. 2025-07-20 15:25:14,722 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  10312. 2025-07-20 15:25:14,984 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.02it/s]
  10313. 2025-07-20 15:25:14,984 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.02it/s]
  10314. 2025-07-20 15:25:15,799 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  10315. 2025-07-20 15:25:16,073 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.04s/it]
  10316. 2025-07-20 15:25:16,073 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.04s/it]
  10317. 2025-07-20 15:25:16,866 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  10318. 2025-07-20 15:25:17,073 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.02s/it]
  10319. 2025-07-20 15:25:17,073 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.02s/it]
  10320. 2025-07-20 15:25:17,509 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.26it/s]
  10321. 2025-07-20 15:25:17,509 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.26it/s]
  10322. 2025-07-20 15:25:17,509 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
  10323. 2025-07-20 15:25:17,509 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.14it/s]
  10324. 2025-07-20 15:25:17,509 - sglang - INFO -
  10325. 2025-07-20 15:25:17,509 - __main__ - INFO -
  10326. 2025-07-20 15:25:17,571 - sglang - INFO - [2025-07-20 15:25:17 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  10327. 2025-07-20 15:25:17,571 - __main__ - INFO - [2025-07-20 15:25:17 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  10328. 2025-07-20 15:25:17,577 - sglang - INFO - [2025-07-20 15:25:17 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  10329. 2025-07-20 15:25:17,578 - __main__ - INFO - [2025-07-20 15:25:17 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  10330. 2025-07-20 15:25:17,578 - sglang - INFO - [2025-07-20 15:25:17 TP0] Memory pool end. avail mem=5.30 GB
  10331. 2025-07-20 15:25:17,578 - __main__ - INFO - [2025-07-20 15:25:17 TP0] Memory pool end. avail mem=5.30 GB
  10332. 2025-07-20 15:25:17,730 - sglang - INFO - [2025-07-20 15:25:17 TP0] Capture cuda graph begin. This can take up to several minutes.
  10333. 2025-07-20 15:25:17,730 - __main__ - INFO - [2025-07-20 15:25:17 TP0] Capture cuda graph begin. This can take up to several minutes.
  10334. 2025-07-20 15:25:17,935 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  10335. 2025-07-20 15:25:19,008 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  10336. 2025-07-20 15:25:19,560 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.75it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.28it/s] 100%|██████████| 4/4 [00:01<00:00, 2.66it/s] 100%|██████████| 4/4 [00:01<00:00, 2.19it/s]
  10337. 2025-07-20 15:25:19,560 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.02it/s] 50%|█████ | 2/4 [00:01<00:01, 1.75it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.28it/s] 100%|██████████| 4/4 [00:01<00:00, 2.66it/s] 100%|██████████| 4/4 [00:01<00:00, 2.19it/s]
  10338. 2025-07-20 15:25:19,560 - sglang - INFO - [2025-07-20 15:25:19 TP0] Capture cuda graph end. Time elapsed: 1.83 s
  10339. 2025-07-20 15:25:19,560 - __main__ - INFO - [2025-07-20 15:25:19 TP0] Capture cuda graph end. Time elapsed: 1.83 s
  10340. 2025-07-20 15:25:20,061 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  10341. 2025-07-20 15:25:20,328 - sglang - INFO - [2025-07-20 15:25:20 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  10342. 2025-07-20 15:25:20,328 - __main__ - INFO - [2025-07-20 15:25:20 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  10343. 2025-07-20 15:25:21,149 - __main__ - INFO - sglang server is ready.
  10344. 2025-07-20 15:25:21,150 - __main__ - INFO - Queue remaining: 4
  10345. 2025-07-20 15:25:21,150 - __main__ - INFO -
  10346. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  10347. ----------------------------------------------------------------------------------
  10348. 2025-07-20 15:25:21,150 - __main__ - INFO -
  10349. Worker ID
  10350. ---------
  10351. 2025-07-20 15:25:21,150 - __main__ - INFO - Worker 0 processing work item 3a56970ad53c3199997edee4c2904936a58b713f
  10352. 2025-07-20 15:25:21,153 - __main__ - INFO - Created all tasks for 3a56970ad53c3199997edee4c2904936a58b713f
  10353. 2025-07-20 15:25:21,167 - __main__ - INFO - Got 14 pages to do for scripts/data/11440000MB2D0234372440125017009.pdf in worker 0
  10354. 2025-07-20 15:25:21,172 - __main__ - INFO - Got 18 pages to do for scripts/data/11440000MB2D0234372440125017014.pdf in worker 0
  10355. 2025-07-20 15:25:21,176 - __main__ - INFO - Got 18 pages to do for scripts/data/11440000MB2D0234372440125017020.pdf in worker 0
  10356. 2025-07-20 15:25:21,180 - __main__ - INFO - Got 16 pages to do for scripts/data/11440000MB2D0234372440125017028.pdf in worker 0
  10357. 2025-07-20 15:25:21,184 - __main__ - INFO - Got 16 pages to do for scripts/data/11440000MB2D0234372440125017041.pdf in worker 0
  10358. 2025-07-20 15:25:21,189 - __main__ - INFO - Got 17 pages to do for scripts/data/11445200MB2C47380T4440125017008 (1).pdf in worker 0
  10359. 2025-07-20 15:25:21,192 - __main__ - INFO - Got 14 pages to do for scripts/data/11440000MB2D0234372440125017049.pdf in worker 0
  10360. 2025-07-20 15:25:21,196 - __main__ - INFO - Got 17 pages to do for scripts/data/11445200MB2C47380T4440125017008.pdf in worker 0
  10361. 2025-07-20 15:25:21,199 - __main__ - INFO - Got 7 pages to do for scripts/data/11445200MB2D06387W3440125011001.pdf in worker 0
  10362. 2025-07-20 15:25:21,202 - __main__ - INFO - Got 15 pages to do for scripts/data/11445200MB2C47380T4440125017023.pdf in worker 0
  10363. 2025-07-20 15:25:21,205 - __main__ - INFO - Got 14 pages to do for scripts/data/11445200MB2D06387W3440125017006.pdf in worker 0
  10364. 2025-07-20 15:25:21,208 - __main__ - INFO - Got 14 pages to do for scripts/data/11445200MB2D06387W3440125017003.pdf in worker 0
  10365. 2025-07-20 15:25:21,211 - __main__ - INFO - Got 19 pages to do for scripts/data/11445200MB2D06387W3440125017011.pdf in worker 0
  10366. 2025-07-20 15:25:21,214 - __main__ - INFO - Got 14 pages to do for scripts/data/11445200MB2D06387W3440125017007.pdf in worker 0
  10367. 2025-07-20 15:25:21,216 - __main__ - INFO - Got 15 pages to do for scripts/data/11445200MB2D06387W3440125017023.pdf in worker 0
  10368. 2025-07-20 15:25:21,219 - __main__ - INFO - Got 14 pages to do for scripts/data/11445200MB2D06387W3440125017041.pdf in worker 0
  10369. 2025-07-20 15:25:21,220 - __main__ - INFO - Got 5 pages to do for scripts/data/11445200MB2D42580L4442014010000.pdf in worker 0
  10370. 2025-07-20 15:25:21,222 - __main__ - INFO - Got 16 pages to do for scripts/data/11445200MB2D06387W3440125017048.pdf in worker 0
  10371. 2025-07-20 15:25:21,225 - __main__ - INFO - Got 18 pages to do for scripts/data/11445200MB2D6222364440125017008.pdf in worker 0
  10372. 2025-07-20 15:25:21,227 - __main__ - INFO - Got 13 pages to do for scripts/data/11445200MB2D6222364440125017049.pdf in worker 0
  10373. 2025-07-20 15:25:21,228 - __main__ - INFO - Got 4 pages to do for scripts/data/11445202592174409C4442111641000.pdf in worker 0
  10374. 2025-07-20 15:25:21,229 - __main__ - INFO - Got 6 pages to do for scripts/data/11445202592174409C4442111667001.pdf in worker 0
  10375. 2025-07-20 15:25:21,230 - __main__ - INFO - Got 4 pages to do for scripts/data/11445202592174409C4442111820005.pdf in worker 0
  10376. 2025-07-20 15:25:21,232 - __main__ - INFO - Got 14 pages to do for scripts/data/11445202MB2D1177604440125017023.pdf in worker 0
  10377. 2025-07-20 15:25:21,234 - __main__ - INFO - Got 12 pages to do for scripts/data/11445202MB2D1177604440125017027.pdf in worker 0
  10378. 2025-07-20 15:25:21,236 - __main__ - INFO - Got 14 pages to do for scripts/data/11445202MB2D1177604440125017041.pdf in worker 0
  10379. 2025-07-20 15:25:21,237 - __main__ - INFO - Got 3 pages to do for scripts/data/11445202MB2D117760444212503R001.pdf in worker 0
  10380. 2025-07-20 15:25:21,238 - __main__ - INFO - Got 4 pages to do for scripts/data/11445203007030456U4440711000000.pdf in worker 0
  10381. 2025-07-20 15:25:21,239 - __main__ - INFO - Got 5 pages to do for scripts/data/11445203007030456U4442111640000.pdf in worker 0
  10382. 2025-07-20 15:25:21,240 - __main__ - INFO - Got 4 pages to do for scripts/data/11445203007030456U4442111641000.pdf in worker 0
  10383. 2025-07-20 15:25:21,242 - __main__ - INFO - Got 7 pages to do for scripts/data/11445203007030456U44421110A0005.pdf in worker 0
  10384. 2025-07-20 15:25:21,243 - __main__ - INFO - Got 6 pages to do for scripts/data/11445203007030456U4442111667001.pdf in worker 0
  10385. 2025-07-20 15:25:21,244 - __main__ - INFO - Got 5 pages to do for scripts/data/11445203707759010G4442014010000.pdf in worker 0
  10386. 2025-07-20 15:25:21,246 - __main__ - INFO - Got 14 pages to do for scripts/data/11445203MB2C21084N4440125017008.pdf in worker 0
  10387. 2025-07-20 15:25:21,247 - __main__ - INFO - Got 3 pages to do for scripts/data/11445203MB2C21084N444212503R001.pdf in worker 0
  10388. 2025-07-20 15:25:21,248 - __main__ - INFO - Got 4 pages to do for scripts/data/11445222007029500K4440711000000.pdf in worker 0
  10389. 2025-07-20 15:25:21,249 - __main__ - INFO - Got 6 pages to do for scripts/data/11445222007029500K44421110A0001.pdf in worker 0
  10390. 2025-07-20 15:25:21,250 - __main__ - INFO - Got 6 pages to do for scripts/data/11445222007029500K44421110A0005.pdf in worker 0
  10391. 2025-07-20 15:25:21,251 - __main__ - INFO - Got 4 pages to do for scripts/data/11445222007029527B4442106100010.pdf in worker 0
  10392. 2025-07-20 15:25:21,252 - __main__ - INFO - Got 12 pages to do for scripts/data/11445222007030157E4440149001001.pdf in worker 0
  10393. 2025-07-20 15:25:21,253 - __main__ - INFO - Got 4 pages to do for scripts/data/11445224007035644H4440711000000.pdf in worker 0
  10394. 2025-07-20 15:25:21,254 - __main__ - INFO - Got 5 pages to do for scripts/data/11445224007035644H44421110A0001.pdf in worker 0
  10395. 2025-07-20 15:25:21,255 - __main__ - INFO - Got 5 pages to do for scripts/data/11445224007035644H44421110A0005.pdf in worker 0
  10396. 2025-07-20 15:25:21,257 - __main__ - INFO - Got 10 pages to do for scripts/data/11445224007035652C4440114020001.pdf in worker 0
  10397. 2025-07-20 15:25:21,257 - __main__ - INFO - Got 5 pages to do for scripts/data/11445224007035652C4442014010000.pdf in worker 0
  10398. 2025-07-20 15:25:21,258 - __main__ - INFO - Got 4 pages to do for scripts/data/11445281588281455A4440711000000.pdf in worker 0
  10399. 2025-07-20 15:25:21,259 - __main__ - INFO - Got 5 pages to do for scripts/data/11445281588281455A44421110A0001.pdf in worker 0
  10400. 2025-07-20 15:25:21,260 - __main__ - INFO - Got 6 pages to do for scripts/data/11445281588281455A44421110A0005.pdf in worker 0
  10401. 2025-07-20 15:25:21,261 - __main__ - INFO - Got 4 pages to do for scripts/data/11445281588281455A4442111641000.pdf in worker 0
  10402. 2025-07-20 15:25:21,262 - __main__ - INFO - Got 6 pages to do for scripts/data/11445281588281455A4442111667001.pdf in worker 0
  10403. 2025-07-20 15:25:21,263 - __main__ - INFO - Got 4 pages to do for scripts/data/11445281588281455A4442111820005.pdf in worker 0
  10404. 2025-07-20 15:25:21,768 - __main__ - INFO - Got 6 pages to do for scripts/data/12445200456019383L3442111667001.pdf in worker 0
  10405. 2025-07-20 15:25:21,837 - __main__ - INFO - Got 5 pages to do for scripts/data/12445200726503846U344201405500301.pdf in worker 0
  10406. 2025-07-20 15:25:22,039 - sglang - INFO - [2025-07-20 15:25:21 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  10407. 2025-07-20 15:25:22,039 - __main__ - INFO - [2025-07-20 15:25:21 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  10408. 2025-07-20 15:25:22,040 - __main__ - INFO - sglang running req: 0 queue req: 0
  10409. 2025-07-20 15:25:29,937 - sglang - INFO - [2025-07-20 15:25:29] The server is fired up and ready to roll!
  10410. 2025-07-20 15:25:29,937 - __main__ - INFO - [2025-07-20 15:25:29] The server is fired up and ready to roll!
  10411. 2025-07-20 15:25:31,151 - __main__ - INFO - Queue remaining: 3
  10412. 2025-07-20 15:25:31,151 - __main__ - INFO -
  10413. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  10414. ----------------------------------------------------------------------------------
  10415. 2025-07-20 15:25:31,151 - __main__ - INFO -
  10416. Worker ID | started
  10417. ----------+--------
  10418. 0 | 500
  10419. 2025-07-20 15:25:41,152 - __main__ - INFO - Queue remaining: 3
  10420. 2025-07-20 15:25:41,153 - __main__ - INFO -
  10421. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  10422. ----------------------------------------------------------------------------------
  10423. 2025-07-20 15:25:41,153 - __main__ - INFO -
  10424. Worker ID | started
  10425. ----------+--------
  10426. 0 | 500
  10427. 2025-07-20 15:25:43,644 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-1
  10428. 2025-07-20 15:25:43,685 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-2
  10429. 2025-07-20 15:25:43,711 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-3
  10430. 2025-07-20 15:25:43,805 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-5
  10431. 2025-07-20 15:25:43,850 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-6
  10432. 2025-07-20 15:25:43,851 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-4
  10433. 2025-07-20 15:25:43,861 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-8
  10434. 2025-07-20 15:25:43,901 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-7
  10435. 2025-07-20 15:25:43,904 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-9
  10436. 2025-07-20 15:25:43,975 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-11
  10437. 2025-07-20 15:25:44,061 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-13
  10438. 2025-07-20 15:25:44,078 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-14
  10439. 2025-07-20 15:25:44,148 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-12
  10440. 2025-07-20 15:25:44,156 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-1
  10441. 2025-07-20 15:25:44,164 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-2
  10442. 2025-07-20 15:25:44,246 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-3
  10443. 2025-07-20 15:25:44,262 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-5
  10444. 2025-07-20 15:25:44,337 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017009.pdf-10
  10445. 2025-07-20 15:25:44,446 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-8
  10446. 2025-07-20 15:25:44,449 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-6
  10447. 2025-07-20 15:25:44,464 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-4
  10448. 2025-07-20 15:25:44,538 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-10
  10449. 2025-07-20 15:25:44,549 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-9
  10450. 2025-07-20 15:25:44,572 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-11
  10451. 2025-07-20 15:25:44,650 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-7
  10452. 2025-07-20 15:25:44,664 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-13
  10453. 2025-07-20 15:25:44,751 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-14
  10454. 2025-07-20 15:25:44,844 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-15
  10455. 2025-07-20 15:25:44,846 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-18
  10456. 2025-07-20 15:25:44,858 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-16
  10457. 2025-07-20 15:25:44,954 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-2
  10458. 2025-07-20 15:25:44,956 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-3
  10459. 2025-07-20 15:25:44,961 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-17
  10460. 2025-07-20 15:25:45,041 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-1
  10461. 2025-07-20 15:25:45,063 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-4
  10462. 2025-07-20 15:25:45,150 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-5
  10463. 2025-07-20 15:25:45,233 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-6
  10464. 2025-07-20 15:25:45,244 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-7
  10465. 2025-07-20 15:25:45,245 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-10
  10466. 2025-07-20 15:25:45,245 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-8
  10467. 2025-07-20 15:25:45,346 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017014.pdf-12
  10468. 2025-07-20 15:25:45,361 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-9
  10469. 2025-07-20 15:25:45,361 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-11
  10470. 2025-07-20 15:25:45,554 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-15
  10471. 2025-07-20 15:25:45,554 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-14
  10472. 2025-07-20 15:25:45,556 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-12
  10473. 2025-07-20 15:25:45,648 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-17
  10474. 2025-07-20 15:25:45,649 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-2
  10475. 2025-07-20 15:25:45,649 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-16
  10476. 2025-07-20 15:25:45,733 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-3
  10477. 2025-07-20 15:25:45,846 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-18
  10478. 2025-07-20 15:25:45,849 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-1
  10479. 2025-07-20 15:25:45,852 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-6
  10480. 2025-07-20 15:25:45,934 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-4
  10481. 2025-07-20 15:25:45,935 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-9
  10482. 2025-07-20 15:25:45,940 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-7
  10483. 2025-07-20 15:25:45,952 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017020.pdf-13
  10484. 2025-07-20 15:25:46,045 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-16
  10485. 2025-07-20 15:25:46,139 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-11
  10486. 2025-07-20 15:25:46,147 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-12
  10487. 2025-07-20 15:25:46,238 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-14
  10488. 2025-07-20 15:25:46,245 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-10
  10489. 2025-07-20 15:25:46,246 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-2
  10490. 2025-07-20 15:25:46,335 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-3
  10491. 2025-07-20 15:25:46,341 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-7
  10492. 2025-07-20 15:25:46,342 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-1
  10493. 2025-07-20 15:25:46,347 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-10
  10494. 2025-07-20 15:25:46,356 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-6
  10495. 2025-07-20 15:25:46,449 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-5
  10496. 2025-07-20 15:25:46,449 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-5
  10497. 2025-07-20 15:25:46,534 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-8
  10498. 2025-07-20 15:25:46,535 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-16
  10499. 2025-07-20 15:25:46,535 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-15
  10500. 2025-07-20 15:25:46,548 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-4
  10501. 2025-07-20 15:25:46,552 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-4
  10502. 2025-07-20 15:25:46,637 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-8
  10503. 2025-07-20 15:25:46,649 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-14
  10504. 2025-07-20 15:25:46,651 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-9
  10505. 2025-07-20 15:25:46,734 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-11
  10506. 2025-07-20 15:25:46,736 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-3
  10507. 2025-07-20 15:25:46,739 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-12
  10508. 2025-07-20 15:25:46,744 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-5
  10509. 2025-07-20 15:25:46,834 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017041.pdf-13
  10510. 2025-07-20 15:25:46,836 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-15
  10511. 2025-07-20 15:25:46,855 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-17
  10512. 2025-07-20 15:25:46,944 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12
  10513. 2025-07-20 15:25:46,946 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-6
  10514. 2025-07-20 15:25:47,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-8
  10515. 2025-07-20 15:25:47,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-11
  10516. 2025-07-20 15:25:47,035 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-13
  10517. 2025-07-20 15:25:47,035 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-1
  10518. 2025-07-20 15:25:47,041 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017028.pdf-13
  10519. 2025-07-20 15:25:47,042 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-14
  10520. 2025-07-20 15:25:47,246 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-6
  10521. 2025-07-20 15:25:47,338 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-15
  10522. 2025-07-20 15:25:47,340 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-16
  10523. 2025-07-20 15:25:47,341 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-1
  10524. 2025-07-20 15:25:47,345 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-4
  10525. 2025-07-20 15:25:47,439 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-8
  10526. 2025-07-20 15:25:47,439 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-3
  10527. 2025-07-20 15:25:47,439 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-4
  10528. 2025-07-20 15:25:47,445 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-2
  10529. 2025-07-20 15:25:47,446 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-11
  10530. 2025-07-20 15:25:47,543 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-7
  10531. 2025-07-20 15:25:47,635 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-14
  10532. 2025-07-20 15:25:47,636 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-12
  10533. 2025-07-20 15:25:47,669 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-9
  10534. 2025-07-20 15:25:47,670 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-2
  10535. 2025-07-20 15:25:47,670 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-7
  10536. 2025-07-20 15:25:47,734 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-8
  10537. 2025-07-20 15:25:47,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-2
  10538. 2025-07-20 15:25:47,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-14
  10539. 2025-07-20 15:25:47,842 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-11
  10540. 2025-07-20 15:25:47,844 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-9
  10541. 2025-07-20 15:25:47,942 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-7
  10542. 2025-07-20 15:25:47,944 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-5
  10543. 2025-07-20 15:25:47,946 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-17
  10544. 2025-07-20 15:25:48,033 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-2
  10545. 2025-07-20 15:25:48,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-1
  10546. 2025-07-20 15:25:48,143 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-12
  10547. 2025-07-20 15:25:48,143 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-4
  10548. 2025-07-20 15:25:48,145 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-15
  10549. 2025-07-20 15:25:48,237 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-10
  10550. 2025-07-20 15:25:48,238 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-3
  10551. 2025-07-20 15:25:48,239 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-7
  10552. 2025-07-20 15:25:48,241 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-6
  10553. 2025-07-20 15:25:48,246 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-13
  10554. 2025-07-20 15:25:48,336 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-13
  10555. 2025-07-20 15:25:48,337 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-16
  10556. 2025-07-20 15:25:48,340 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-5
  10557. 2025-07-20 15:25:48,342 - __main__ - INFO - Built page query for scripts/data/11440000MB2D0234372440125017049.pdf-10
  10558. 2025-07-20 15:25:48,344 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-3
  10559. 2025-07-20 15:25:48,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-5
  10560. 2025-07-20 15:25:48,347 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-15
  10561. 2025-07-20 15:25:48,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-10
  10562. 2025-07-20 15:25:48,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-2
  10563. 2025-07-20 15:25:48,444 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-3
  10564. 2025-07-20 15:25:48,446 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-1
  10565. 2025-07-20 15:25:48,446 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-7
  10566. 2025-07-20 15:25:48,633 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-4
  10567. 2025-07-20 15:25:48,634 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-9
  10568. 2025-07-20 15:25:48,636 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-8
  10569. 2025-07-20 15:25:48,636 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-3
  10570. 2025-07-20 15:25:48,637 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-10
  10571. 2025-07-20 15:25:48,639 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-6
  10572. 2025-07-20 15:25:48,641 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-7
  10573. 2025-07-20 15:25:48,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-1
  10574. 2025-07-20 15:25:48,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-2
  10575. 2025-07-20 15:25:48,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-3
  10576. 2025-07-20 15:25:48,734 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-4
  10577. 2025-07-20 15:25:48,735 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-14
  10578. 2025-07-20 15:25:48,741 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-11
  10579. 2025-07-20 15:25:48,839 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-1
  10580. 2025-07-20 15:25:48,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-9
  10581. 2025-07-20 15:25:48,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-13
  10582. 2025-07-20 15:25:48,840 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-14
  10583. 2025-07-20 15:25:48,843 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-2
  10584. 2025-07-20 15:25:48,934 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-8
  10585. 2025-07-20 15:25:49,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-14
  10586. 2025-07-20 15:25:49,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-7
  10587. 2025-07-20 15:25:49,038 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-9
  10588. 2025-07-20 15:25:49,039 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-4
  10589. 2025-07-20 15:25:49,041 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-3
  10590. 2025-07-20 15:25:49,042 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-6
  10591. 2025-07-20 15:25:49,044 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-6
  10592. 2025-07-20 15:25:49,045 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-10
  10593. 2025-07-20 15:25:49,135 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-9
  10594. 2025-07-20 15:25:49,139 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-11
  10595. 2025-07-20 15:25:49,141 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-4
  10596. 2025-07-20 15:25:49,241 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-2
  10597. 2025-07-20 15:25:49,243 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125011001.pdf-5
  10598. 2025-07-20 15:25:49,244 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-5
  10599. 2025-07-20 15:25:49,247 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-9
  10600. 2025-07-20 15:25:49,247 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-12
  10601. 2025-07-20 15:25:49,334 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-7
  10602. 2025-07-20 15:25:49,336 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-6
  10603. 2025-07-20 15:25:49,339 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-5
  10604. 2025-07-20 15:25:49,341 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-8
  10605. 2025-07-20 15:25:49,342 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-19
  10606. 2025-07-20 15:25:49,342 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-6
  10607. 2025-07-20 15:25:49,343 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-14
  10608. 2025-07-20 15:25:49,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-10
  10609. 2025-07-20 15:25:49,346 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-8
  10610. 2025-07-20 15:25:49,742 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-1
  10611. 2025-07-20 15:25:49,745 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-6
  10612. 2025-07-20 15:25:49,833 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-3
  10613. 2025-07-20 15:25:49,834 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-14
  10614. 2025-07-20 15:25:49,837 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-13
  10615. 2025-07-20 15:25:49,839 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-1
  10616. 2025-07-20 15:25:49,839 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-5
  10617. 2025-07-20 15:25:49,841 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-11
  10618. 2025-07-20 15:25:49,842 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-18
  10619. 2025-07-20 15:25:49,842 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-16
  10620. 2025-07-20 15:25:49,937 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-4
  10621. 2025-07-20 15:25:49,937 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-2
  10622. 2025-07-20 15:25:49,939 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-8
  10623. 2025-07-20 15:25:49,941 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-7
  10624. 2025-07-20 15:25:49,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-4
  10625. 2025-07-20 15:25:49,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-1
  10626. 2025-07-20 15:25:49,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-3
  10627. 2025-07-20 15:25:50,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-5
  10628. 2025-07-20 15:25:50,036 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-15
  10629. 2025-07-20 15:25:50,039 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-9
  10630. 2025-07-20 15:25:50,040 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-13
  10631. 2025-07-20 15:25:50,040 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-2
  10632. 2025-07-20 15:25:50,042 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-10
  10633. 2025-07-20 15:25:50,045 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-8
  10634. 2025-07-20 15:25:50,046 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-17
  10635. 2025-07-20 15:25:50,046 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-11
  10636. 2025-07-20 15:25:50,134 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-13
  10637. 2025-07-20 15:25:50,136 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-6
  10638. 2025-07-20 15:25:50,138 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-3
  10639. 2025-07-20 15:25:50,139 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-9
  10640. 2025-07-20 15:25:50,140 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-9
  10641. 2025-07-20 15:25:50,143 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-10
  10642. 2025-07-20 15:25:50,144 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-6
  10643. 2025-07-20 15:25:50,544 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-5
  10644. 2025-07-20 15:25:50,544 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017023.pdf-12
  10645. 2025-07-20 15:25:50,638 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-12
  10646. 2025-07-20 15:25:50,641 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-11
  10647. 2025-07-20 15:25:50,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-13
  10648. 2025-07-20 15:25:50,644 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-5
  10649. 2025-07-20 15:25:50,734 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-14
  10650. 2025-07-20 15:25:50,735 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-3
  10651. 2025-07-20 15:25:50,735 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-2
  10652. 2025-07-20 15:25:50,942 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-3
  10653. 2025-07-20 15:25:50,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-7
  10654. 2025-07-20 15:25:50,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-4
  10655. 2025-07-20 15:25:50,944 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017006.pdf-12
  10656. 2025-07-20 15:25:50,944 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-10
  10657. 2025-07-20 15:25:51,033 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-5
  10658. 2025-07-20 15:25:51,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-14
  10659. 2025-07-20 15:25:51,035 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-2
  10660. 2025-07-20 15:25:51,036 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-5
  10661. 2025-07-20 15:25:51,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-1
  10662. 2025-07-20 15:25:51,039 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-13
  10663. 2025-07-20 15:25:51,041 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017011.pdf-15
  10664. 2025-07-20 15:25:51,042 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-7
  10665. 2025-07-20 15:25:51,044 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-4
  10666. 2025-07-20 15:25:51,045 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-13
  10667. 2025-07-20 15:25:51,136 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-8
  10668. 2025-07-20 15:25:51,137 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-12
  10669. 2025-07-20 15:25:51,140 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-16
  10670. 2025-07-20 15:25:51,141 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-6
  10671. 2025-07-20 15:25:51,142 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017003.pdf-10
  10672. 2025-07-20 15:25:51,143 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-2
  10673. 2025-07-20 15:25:51,234 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-9
  10674. 2025-07-20 15:25:51,236 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-10
  10675. 2025-07-20 15:25:51,635 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-4
  10676. 2025-07-20 15:25:51,639 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-6
  10677. 2025-07-20 15:25:51,640 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017007.pdf-12
  10678. 2025-07-20 15:25:51,640 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-3
  10679. 2025-07-20 15:25:51,643 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-15
  10680. 2025-07-20 15:25:52,238 - __main__ - INFO - Queue remaining: 3
  10681. 2025-07-20 15:25:52,238 - __main__ - INFO -
  10682. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  10683. ----------------------------------------------------------------------------------
  10684. 2025-07-20 15:25:52,238 - __main__ - INFO -
  10685. Worker ID | started
  10686. ----------+--------
  10687. 0 | 500
  10688. 2025-07-20 15:25:52,239 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-14
  10689. 2025-07-20 15:25:52,241 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-7
  10690. 2025-07-20 15:25:52,244 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-11
  10691. 2025-07-20 15:25:52,335 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-13
  10692. 2025-07-20 15:25:52,338 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-5
  10693. 2025-07-20 15:25:52,342 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-8
  10694. 2025-07-20 15:25:52,343 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-14
  10695. 2025-07-20 15:25:52,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-1
  10696. 2025-07-20 15:25:52,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017023.pdf-1
  10697. 2025-07-20 15:25:52,346 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-2
  10698. 2025-07-20 15:25:52,346 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-1
  10699. 2025-07-20 15:25:52,347 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-9
  10700. 2025-07-20 15:25:52,347 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-5
  10701. 2025-07-20 15:25:52,347 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-13
  10702. 2025-07-20 15:25:52,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-16
  10703. 2025-07-20 15:25:52,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-11
  10704. 2025-07-20 15:25:52,348 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111641000.pdf-4
  10705. 2025-07-20 15:25:52,348 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-7
  10706. 2025-07-20 15:25:52,349 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-11
  10707. 2025-07-20 15:25:52,434 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-6
  10708. 2025-07-20 15:25:52,435 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-4
  10709. 2025-07-20 15:25:52,435 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-6
  10710. 2025-07-20 15:25:52,435 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-10
  10711. 2025-07-20 15:25:52,537 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-8
  10712. 2025-07-20 15:25:52,543 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-3
  10713. 2025-07-20 15:25:52,543 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-3
  10714. 2025-07-20 15:25:52,632 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-8
  10715. 2025-07-20 15:25:52,633 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-12
  10716. 2025-07-20 15:25:52,634 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111641000.pdf-2
  10717. 2025-07-20 15:25:52,635 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-2
  10718. 2025-07-20 15:25:52,635 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111641000.pdf-1
  10719. 2025-07-20 15:25:52,636 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-1
  10720. 2025-07-20 15:25:52,637 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-12
  10721. 2025-07-20 15:25:52,640 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-9
  10722. 2025-07-20 15:25:52,943 - __main__ - INFO - Built page query for scripts/data/11445200MB2D42580L4442014010000.pdf-4
  10723. 2025-07-20 15:25:53,034 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-11
  10724. 2025-07-20 15:25:53,036 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111820005.pdf-2
  10725. 2025-07-20 15:25:53,037 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-11
  10726. 2025-07-20 15:25:53,037 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-5
  10727. 2025-07-20 15:25:53,040 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017041.pdf-2
  10728. 2025-07-20 15:25:53,041 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-1
  10729. 2025-07-20 15:25:53,042 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111667001.pdf-1
  10730. 2025-07-20 15:25:53,044 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111820005.pdf-3
  10731. 2025-07-20 15:25:53,134 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111820005.pdf-1
  10732. 2025-07-20 15:25:53,135 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-2
  10733. 2025-07-20 15:25:53,136 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-4
  10734. 2025-07-20 15:25:53,142 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-14
  10735. 2025-07-20 15:25:53,236 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-6
  10736. 2025-07-20 15:25:53,238 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-7
  10737. 2025-07-20 15:25:53,332 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-5
  10738. 2025-07-20 15:25:53,337 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-3
  10739. 2025-07-20 15:25:53,339 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-6
  10740. 2025-07-20 15:25:53,340 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-3
  10741. 2025-07-20 15:25:53,343 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-8
  10742. 2025-07-20 15:25:53,345 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-13
  10743. 2025-07-20 15:25:53,346 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-10
  10744. 2025-07-20 15:25:53,346 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-4
  10745. 2025-07-20 15:25:53,346 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-7
  10746. 2025-07-20 15:25:53,347 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-9
  10747. 2025-07-20 15:25:54,146 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-12
  10748. 2025-07-20 15:25:54,233 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-1
  10749. 2025-07-20 15:25:54,234 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-13
  10750. 2025-07-20 15:25:54,235 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-2
  10751. 2025-07-20 15:25:54,236 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-6
  10752. 2025-07-20 15:25:54,237 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-11
  10753. 2025-07-20 15:25:54,238 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-7
  10754. 2025-07-20 15:25:54,241 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-12
  10755. 2025-07-20 15:25:54,244 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-18
  10756. 2025-07-20 15:25:54,334 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-2
  10757. 2025-07-20 15:25:54,336 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-5
  10758. 2025-07-20 15:25:54,340 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-3
  10759. 2025-07-20 15:25:54,343 - __main__ - INFO - Built page query for scripts/data/11445200MB2D06387W3440125017048.pdf-12
  10760. 2025-07-20 15:25:54,435 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-9
  10761. 2025-07-20 15:25:54,439 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-9
  10762. 2025-07-20 15:25:54,441 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-8
  10763. 2025-07-20 15:25:54,442 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-11
  10764. 2025-07-20 15:25:54,535 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-12
  10765. 2025-07-20 15:25:54,539 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-15
  10766. 2025-07-20 15:25:54,542 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-4
  10767. 2025-07-20 15:25:54,643 - __main__ - INFO - Built page query for scripts/data/11445202MB2D117760444212503R001.pdf-3
  10768. 2025-07-20 15:25:54,737 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-3
  10769. 2025-07-20 15:25:54,742 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4440711000000.pdf-2
  10770. 2025-07-20 15:25:54,833 - __main__ - INFO - Built page query for scripts/data/11445202MB2D117760444212503R001.pdf-2
  10771. 2025-07-20 15:25:54,840 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111641000.pdf-4
  10772. 2025-07-20 15:25:54,840 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017027.pdf-10
  10773. 2025-07-20 15:25:54,842 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-5
  10774. 2025-07-20 15:25:54,843 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111820005.pdf-4
  10775. 2025-07-20 15:25:54,935 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-5
  10776. 2025-07-20 15:25:54,936 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-14
  10777. 2025-07-20 15:25:54,937 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4440711000000.pdf-3
  10778. 2025-07-20 15:25:54,938 - __main__ - INFO - Built page query for scripts/data/11445202592174409C4442111641000.pdf-3
  10779. 2025-07-20 15:25:54,940 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-8
  10780. 2025-07-20 15:25:55,037 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-7
  10781. 2025-07-20 15:25:55,040 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-10
  10782. 2025-07-20 15:25:55,045 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-12
  10783. 2025-07-20 15:25:55,045 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111641000.pdf-2
  10784. 2025-07-20 15:25:55,045 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-13
  10785. 2025-07-20 15:25:55,046 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017049.pdf-4
  10786. 2025-07-20 15:25:55,046 - __main__ - INFO - Built page query for scripts/data/11445202MB2D117760444212503R001.pdf-1
  10787. 2025-07-20 15:25:55,046 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-2
  10788. 2025-07-20 15:25:55,137 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-2
  10789. 2025-07-20 15:25:55,139 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-4
  10790. 2025-07-20 15:25:55,144 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-3
  10791. 2025-07-20 15:25:55,443 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4440711000000.pdf-4
  10792. 2025-07-20 15:25:55,444 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-6
  10793. 2025-07-20 15:25:55,445 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-4
  10794. 2025-07-20 15:25:55,533 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-5
  10795. 2025-07-20 15:25:55,535 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4440711000000.pdf-1
  10796. 2025-07-20 15:25:55,541 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-3
  10797. 2025-07-20 15:25:55,542 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-3
  10798. 2025-07-20 15:25:55,543 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-1
  10799. 2025-07-20 15:25:55,635 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-2
  10800. 2025-07-20 15:25:55,636 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-7
  10801. 2025-07-20 15:25:55,644 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-1
  10802. 2025-07-20 15:25:55,645 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111641000.pdf-1
  10803. 2025-07-20 15:25:55,645 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-3
  10804. 2025-07-20 15:25:55,645 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111641000.pdf-3
  10805. 2025-07-20 15:25:55,646 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-5
  10806. 2025-07-20 15:25:55,646 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-5
  10807. 2025-07-20 15:25:55,646 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-4
  10808. 2025-07-20 15:25:55,647 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-17
  10809. 2025-07-20 15:25:55,647 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-2
  10810. 2025-07-20 15:25:55,647 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-10
  10811. 2025-07-20 15:25:55,648 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-14
  10812. 2025-07-20 15:25:55,741 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-8
  10813. 2025-07-20 15:25:55,743 - __main__ - INFO - Built page query for scripts/data/11445222007029500K4440711000000.pdf-2
  10814. 2025-07-20 15:25:55,743 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-1
  10815. 2025-07-20 15:25:55,744 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-6
  10816. 2025-07-20 15:25:55,832 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-4
  10817. 2025-07-20 15:25:55,834 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-13
  10818. 2025-07-20 15:25:55,836 - __main__ - INFO - Built page query for scripts/data/11445203707759010G4442014010000.pdf-4
  10819. 2025-07-20 15:25:55,839 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-9
  10820. 2025-07-20 15:25:55,840 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-5
  10821. 2025-07-20 15:25:55,842 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N444212503R001.pdf-3
  10822. 2025-07-20 15:25:55,844 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N444212503R001.pdf-2
  10823. 2025-07-20 15:25:55,935 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-6
  10824. 2025-07-20 15:25:55,937 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-2
  10825. 2025-07-20 15:25:55,939 - __main__ - INFO - Built page query for scripts/data/11445222007029500K4440711000000.pdf-4
  10826. 2025-07-20 15:25:55,940 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-1
  10827. 2025-07-20 15:25:55,940 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-3
  10828. 2025-07-20 15:25:55,941 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-2
  10829. 2025-07-20 15:25:55,942 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-3
  10830. 2025-07-20 15:25:55,943 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-7
  10831. 2025-07-20 15:25:55,945 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-12
  10832. 2025-07-20 15:25:55,946 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-10
  10833. 2025-07-20 15:25:56,033 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N4440125017008.pdf-11
  10834. 2025-07-20 15:25:56,035 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-6
  10835. 2025-07-20 15:25:56,036 - __main__ - INFO - Built page query for scripts/data/11445222007029527B4442106100010.pdf-2
  10836. 2025-07-20 15:25:56,042 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-1
  10837. 2025-07-20 15:25:56,045 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-1
  10838. 2025-07-20 15:25:56,136 - __main__ - INFO - Built page query for scripts/data/11445222007029500K4440711000000.pdf-3
  10839. 2025-07-20 15:25:56,138 - __main__ - INFO - Built page query for scripts/data/11445222007029527B4442106100010.pdf-4
  10840. 2025-07-20 15:25:56,138 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-12
  10841. 2025-07-20 15:25:56,139 - __main__ - INFO - Built page query for scripts/data/11445222007029500K4440711000000.pdf-1
  10842. 2025-07-20 15:25:56,139 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-3
  10843. 2025-07-20 15:25:56,139 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-4
  10844. 2025-07-20 15:25:56,139 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-5
  10845. 2025-07-20 15:25:56,737 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-4
  10846. 2025-07-20 15:25:56,738 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-2
  10847. 2025-07-20 15:25:56,738 - __main__ - INFO - Built page query for scripts/data/11445224007035644H4440711000000.pdf-3
  10848. 2025-07-20 15:25:56,741 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017023.pdf-1
  10849. 2025-07-20 15:25:56,741 - __main__ - INFO - Built page query for scripts/data/11445222007029527B4442106100010.pdf-1
  10850. 2025-07-20 15:25:56,741 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-1
  10851. 2025-07-20 15:25:56,742 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-1
  10852. 2025-07-20 15:25:56,745 - __main__ - INFO - Built page query for scripts/data/11445224007035644H4440711000000.pdf-2
  10853. 2025-07-20 15:25:56,746 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0001.pdf-5
  10854. 2025-07-20 15:25:56,833 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-11
  10855. 2025-07-20 15:25:56,838 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-5
  10856. 2025-07-20 15:25:56,840 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-7
  10857. 2025-07-20 15:25:56,844 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-2
  10858. 2025-07-20 15:25:56,844 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-2
  10859. 2025-07-20 15:25:56,933 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-8
  10860. 2025-07-20 15:25:56,934 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111667001.pdf-2
  10861. 2025-07-20 15:25:56,935 - __main__ - INFO - Built page query for scripts/data/11445202MB2D1177604440125017041.pdf-11
  10862. 2025-07-20 15:25:56,935 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-1
  10863. 2025-07-20 15:25:56,935 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-5
  10864. 2025-07-20 15:25:56,936 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-10
  10865. 2025-07-20 15:25:56,941 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-2
  10866. 2025-07-20 15:25:56,944 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-5
  10867. 2025-07-20 15:25:56,944 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-7
  10868. 2025-07-20 15:25:56,945 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-5
  10869. 2025-07-20 15:25:57,062 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-3
  10870. 2025-07-20 15:25:57,062 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-3
  10871. 2025-07-20 15:25:57,134 - __main__ - INFO - Built page query for scripts/data/11445203007030456U4442111640000.pdf-1
  10872. 2025-07-20 15:25:57,135 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-3
  10873. 2025-07-20 15:25:57,135 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-5
  10874. 2025-07-20 15:25:57,138 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-2
  10875. 2025-07-20 15:25:57,141 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-4
  10876. 2025-07-20 15:25:57,144 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-1
  10877. 2025-07-20 15:25:57,146 - __main__ - INFO - Built page query for scripts/data/11445203007030456U44421110A0005.pdf-6
  10878. 2025-07-20 15:25:57,238 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-3
  10879. 2025-07-20 15:25:57,240 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-8
  10880. 2025-07-20 15:25:57,242 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-6
  10881. 2025-07-20 15:25:57,243 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-5
  10882. 2025-07-20 15:25:57,245 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4440711000000.pdf-3
  10883. 2025-07-20 15:25:57,247 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4440711000000.pdf-2
  10884. 2025-07-20 15:25:57,334 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-4
  10885. 2025-07-20 15:25:57,334 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-6
  10886. 2025-07-20 15:25:57,337 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4440711000000.pdf-4
  10887. 2025-07-20 15:25:57,345 - __main__ - INFO - Built page query for scripts/data/11445203MB2C21084N444212503R001.pdf-1
  10888. 2025-07-20 15:25:57,345 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111641000.pdf-4
  10889. 2025-07-20 15:25:57,346 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0005.pdf-4
  10890. 2025-07-20 15:25:57,433 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111641000.pdf-2
  10891. 2025-07-20 15:25:57,434 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4440711000000.pdf-1
  10892. 2025-07-20 15:25:57,434 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-6
  10893. 2025-07-20 15:25:57,434 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4442014010000.pdf-1
  10894. 2025-07-20 15:25:57,637 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-3
  10895. 2025-07-20 15:25:57,637 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-1
  10896. 2025-07-20 15:25:57,638 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-3
  10897. 2025-07-20 15:25:57,638 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-2
  10898. 2025-07-20 15:25:57,639 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111820005.pdf-4
  10899. 2025-07-20 15:25:57,639 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-4
  10900. 2025-07-20 15:25:57,641 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-1
  10901. 2025-07-20 15:25:57,643 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-10
  10902. 2025-07-20 15:25:57,644 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111820005.pdf-2
  10903. 2025-07-20 15:25:57,734 - __main__ - INFO - Built page query for scripts/data/11445222007029500K44421110A0005.pdf-4
  10904. 2025-07-20 15:25:57,735 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-6
  10905. 2025-07-20 15:25:57,738 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-5
  10906. 2025-07-20 15:25:57,738 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-5
  10907. 2025-07-20 15:25:57,741 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-4
  10908. 2025-07-20 15:25:57,743 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-6
  10909. 2025-07-20 15:25:57,743 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-5
  10910. 2025-07-20 15:25:57,744 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111641000.pdf-3
  10911. 2025-07-20 15:25:57,746 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-4
  10912. 2025-07-20 15:25:57,833 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-4
  10913. 2025-07-20 15:25:57,834 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111820005.pdf-1
  10914. 2025-07-20 15:25:57,834 - __main__ - INFO - Built page query for scripts/data/11445222007029527B4442106100010.pdf-3
  10915. 2025-07-20 15:25:57,835 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111667001.pdf-1
  10916. 2025-07-20 15:25:57,836 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-2
  10917. 2025-07-20 15:25:57,838 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-5
  10918. 2025-07-20 15:25:57,838 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-3
  10919. 2025-07-20 15:25:57,838 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-3
  10920. 2025-07-20 15:25:57,839 - __main__ - INFO - Built page query for scripts/data/11445224007035644H4440711000000.pdf-4
  10921. 2025-07-20 15:25:57,839 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-1
  10922. 2025-07-20 15:25:57,840 - __main__ - INFO - Built page query for scripts/data/11445224007035644H4440711000000.pdf-1
  10923. 2025-07-20 15:25:57,842 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-4
  10924. 2025-07-20 15:25:57,844 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-4
  10925. 2025-07-20 15:25:57,845 - __main__ - INFO - Built page query for scripts/data/11445224007035652C4440114020001.pdf-9
  10926. 2025-07-20 15:25:57,846 - __main__ - INFO - Built page query for scripts/data/11445222007030157E4440149001001.pdf-9
  10927. 2025-07-20 15:25:57,935 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-3
  10928. 2025-07-20 15:25:57,938 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-2
  10929. 2025-07-20 15:25:57,939 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0001.pdf-1
  10930. 2025-07-20 15:25:58,534 - __main__ - INFO - Built page query for scripts/data/11445281588281455A44421110A0005.pdf-2
  10931. 2025-07-20 15:25:58,836 - __main__ - INFO - Built page query for scripts/data/12445200456019383L3442111667001.pdf-2
  10932. 2025-07-20 15:25:58,838 - __main__ - INFO - Built page query for scripts/data/12445200726503846U344201405500301.pdf-1
  10933. 2025-07-20 15:25:58,838 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111641000.pdf-1
  10934. 2025-07-20 15:25:58,845 - __main__ - INFO - Built page query for scripts/data/11445281588281455A4442111820005.pdf-3
  10935. 2025-07-20 15:26:00,241 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  10936. 2025-07-20 15:26:00,743 - __main__ - INFO - Worker 1 processing work item 8d1e4551c46000ba4529a1ac09bae565b95f4ab7
  10937. 2025-07-20 15:26:00,745 - __main__ - INFO - Created all tasks for 8d1e4551c46000ba4529a1ac09bae565b95f4ab7
  10938. 2025-07-20 15:26:02,233 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf in worker 1
  10939. 2025-07-20 15:26:03,633 - __main__ - INFO - Queue remaining: 2
  10940. 2025-07-20 15:26:03,634 - __main__ - INFO -
  10941. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  10942. ----------------------------------------------------------------------------------
  10943. 2025-07-20 15:26:03,639 - __main__ - INFO -
  10944. Worker ID | started
  10945. ----------+--------
  10946. 0 | 500
  10947. 1 | 10
  10948. 2025-07-20 15:26:04,373 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-10
  10949. 2025-07-20 15:26:04,871 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-1
  10950. 2025-07-20 15:26:05,135 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-6
  10951. 2025-07-20 15:26:05,556 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-2
  10952. 2025-07-20 15:26:06,052 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-9
  10953. 2025-07-20 15:26:06,245 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-3
  10954. 2025-07-20 15:26:06,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-7
  10955. 2025-07-20 15:26:06,641 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-5
  10956. 2025-07-20 15:26:06,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-4
  10957. 2025-07-20 15:26:07,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/tobacco_missed_tokens_pg1.pdf-8
  10958. 2025-07-20 15:26:13,640 - __main__ - INFO - Queue remaining: 2
  10959. 2025-07-20 15:26:13,641 - __main__ - INFO -
  10960. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  10961. ----------------------------------------------------------------------------------
  10962. 2025-07-20 15:26:13,641 - __main__ - INFO -
  10963. Worker ID | started
  10964. ----------+--------
  10965. 0 | 500
  10966. 1 | 10
  10967. 2025-07-20 15:26:13,758 - sglang - INFO - [2025-07-20 15:26:13 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  10968. 2025-07-20 15:26:13,759 - __main__ - INFO - sglang running req: 0 queue req: 0
  10969. 2025-07-20 15:26:19,544 - sglang - INFO - [2025-07-20 15:26:19 TP0] Prefill batch. #new-seq: 6, #new-token: 13654, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 103
  10970. 2025-07-20 15:26:19,544 - __main__ - INFO - sglang running req: 1 queue req: 103
  10971. 2025-07-20 15:26:23,642 - __main__ - INFO - Queue remaining: 2
  10972. 2025-07-20 15:26:23,643 - __main__ - INFO -
  10973. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  10974. ----------------------------------------------------------------------------------
  10975. 2025-07-20 15:26:23,643 - __main__ - INFO -
  10976. Worker ID | started
  10977. ----------+--------
  10978. 0 | 500
  10979. 1 | 10
  10980. 2025-07-20 15:26:32,078 - sglang - INFO - [2025-07-20 15:26:32 TP0] Decode batch. #running-req: 7, #token: 15706, token usage: 0.41, gen throughput (token/s): 3.32, #queue-req: 288
  10981. 2025-07-20 15:26:32,078 - __main__ - INFO - sglang running req: 7 queue req: 288
  10982. 2025-07-20 15:26:33,644 - __main__ - INFO - Queue remaining: 2
  10983. 2025-07-20 15:26:33,645 - __main__ - INFO -
  10984. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  10985. ----------------------------------------------------------------------------------
  10986. 2025-07-20 15:26:33,645 - __main__ - INFO -
  10987. Worker ID | started
  10988. ----------+--------
  10989. 0 | 500
  10990. 1 | 10
  10991. 2025-07-20 15:26:33,938 - sglang - INFO - [2025-07-20 15:26:33 TP0] Decode batch. #running-req: 7, #token: 15986, token usage: 0.42, gen throughput (token/s): 150.47, #queue-req: 328
  10992. 2025-07-20 15:26:33,939 - __main__ - INFO - sglang running req: 7 queue req: 328
  10993. 2025-07-20 15:26:35,633 - sglang - INFO - [2025-07-20 15:26:35 TP0] Decode batch. #running-req: 7, #token: 16266, token usage: 0.43, gen throughput (token/s): 165.22, #queue-req: 371
  10994. 2025-07-20 15:26:35,633 - __main__ - INFO - sglang running req: 7 queue req: 371
  10995. 2025-07-20 15:26:37,071 - sglang - INFO - [2025-07-20 15:26:37 TP0] Prefill batch. #new-seq: 2, #new-token: 5491, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.40, #running-req: 6, #queue-req: 416
  10996. 2025-07-20 15:26:37,071 - __main__ - INFO - sglang running req: 6 queue req: 416
  10997. 2025-07-20 15:26:39,583 - sglang - INFO - [2025-07-20 15:26:39 TP0] Decode batch. #running-req: 8, #token: 20525, token usage: 0.54, gen throughput (token/s): 71.39, #queue-req: 482
  10998. 2025-07-20 15:26:39,584 - __main__ - INFO - sglang running req: 8 queue req: 482
  10999. 2025-07-20 15:26:40,731 - sglang - INFO - [2025-07-20 15:26:40 TP0] Decode batch. #running-req: 8, #token: 20845, token usage: 0.55, gen throughput (token/s): 279.01, #queue-req: 501
  11000. 2025-07-20 15:26:40,731 - __main__ - INFO - sglang running req: 8 queue req: 501
  11001. 2025-07-20 15:26:41,631 - sglang - INFO - [2025-07-20 15:26:41 TP0] Decode batch. #running-req: 8, #token: 21165, token usage: 0.56, gen throughput (token/s): 355.27, #queue-req: 501
  11002. 2025-07-20 15:26:41,631 - __main__ - INFO - sglang running req: 8 queue req: 501
  11003. 2025-07-20 15:26:42,534 - sglang - INFO - [2025-07-20 15:26:42 TP0] Decode batch. #running-req: 8, #token: 21485, token usage: 0.57, gen throughput (token/s): 354.55, #queue-req: 501
  11004. 2025-07-20 15:26:42,534 - __main__ - INFO - sglang running req: 8 queue req: 501
  11005. 2025-07-20 15:26:43,436 - sglang - INFO - [2025-07-20 15:26:43 TP0] Decode batch. #running-req: 8, #token: 21805, token usage: 0.57, gen throughput (token/s): 354.47, #queue-req: 501
  11006. 2025-07-20 15:26:43,437 - __main__ - INFO - sglang running req: 8 queue req: 501
  11007. 2025-07-20 15:26:43,646 - __main__ - INFO - Queue remaining: 2
  11008. 2025-07-20 15:26:43,647 - __main__ - INFO -
  11009. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11010. ----------------------------------------------------------------------------------
  11011. sglang_input_tokens 12.43 12.43
  11012. sglang_output_tokens 1.37 1.37
  11013. 2025-07-20 15:26:43,647 - __main__ - INFO -
  11014. Worker ID | finished | started
  11015. ----------+----------+--------
  11016. 0 | 1 | 500
  11017. 1 | 0 | 10
  11018. 2025-07-20 15:26:44,340 - sglang - INFO - [2025-07-20 15:26:44 TP0] Decode batch. #running-req: 8, #token: 22125, token usage: 0.58, gen throughput (token/s): 354.26, #queue-req: 501
  11019. 2025-07-20 15:26:44,340 - __main__ - INFO - sglang running req: 8 queue req: 501
  11020. 2025-07-20 15:26:45,246 - sglang - INFO - [2025-07-20 15:26:45 TP0] Decode batch. #running-req: 8, #token: 22445, token usage: 0.59, gen throughput (token/s): 353.26, #queue-req: 501
  11021. 2025-07-20 15:26:45,246 - __main__ - INFO - sglang running req: 8 queue req: 501
  11022. 2025-07-20 15:26:46,152 - sglang - INFO - [2025-07-20 15:26:46 TP0] Decode batch. #running-req: 8, #token: 22765, token usage: 0.60, gen throughput (token/s): 352.91, #queue-req: 501
  11023. 2025-07-20 15:26:46,153 - __main__ - INFO - sglang running req: 8 queue req: 501
  11024. 2025-07-20 15:26:47,059 - sglang - INFO - [2025-07-20 15:26:47 TP0] Decode batch. #running-req: 8, #token: 23085, token usage: 0.61, gen throughput (token/s): 353.09, #queue-req: 501
  11025. 2025-07-20 15:26:47,059 - __main__ - INFO - sglang running req: 8 queue req: 501
  11026. 2025-07-20 15:26:47,445 - sglang - INFO - [2025-07-20 15:26:47 TP0] Prefill batch. #new-seq: 2, #new-token: 4317, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.55, #running-req: 7, #queue-req: 499
  11027. 2025-07-20 15:26:47,445 - __main__ - INFO - sglang running req: 7 queue req: 499
  11028. 2025-07-20 15:26:49,333 - sglang - INFO - [2025-07-20 15:26:49 TP0] Decode batch. #running-req: 9, #token: 25434, token usage: 0.67, gen throughput (token/s): 150.38, #queue-req: 499
  11029. 2025-07-20 15:26:49,333 - __main__ - INFO - sglang running req: 9 queue req: 499
  11030. 2025-07-20 15:26:49,452 - sglang - INFO - [2025-07-20 15:26:49 TP0] Prefill batch. #new-seq: 2, #new-token: 4449, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.60, #running-req: 8, #queue-req: 497
  11031. 2025-07-20 15:26:49,452 - __main__ - INFO - sglang running req: 8 queue req: 497
  11032. 2025-07-20 15:26:51,267 - sglang - INFO - [2025-07-20 15:26:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2410, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 496
  11033. 2025-07-20 15:26:51,268 - __main__ - INFO - sglang running req: 9 queue req: 496
  11034. 2025-07-20 15:26:52,373 - sglang - INFO - [2025-07-20 15:26:52 TP0] Decode batch. #running-req: 10, #token: 27147, token usage: 0.71, gen throughput (token/s): 129.27, #queue-req: 496
  11035. 2025-07-20 15:26:52,373 - __main__ - INFO - sglang running req: 10 queue req: 496
  11036. 2025-07-20 15:26:53,344 - sglang - INFO - [2025-07-20 15:26:53 TP0] Decode batch. #running-req: 10, #token: 27547, token usage: 0.73, gen throughput (token/s): 412.03, #queue-req: 496
  11037. 2025-07-20 15:26:53,344 - __main__ - INFO - sglang running req: 10 queue req: 496
  11038. 2025-07-20 15:26:53,648 - __main__ - INFO - Queue remaining: 2
  11039. 2025-07-20 15:26:53,648 - __main__ - INFO -
  11040. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11041. ----------------------------------------------------------------------------------
  11042. sglang_input_tokens 64.03 64.03
  11043. sglang_output_tokens 14.15 14.15
  11044. 2025-07-20 15:26:53,648 - __main__ - INFO -
  11045. Worker ID | finished | started
  11046. ----------+----------+--------
  11047. 0 | 4 | 500
  11048. 1 | 0 | 10
  11049. 2025-07-20 15:26:54,313 - sglang - INFO - [2025-07-20 15:26:54 TP0] Decode batch. #running-req: 10, #token: 27947, token usage: 0.74, gen throughput (token/s): 412.69, #queue-req: 496
  11050. 2025-07-20 15:26:54,313 - __main__ - INFO - sglang running req: 10 queue req: 496
  11051. 2025-07-20 15:26:54,992 - sglang - INFO - [2025-07-20 15:26:54 TP0] Prefill batch. #new-seq: 2, #new-token: 3657, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 494
  11052. 2025-07-20 15:26:54,992 - __main__ - INFO - sglang running req: 9 queue req: 494
  11053. 2025-07-20 15:26:56,473 - sglang - INFO - [2025-07-20 15:26:56 TP0] Decode batch. #running-req: 11, #token: 29185, token usage: 0.77, gen throughput (token/s): 190.26, #queue-req: 494
  11054. 2025-07-20 15:26:56,474 - __main__ - INFO - sglang running req: 11 queue req: 494
  11055. 2025-07-20 15:26:57,452 - sglang - INFO - [2025-07-20 15:26:57 TP0] Decode batch. #running-req: 11, #token: 29625, token usage: 0.78, gen throughput (token/s): 449.47, #queue-req: 494
  11056. 2025-07-20 15:26:57,452 - __main__ - INFO - sglang running req: 11 queue req: 494
  11057. 2025-07-20 15:26:58,433 - sglang - INFO - [2025-07-20 15:26:58 TP0] Decode batch. #running-req: 11, #token: 30065, token usage: 0.79, gen throughput (token/s): 448.41, #queue-req: 494
  11058. 2025-07-20 15:26:58,434 - __main__ - INFO - sglang running req: 11 queue req: 494
  11059. 2025-07-20 15:26:59,415 - sglang - INFO - [2025-07-20 15:26:59 TP0] Decode batch. #running-req: 11, #token: 30505, token usage: 0.80, gen throughput (token/s): 448.19, #queue-req: 494
  11060. 2025-07-20 15:26:59,415 - __main__ - INFO - sglang running req: 11 queue req: 494
  11061. 2025-07-20 15:27:00,030 - sglang - INFO - [2025-07-20 15:27:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2750, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 493
  11062. 2025-07-20 15:27:00,030 - __main__ - INFO - sglang running req: 10 queue req: 493
  11063. 2025-07-20 15:27:01,189 - sglang - INFO - [2025-07-20 15:27:01 TP0] Decode batch. #running-req: 11, #token: 31492, token usage: 0.83, gen throughput (token/s): 247.43, #queue-req: 493
  11064. 2025-07-20 15:27:01,190 - __main__ - INFO - sglang running req: 11 queue req: 493
  11065. 2025-07-20 15:27:01,902 - sglang - INFO - [2025-07-20 15:27:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2773, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 492
  11066. 2025-07-20 15:27:01,903 - __main__ - INFO - sglang running req: 10 queue req: 492
  11067. 2025-07-20 15:27:02,965 - sglang - INFO - [2025-07-20 15:27:02 TP0] Decode batch. #running-req: 11, #token: 30988, token usage: 0.82, gen throughput (token/s): 247.19, #queue-req: 492
  11068. 2025-07-20 15:27:02,966 - __main__ - INFO - sglang running req: 11 queue req: 492
  11069. 2025-07-20 15:27:03,405 - sglang - INFO - [2025-07-20 15:27:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2772, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 491
  11070. 2025-07-20 15:27:03,405 - __main__ - INFO - sglang running req: 10 queue req: 491
  11071. 2025-07-20 15:27:03,649 - __main__ - INFO - Queue remaining: 2
  11072. 2025-07-20 15:27:03,649 - __main__ - INFO -
  11073. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11074. ----------------------------------------------------------------------------------
  11075. sglang_input_tokens 123.89 123.89
  11076. sglang_output_tokens 29.09 29.09
  11077. 2025-07-20 15:27:03,649 - __main__ - INFO -
  11078. Worker ID | finished | started
  11079. ----------+----------+--------
  11080. 0 | 8 | 500
  11081. 1 | 0 | 10
  11082. 2025-07-20 15:27:04,743 - sglang - INFO - [2025-07-20 15:27:04 TP0] Decode batch. #running-req: 11, #token: 32458, token usage: 0.85, gen throughput (token/s): 246.97, #queue-req: 491
  11083. 2025-07-20 15:27:04,743 - __main__ - INFO - sglang running req: 11 queue req: 491
  11084. 2025-07-20 15:27:04,990 - sglang - INFO - [2025-07-20 15:27:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2822, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 490
  11085. 2025-07-20 15:27:04,990 - __main__ - INFO - sglang running req: 10 queue req: 490
  11086. 2025-07-20 15:27:06,561 - sglang - INFO - [2025-07-20 15:27:06 TP0] Decode batch. #running-req: 11, #token: 32019, token usage: 0.84, gen throughput (token/s): 241.51, #queue-req: 490
  11087. 2025-07-20 15:27:06,561 - __main__ - INFO - sglang running req: 11 queue req: 490
  11088. 2025-07-20 15:27:07,552 - sglang - INFO - [2025-07-20 15:27:07 TP0] Decode batch. #running-req: 11, #token: 32459, token usage: 0.85, gen throughput (token/s): 444.01, #queue-req: 490
  11089. 2025-07-20 15:27:07,552 - __main__ - INFO - sglang running req: 11 queue req: 490
  11090. 2025-07-20 15:27:07,676 - sglang - INFO - [2025-07-20 15:27:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 489
  11091. 2025-07-20 15:27:07,676 - __main__ - INFO - sglang running req: 10 queue req: 489
  11092. 2025-07-20 15:27:09,257 - sglang - INFO - [2025-07-20 15:27:09 TP0] Decode batch. #running-req: 11, #token: 31582, token usage: 0.83, gen throughput (token/s): 257.35, #queue-req: 489
  11093. 2025-07-20 15:27:09,258 - __main__ - INFO - sglang running req: 11 queue req: 489
  11094. 2025-07-20 15:27:09,752 - sglang - INFO - [2025-07-20 15:27:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2744, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 488
  11095. 2025-07-20 15:27:09,752 - __main__ - INFO - sglang running req: 10 queue req: 488
  11096. 2025-07-20 15:27:11,038 - sglang - INFO - [2025-07-20 15:27:11 TP0] Decode batch. #running-req: 11, #token: 31144, token usage: 0.82, gen throughput (token/s): 246.48, #queue-req: 488
  11097. 2025-07-20 15:27:11,039 - __main__ - INFO - sglang running req: 11 queue req: 488
  11098. 2025-07-20 15:27:11,187 - sglang - INFO - [2025-07-20 15:27:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1710, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 487
  11099. 2025-07-20 15:27:11,187 - __main__ - INFO - sglang running req: 10 queue req: 487
  11100. 2025-07-20 15:27:12,620 - sglang - INFO - [2025-07-20 15:27:12 TP0] Decode batch. #running-req: 11, #token: 30342, token usage: 0.80, gen throughput (token/s): 277.50, #queue-req: 487
  11101. 2025-07-20 15:27:12,621 - __main__ - INFO - sglang running req: 11 queue req: 487
  11102. 2025-07-20 15:27:13,612 - sglang - INFO - [2025-07-20 15:27:13 TP0] Decode batch. #running-req: 11, #token: 30782, token usage: 0.81, gen throughput (token/s): 443.61, #queue-req: 487
  11103. 2025-07-20 15:27:13,613 - __main__ - INFO - sglang running req: 11 queue req: 487
  11104. 2025-07-20 15:27:13,650 - __main__ - INFO - Queue remaining: 2
  11105. 2025-07-20 15:27:13,650 - __main__ - INFO -
  11106. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11107. ----------------------------------------------------------------------------------
  11108. sglang_input_tokens 191.41 191.41
  11109. sglang_output_tokens 50.01 50.01
  11110. 2025-07-20 15:27:13,650 - __main__ - INFO -
  11111. Worker ID | finished | started
  11112. ----------+----------+--------
  11113. 0 | 12 | 500
  11114. 1 | 0 | 10
  11115. 2025-07-20 15:27:13,825 - sglang - INFO - [2025-07-20 15:27:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 486
  11116. 2025-07-20 15:27:13,825 - __main__ - INFO - sglang running req: 10 queue req: 486
  11117. 2025-07-20 15:27:15,437 - sglang - INFO - [2025-07-20 15:27:15 TP0] Decode batch. #running-req: 11, #token: 31443, token usage: 0.83, gen throughput (token/s): 240.53, #queue-req: 486
  11118. 2025-07-20 15:27:15,437 - __main__ - INFO - sglang running req: 11 queue req: 486
  11119. 2025-07-20 15:27:16,280 - sglang - INFO - [2025-07-20 15:27:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 485
  11120. 2025-07-20 15:27:16,280 - __main__ - INFO - sglang running req: 10 queue req: 485
  11121. 2025-07-20 15:27:17,251 - sglang - INFO - [2025-07-20 15:27:17 TP0] Decode batch. #running-req: 11, #token: 32091, token usage: 0.84, gen throughput (token/s): 241.99, #queue-req: 485
  11122. 2025-07-20 15:27:17,252 - __main__ - INFO - sglang running req: 11 queue req: 485
  11123. 2025-07-20 15:27:18,236 - sglang - INFO - [2025-07-20 15:27:18 TP0] Decode batch. #running-req: 11, #token: 32531, token usage: 0.86, gen throughput (token/s): 446.77, #queue-req: 485
  11124. 2025-07-20 15:27:18,236 - __main__ - INFO - sglang running req: 11 queue req: 485
  11125. 2025-07-20 15:27:18,953 - sglang - INFO - [2025-07-20 15:27:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 484
  11126. 2025-07-20 15:27:18,953 - __main__ - INFO - sglang running req: 10 queue req: 484
  11127. 2025-07-20 15:27:19,970 - sglang - INFO - [2025-07-20 15:27:19 TP0] Decode batch. #running-req: 11, #token: 32159, token usage: 0.85, gen throughput (token/s): 253.26, #queue-req: 484
  11128. 2025-07-20 15:27:19,970 - __main__ - INFO - sglang running req: 11 queue req: 484
  11129. 2025-07-20 15:27:20,960 - sglang - INFO - [2025-07-20 15:27:20 TP0] Decode batch. #running-req: 11, #token: 32599, token usage: 0.86, gen throughput (token/s): 444.41, #queue-req: 484
  11130. 2025-07-20 15:27:20,960 - __main__ - INFO - sglang running req: 11 queue req: 484
  11131. 2025-07-20 15:27:21,949 - sglang - INFO - [2025-07-20 15:27:21 TP0] Decode batch. #running-req: 10, #token: 29770, token usage: 0.78, gen throughput (token/s): 434.77, #queue-req: 484
  11132. 2025-07-20 15:27:21,949 - __main__ - INFO - sglang running req: 10 queue req: 484
  11133. 2025-07-20 15:27:22,932 - sglang - INFO - [2025-07-20 15:27:22 TP0] Decode batch. #running-req: 10, #token: 30170, token usage: 0.79, gen throughput (token/s): 406.80, #queue-req: 484
  11134. 2025-07-20 15:27:22,933 - __main__ - INFO - sglang running req: 10 queue req: 484
  11135. 2025-07-20 15:27:23,652 - __main__ - INFO - Queue remaining: 2
  11136. 2025-07-20 15:27:23,652 - __main__ - INFO -
  11137. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11138. ----------------------------------------------------------------------------------
  11139. sglang_input_tokens 238.94 238.94
  11140. sglang_output_tokens 65.94 65.94
  11141. 2025-07-20 15:27:23,653 - __main__ - INFO -
  11142. Worker ID | finished | started
  11143. ----------+----------+--------
  11144. 0 | 16 | 500
  11145. 1 | 0 | 10
  11146. 2025-07-20 15:27:23,916 - sglang - INFO - [2025-07-20 15:27:23 TP0] Decode batch. #running-req: 10, #token: 30570, token usage: 0.80, gen throughput (token/s): 406.47, #queue-req: 484
  11147. 2025-07-20 15:27:23,917 - __main__ - INFO - sglang running req: 10 queue req: 484
  11148. 2025-07-20 15:27:24,899 - sglang - INFO - [2025-07-20 15:27:24 TP0] Decode batch. #running-req: 10, #token: 30970, token usage: 0.82, gen throughput (token/s): 406.94, #queue-req: 484
  11149. 2025-07-20 15:27:24,900 - __main__ - INFO - sglang running req: 10 queue req: 484
  11150. 2025-07-20 15:27:25,685 - sglang - INFO - [2025-07-20 15:27:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2855, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 483
  11151. 2025-07-20 15:27:25,686 - __main__ - INFO - sglang running req: 9 queue req: 483
  11152. 2025-07-20 15:27:26,703 - sglang - INFO - [2025-07-20 15:27:26 TP0] Decode batch. #running-req: 10, #token: 32049, token usage: 0.84, gen throughput (token/s): 221.21, #queue-req: 483
  11153. 2025-07-20 15:27:26,703 - __main__ - INFO - sglang running req: 10 queue req: 483
  11154. 2025-07-20 15:27:27,024 - sglang - INFO - [2025-07-20 15:27:27 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 482
  11155. 2025-07-20 15:27:27,024 - __main__ - INFO - sglang running req: 9 queue req: 482
  11156. 2025-07-20 15:27:28,412 - sglang - INFO - [2025-07-20 15:27:28 TP0] Decode batch. #running-req: 10, #token: 31881, token usage: 0.84, gen throughput (token/s): 233.51, #queue-req: 482
  11157. 2025-07-20 15:27:28,412 - __main__ - INFO - sglang running req: 10 queue req: 482
  11158. 2025-07-20 15:27:29,398 - sglang - INFO - [2025-07-20 15:27:29 TP0] Decode batch. #running-req: 10, #token: 32281, token usage: 0.85, gen throughput (token/s): 405.47, #queue-req: 482
  11159. 2025-07-20 15:27:29,398 - __main__ - INFO - sglang running req: 10 queue req: 482
  11160. 2025-07-20 15:27:30,386 - sglang - INFO - [2025-07-20 15:27:30 TP0] Decode batch. #running-req: 10, #token: 32681, token usage: 0.86, gen throughput (token/s): 404.88, #queue-req: 482
  11161. 2025-07-20 15:27:30,386 - __main__ - INFO - sglang running req: 10 queue req: 482
  11162. 2025-07-20 15:27:30,832 - sglang - INFO - [2025-07-20 15:27:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 481
  11163. 2025-07-20 15:27:30,832 - __main__ - INFO - sglang running req: 9 queue req: 481
  11164. 2025-07-20 15:27:32,043 - sglang - INFO - [2025-07-20 15:27:32 TP0] Prefill batch. #new-seq: 1, #new-token: 1883, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 480
  11165. 2025-07-20 15:27:32,043 - __main__ - INFO - sglang running req: 9 queue req: 480
  11166. 2025-07-20 15:27:32,761 - sglang - INFO - [2025-07-20 15:27:32 TP0] Decode batch. #running-req: 10, #token: 30114, token usage: 0.79, gen throughput (token/s): 167.57, #queue-req: 480
  11167. 2025-07-20 15:27:32,761 - __main__ - INFO - sglang running req: 10 queue req: 480
  11168. 2025-07-20 15:27:33,400 - sglang - INFO - [2025-07-20 15:27:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2042, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 479
  11169. 2025-07-20 15:27:33,400 - __main__ - INFO - sglang running req: 9 queue req: 479
  11170. 2025-07-20 15:27:33,654 - __main__ - INFO - Queue remaining: 2
  11171. 2025-07-20 15:27:33,654 - __main__ - INFO -
  11172. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11173. ----------------------------------------------------------------------------------
  11174. sglang_input_tokens 301.00 301.00
  11175. sglang_output_tokens 84.35 84.35
  11176. 2025-07-20 15:27:33,654 - __main__ - INFO -
  11177. Worker ID | finished | started
  11178. ----------+----------+--------
  11179. 0 | 21 | 500
  11180. 1 | 0 | 10
  11181. 2025-07-20 15:27:34,397 - sglang - INFO - [2025-07-20 15:27:34 TP0] Decode batch. #running-req: 10, #token: 28936, token usage: 0.76, gen throughput (token/s): 243.99, #queue-req: 479
  11182. 2025-07-20 15:27:34,397 - __main__ - INFO - sglang running req: 10 queue req: 479
  11183. 2025-07-20 15:27:35,380 - sglang - INFO - [2025-07-20 15:27:35 TP0] Decode batch. #running-req: 10, #token: 29336, token usage: 0.77, gen throughput (token/s): 406.83, #queue-req: 479
  11184. 2025-07-20 15:27:35,380 - __main__ - INFO - sglang running req: 10 queue req: 479
  11185. 2025-07-20 15:27:35,994 - sglang - INFO - [2025-07-20 15:27:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2507, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 478
  11186. 2025-07-20 15:27:35,994 - __main__ - INFO - sglang running req: 9 queue req: 478
  11187. 2025-07-20 15:27:37,112 - sglang - INFO - [2025-07-20 15:27:37 TP0] Decode batch. #running-req: 10, #token: 28526, token usage: 0.75, gen throughput (token/s): 230.30, #queue-req: 478
  11188. 2025-07-20 15:27:37,112 - __main__ - INFO - sglang running req: 10 queue req: 478
  11189. 2025-07-20 15:27:38,069 - sglang - INFO - [2025-07-20 15:27:38 TP0] Prefill batch. #new-seq: 2, #new-token: 3856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 476
  11190. 2025-07-20 15:27:38,069 - __main__ - INFO - sglang running req: 9 queue req: 476
  11191. 2025-07-20 15:27:39,349 - sglang - INFO - [2025-07-20 15:27:39 TP0] Decode batch. #running-req: 11, #token: 29220, token usage: 0.77, gen throughput (token/s): 178.84, #queue-req: 476
  11192. 2025-07-20 15:27:39,349 - __main__ - INFO - sglang running req: 11 queue req: 476
  11193. 2025-07-20 15:27:40,331 - sglang - INFO - [2025-07-20 15:27:40 TP0] Decode batch. #running-req: 11, #token: 29660, token usage: 0.78, gen throughput (token/s): 447.87, #queue-req: 476
  11194. 2025-07-20 15:27:40,332 - __main__ - INFO - sglang running req: 11 queue req: 476
  11195. 2025-07-20 15:27:41,315 - sglang - INFO - [2025-07-20 15:27:41 TP0] Decode batch. #running-req: 11, #token: 30100, token usage: 0.79, gen throughput (token/s): 447.58, #queue-req: 476
  11196. 2025-07-20 15:27:41,315 - __main__ - INFO - sglang running req: 11 queue req: 476
  11197. 2025-07-20 15:27:42,301 - sglang - INFO - [2025-07-20 15:27:42 TP0] Decode batch. #running-req: 11, #token: 30540, token usage: 0.80, gen throughput (token/s): 446.15, #queue-req: 476
  11198. 2025-07-20 15:27:42,301 - __main__ - INFO - sglang running req: 11 queue req: 476
  11199. 2025-07-20 15:27:42,376 - sglang - INFO - [2025-07-20 15:27:42 TP0] Prefill batch. #new-seq: 1, #new-token: 2822, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 475
  11200. 2025-07-20 15:27:42,376 - __main__ - INFO - sglang running req: 10 queue req: 475
  11201. 2025-07-20 15:27:43,656 - __main__ - INFO - Queue remaining: 2
  11202. 2025-07-20 15:27:43,656 - __main__ - INFO -
  11203. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11204. ----------------------------------------------------------------------------------
  11205. sglang_input_tokens 333.23 333.23
  11206. sglang_output_tokens 94.40 94.40
  11207. 2025-07-20 15:27:43,656 - __main__ - INFO -
  11208. Worker ID | finished | started
  11209. ----------+----------+--------
  11210. 0 | 24 | 500
  11211. 1 | 0 | 10
  11212. 2025-07-20 15:27:44,111 - sglang - INFO - [2025-07-20 15:27:44 TP0] Decode batch. #running-req: 11, #token: 30048, token usage: 0.79, gen throughput (token/s): 242.43, #queue-req: 475
  11213. 2025-07-20 15:27:44,112 - __main__ - INFO - sglang running req: 11 queue req: 475
  11214. 2025-07-20 15:27:44,235 - sglang - INFO - [2025-07-20 15:27:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2748, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 474
  11215. 2025-07-20 15:27:44,236 - __main__ - INFO - sglang running req: 10 queue req: 474
  11216. 2025-07-20 15:27:45,905 - sglang - INFO - [2025-07-20 15:27:45 TP0] Decode batch. #running-req: 11, #token: 31025, token usage: 0.82, gen throughput (token/s): 244.73, #queue-req: 474
  11217. 2025-07-20 15:27:45,906 - __main__ - INFO - sglang running req: 11 queue req: 474
  11218. 2025-07-20 15:27:46,352 - sglang - INFO - [2025-07-20 15:27:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1592, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 473
  11219. 2025-07-20 15:27:46,352 - __main__ - INFO - sglang running req: 10 queue req: 473
  11220. 2025-07-20 15:27:47,472 - sglang - INFO - [2025-07-20 15:27:47 TP0] Decode batch. #running-req: 11, #token: 29826, token usage: 0.79, gen throughput (token/s): 280.24, #queue-req: 473
  11221. 2025-07-20 15:27:47,472 - __main__ - INFO - sglang running req: 11 queue req: 473
  11222. 2025-07-20 15:27:47,718 - sglang - INFO - [2025-07-20 15:27:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2780, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 472
  11223. 2025-07-20 15:27:47,719 - __main__ - INFO - sglang running req: 10 queue req: 472
  11224. 2025-07-20 15:27:49,262 - sglang - INFO - [2025-07-20 15:27:49 TP0] Decode batch. #running-req: 11, #token: 30186, token usage: 0.79, gen throughput (token/s): 245.28, #queue-req: 472
  11225. 2025-07-20 15:27:49,262 - __main__ - INFO - sglang running req: 11 queue req: 472
  11226. 2025-07-20 15:27:50,250 - sglang - INFO - [2025-07-20 15:27:50 TP0] Decode batch. #running-req: 11, #token: 30626, token usage: 0.81, gen throughput (token/s): 444.99, #queue-req: 472
  11227. 2025-07-20 15:27:50,251 - __main__ - INFO - sglang running req: 11 queue req: 472
  11228. 2025-07-20 15:27:50,399 - sglang - INFO - [2025-07-20 15:27:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2750, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 471
  11229. 2025-07-20 15:27:50,399 - __main__ - INFO - sglang running req: 10 queue req: 471
  11230. 2025-07-20 15:27:52,040 - sglang - INFO - [2025-07-20 15:27:52 TP0] Decode batch. #running-req: 11, #token: 29925, token usage: 0.79, gen throughput (token/s): 245.29, #queue-req: 471
  11231. 2025-07-20 15:27:52,040 - __main__ - INFO - sglang running req: 11 queue req: 471
  11232. 2025-07-20 15:27:53,027 - sglang - INFO - [2025-07-20 15:27:53 TP0] Decode batch. #running-req: 11, #token: 30365, token usage: 0.80, gen throughput (token/s): 445.84, #queue-req: 471
  11233. 2025-07-20 15:27:53,027 - __main__ - INFO - sglang running req: 11 queue req: 471
  11234. 2025-07-20 15:27:53,657 - __main__ - INFO - Queue remaining: 2
  11235. 2025-07-20 15:27:53,658 - __main__ - INFO -
  11236. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11237. ----------------------------------------------------------------------------------
  11238. sglang_input_tokens 367.79 367.79
  11239. sglang_output_tokens 103.86 103.86
  11240. 2025-07-20 15:27:53,658 - __main__ - INFO -
  11241. Worker ID | finished | started
  11242. ----------+----------+--------
  11243. 0 | 28 | 500
  11244. 1 | 0 | 10
  11245. 2025-07-20 15:27:54,054 - sglang - INFO - [2025-07-20 15:27:54 TP0] Decode batch. #running-req: 11, #token: 30805, token usage: 0.81, gen throughput (token/s): 428.41, #queue-req: 471
  11246. 2025-07-20 15:27:54,054 - __main__ - INFO - sglang running req: 11 queue req: 471
  11247. 2025-07-20 15:27:54,698 - sglang - INFO - [2025-07-20 15:27:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2855, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 470
  11248. 2025-07-20 15:27:54,698 - __main__ - INFO - sglang running req: 10 queue req: 470
  11249. 2025-07-20 15:27:55,623 - sglang - INFO - [2025-07-20 15:27:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2412, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 469
  11250. 2025-07-20 15:27:55,623 - __main__ - INFO - sglang running req: 10 queue req: 469
  11251. 2025-07-20 15:27:56,603 - sglang - INFO - [2025-07-20 15:27:56 TP0] Decode batch. #running-req: 11, #token: 31689, token usage: 0.83, gen throughput (token/s): 171.86, #queue-req: 469
  11252. 2025-07-20 15:27:56,603 - __main__ - INFO - sglang running req: 11 queue req: 469
  11253. 2025-07-20 15:27:57,589 - sglang - INFO - [2025-07-20 15:27:57 TP0] Decode batch. #running-req: 10, #token: 30235, token usage: 0.80, gen throughput (token/s): 413.82, #queue-req: 469
  11254. 2025-07-20 15:27:57,589 - __main__ - INFO - sglang running req: 10 queue req: 469
  11255. 2025-07-20 15:27:57,688 - sglang - INFO - [2025-07-20 15:27:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2791, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 468
  11256. 2025-07-20 15:27:57,688 - __main__ - INFO - sglang running req: 9 queue req: 468
  11257. 2025-07-20 15:27:59,378 - sglang - INFO - [2025-07-20 15:27:59 TP0] Decode batch. #running-req: 10, #token: 29719, token usage: 0.78, gen throughput (token/s): 223.00, #queue-req: 468
  11258. 2025-07-20 15:27:59,378 - __main__ - INFO - sglang running req: 10 queue req: 468
  11259. 2025-07-20 15:28:00,381 - sglang - INFO - [2025-07-20 15:28:00 TP0] Decode batch. #running-req: 10, #token: 30119, token usage: 0.79, gen throughput (token/s): 398.66, #queue-req: 468
  11260. 2025-07-20 15:28:00,382 - __main__ - INFO - sglang running req: 10 queue req: 468
  11261. 2025-07-20 15:28:01,350 - sglang - INFO - [2025-07-20 15:28:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 467
  11262. 2025-07-20 15:28:01,350 - __main__ - INFO - sglang running req: 9 queue req: 467
  11263. 2025-07-20 15:28:02,176 - sglang - INFO - [2025-07-20 15:28:02 TP0] Decode batch. #running-req: 10, #token: 29762, token usage: 0.78, gen throughput (token/s): 222.33, #queue-req: 467
  11264. 2025-07-20 15:28:02,176 - __main__ - INFO - sglang running req: 10 queue req: 467
  11265. 2025-07-20 15:28:03,154 - sglang - INFO - [2025-07-20 15:28:03 TP0] Decode batch. #running-req: 10, #token: 30162, token usage: 0.79, gen throughput (token/s): 408.86, #queue-req: 467
  11266. 2025-07-20 15:28:03,155 - __main__ - INFO - sglang running req: 10 queue req: 467
  11267. 2025-07-20 15:28:03,252 - sglang - INFO - [2025-07-20 15:28:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 466
  11268. 2025-07-20 15:28:03,253 - __main__ - INFO - sglang running req: 9 queue req: 466
  11269. 2025-07-20 15:28:03,659 - __main__ - INFO - Queue remaining: 2
  11270. 2025-07-20 15:28:03,660 - __main__ - INFO -
  11271. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11272. ----------------------------------------------------------------------------------
  11273. sglang_input_tokens 417.42 417.42
  11274. sglang_output_tokens 118.15 118.15
  11275. 2025-07-20 15:28:03,660 - __main__ - INFO -
  11276. Worker ID | finished | started
  11277. ----------+----------+--------
  11278. 0 | 34 | 500
  11279. 1 | 0 | 10
  11280. 2025-07-20 15:28:04,737 - sglang - INFO - [2025-07-20 15:28:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2146, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 465
  11281. 2025-07-20 15:28:04,738 - __main__ - INFO - sglang running req: 9 queue req: 465
  11282. 2025-07-20 15:28:05,531 - sglang - INFO - [2025-07-20 15:28:05 TP0] Decode batch. #running-req: 10, #token: 28889, token usage: 0.76, gen throughput (token/s): 167.48, #queue-req: 465
  11283. 2025-07-20 15:28:05,531 - __main__ - INFO - sglang running req: 10 queue req: 465
  11284. 2025-07-20 15:28:06,512 - sglang - INFO - [2025-07-20 15:28:06 TP0] Decode batch. #running-req: 10, #token: 29289, token usage: 0.77, gen throughput (token/s): 407.80, #queue-req: 465
  11285. 2025-07-20 15:28:06,512 - __main__ - INFO - sglang running req: 10 queue req: 465
  11286. 2025-07-20 15:28:07,493 - sglang - INFO - [2025-07-20 15:28:07 TP0] Decode batch. #running-req: 10, #token: 29689, token usage: 0.78, gen throughput (token/s): 407.51, #queue-req: 465
  11287. 2025-07-20 15:28:07,493 - __main__ - INFO - sglang running req: 10 queue req: 465
  11288. 2025-07-20 15:28:08,477 - sglang - INFO - [2025-07-20 15:28:08 TP0] Decode batch. #running-req: 10, #token: 30089, token usage: 0.79, gen throughput (token/s): 406.69, #queue-req: 465
  11289. 2025-07-20 15:28:08,477 - __main__ - INFO - sglang running req: 10 queue req: 465
  11290. 2025-07-20 15:28:09,460 - sglang - INFO - [2025-07-20 15:28:09 TP0] Decode batch. #running-req: 10, #token: 30489, token usage: 0.80, gen throughput (token/s): 406.61, #queue-req: 465
  11291. 2025-07-20 15:28:09,461 - __main__ - INFO - sglang running req: 10 queue req: 465
  11292. 2025-07-20 15:28:10,444 - sglang - INFO - [2025-07-20 15:28:10 TP0] Decode batch. #running-req: 10, #token: 30889, token usage: 0.81, gen throughput (token/s): 406.51, #queue-req: 465
  11293. 2025-07-20 15:28:10,445 - __main__ - INFO - sglang running req: 10 queue req: 465
  11294. 2025-07-20 15:28:11,429 - sglang - INFO - [2025-07-20 15:28:11 TP0] Decode batch. #running-req: 10, #token: 31289, token usage: 0.82, gen throughput (token/s): 406.23, #queue-req: 465
  11295. 2025-07-20 15:28:11,429 - __main__ - INFO - sglang running req: 10 queue req: 465
  11296. 2025-07-20 15:28:12,146 - sglang - INFO - [2025-07-20 15:28:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2512, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 464
  11297. 2025-07-20 15:28:12,146 - __main__ - INFO - sglang running req: 9 queue req: 464
  11298. 2025-07-20 15:28:12,924 - sglang - INFO - [2025-07-20 15:28:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 463
  11299. 2025-07-20 15:28:12,924 - __main__ - INFO - sglang running req: 9 queue req: 463
  11300. 2025-07-20 15:28:13,662 - __main__ - INFO - Queue remaining: 2
  11301. 2025-07-20 15:28:13,663 - __main__ - INFO -
  11302. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11303. ----------------------------------------------------------------------------------
  11304. sglang_input_tokens 433.08 433.08
  11305. sglang_output_tokens 122.55 122.55
  11306. 2025-07-20 15:28:13,663 - __main__ - INFO -
  11307. Worker ID | finished | started
  11308. ----------+----------+--------
  11309. 0 | 37 | 500
  11310. 1 | 0 | 10
  11311. 2025-07-20 15:28:13,918 - sglang - INFO - [2025-07-20 15:28:13 TP0] Decode batch. #running-req: 10, #token: 30091, token usage: 0.79, gen throughput (token/s): 159.91, #queue-req: 463
  11312. 2025-07-20 15:28:13,918 - __main__ - INFO - sglang running req: 10 queue req: 463
  11313. 2025-07-20 15:28:14,657 - sglang - INFO - [2025-07-20 15:28:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2791, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 462
  11314. 2025-07-20 15:28:14,657 - __main__ - INFO - sglang running req: 9 queue req: 462
  11315. 2025-07-20 15:28:15,711 - sglang - INFO - [2025-07-20 15:28:15 TP0] Decode batch. #running-req: 10, #token: 29553, token usage: 0.78, gen throughput (token/s): 222.51, #queue-req: 462
  11316. 2025-07-20 15:28:15,711 - __main__ - INFO - sglang running req: 10 queue req: 462
  11317. 2025-07-20 15:28:16,645 - sglang - INFO - [2025-07-20 15:28:16 TP0] Prefill batch. #new-seq: 1, #new-token: 1663, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 461
  11318. 2025-07-20 15:28:16,646 - __main__ - INFO - sglang running req: 9 queue req: 461
  11319. 2025-07-20 15:28:17,273 - sglang - INFO - [2025-07-20 15:28:17 TP0] Decode batch. #running-req: 10, #token: 28008, token usage: 0.74, gen throughput (token/s): 255.48, #queue-req: 461
  11320. 2025-07-20 15:28:17,273 - __main__ - INFO - sglang running req: 10 queue req: 461
  11321. 2025-07-20 15:28:18,290 - sglang - INFO - [2025-07-20 15:28:18 TP0] Decode batch. #running-req: 10, #token: 28408, token usage: 0.75, gen throughput (token/s): 393.11, #queue-req: 461
  11322. 2025-07-20 15:28:18,291 - __main__ - INFO - sglang running req: 10 queue req: 461
  11323. 2025-07-20 15:28:19,268 - sglang - INFO - [2025-07-20 15:28:19 TP0] Decode batch. #running-req: 10, #token: 28808, token usage: 0.76, gen throughput (token/s): 408.98, #queue-req: 461
  11324. 2025-07-20 15:28:19,269 - __main__ - INFO - sglang running req: 10 queue req: 461
  11325. 2025-07-20 15:28:20,250 - sglang - INFO - [2025-07-20 15:28:20 TP0] Decode batch. #running-req: 10, #token: 29208, token usage: 0.77, gen throughput (token/s): 407.59, #queue-req: 461
  11326. 2025-07-20 15:28:20,250 - __main__ - INFO - sglang running req: 10 queue req: 461
  11327. 2025-07-20 15:28:21,234 - sglang - INFO - [2025-07-20 15:28:21 TP0] Decode batch. #running-req: 10, #token: 29608, token usage: 0.78, gen throughput (token/s): 406.31, #queue-req: 461
  11328. 2025-07-20 15:28:21,235 - __main__ - INFO - sglang running req: 10 queue req: 461
  11329. 2025-07-20 15:28:21,530 - sglang - INFO - [2025-07-20 15:28:21 TP0] Prefill batch. #new-seq: 2, #new-token: 3204, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 9, #queue-req: 459
  11330. 2025-07-20 15:28:21,530 - __main__ - INFO - sglang running req: 9 queue req: 459
  11331. 2025-07-20 15:28:22,688 - sglang - INFO - [2025-07-20 15:28:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1710, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 458
  11332. 2025-07-20 15:28:22,688 - __main__ - INFO - sglang running req: 10 queue req: 458
  11333. 2025-07-20 15:28:23,664 - __main__ - INFO - Queue remaining: 2
  11334. 2025-07-20 15:28:23,664 - __main__ - INFO -
  11335. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11336. ----------------------------------------------------------------------------------
  11337. sglang_input_tokens 463.22 463.22
  11338. sglang_output_tokens 132.10 132.10
  11339. 2025-07-20 15:28:23,665 - __main__ - INFO -
  11340. Worker ID | finished | started
  11341. ----------+----------+--------
  11342. 0 | 41 | 500
  11343. 1 | 0 | 10
  11344. 2025-07-20 15:28:23,958 - sglang - INFO - [2025-07-20 15:28:23 TP0] Decode batch. #running-req: 11, #token: 28402, token usage: 0.75, gen throughput (token/s): 156.37, #queue-req: 458
  11345. 2025-07-20 15:28:23,959 - __main__ - INFO - sglang running req: 11 queue req: 458
  11346. 2025-07-20 15:28:24,552 - sglang - INFO - [2025-07-20 15:28:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2857, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 457
  11347. 2025-07-20 15:28:24,552 - __main__ - INFO - sglang running req: 10 queue req: 457
  11348. 2025-07-20 15:28:25,780 - sglang - INFO - [2025-07-20 15:28:25 TP0] Decode batch. #running-req: 10, #token: 28601, token usage: 0.75, gen throughput (token/s): 240.46, #queue-req: 457
  11349. 2025-07-20 15:28:25,780 - __main__ - INFO - sglang running req: 10 queue req: 457
  11350. 2025-07-20 15:28:25,781 - sglang - INFO - [2025-07-20 15:28:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2716, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 456
  11351. 2025-07-20 15:28:25,781 - __main__ - INFO - sglang running req: 10 queue req: 456
  11352. 2025-07-20 15:28:26,738 - sglang - INFO - [2025-07-20 15:28:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2773, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 455
  11353. 2025-07-20 15:28:26,738 - __main__ - INFO - sglang running req: 10 queue req: 455
  11354. 2025-07-20 15:28:28,396 - sglang - INFO - [2025-07-20 15:28:28 TP0] Decode batch. #running-req: 11, #token: 30815, token usage: 0.81, gen throughput (token/s): 167.80, #queue-req: 455
  11355. 2025-07-20 15:28:28,397 - __main__ - INFO - sglang running req: 11 queue req: 455
  11356. 2025-07-20 15:28:29,390 - sglang - INFO - [2025-07-20 15:28:29 TP0] Decode batch. #running-req: 10, #token: 27588, token usage: 0.73, gen throughput (token/s): 441.72, #queue-req: 455
  11357. 2025-07-20 15:28:29,390 - __main__ - INFO - sglang running req: 10 queue req: 455
  11358. 2025-07-20 15:28:29,390 - sglang - INFO - [2025-07-20 15:28:29 TP0] Prefill batch. #new-seq: 1, #new-token: 1663, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 454
  11359. 2025-07-20 15:28:29,391 - __main__ - INFO - sglang running req: 10 queue req: 454
  11360. 2025-07-20 15:28:30,615 - sglang - INFO - [2025-07-20 15:28:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 453
  11361. 2025-07-20 15:28:30,616 - __main__ - INFO - sglang running req: 10 queue req: 453
  11362. 2025-07-20 15:28:31,500 - sglang - INFO - [2025-07-20 15:28:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1286, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 452
  11363. 2025-07-20 15:28:31,501 - __main__ - INFO - sglang running req: 10 queue req: 452
  11364. 2025-07-20 15:28:32,242 - sglang - INFO - [2025-07-20 15:28:32 TP0] Decode batch. #running-req: 11, #token: 27187, token usage: 0.72, gen throughput (token/s): 153.55, #queue-req: 452
  11365. 2025-07-20 15:28:32,243 - __main__ - INFO - sglang running req: 11 queue req: 452
  11366. 2025-07-20 15:28:33,224 - sglang - INFO - [2025-07-20 15:28:33 TP0] Decode batch. #running-req: 11, #token: 27627, token usage: 0.73, gen throughput (token/s): 448.21, #queue-req: 452
  11367. 2025-07-20 15:28:33,224 - __main__ - INFO - sglang running req: 11 queue req: 452
  11368. 2025-07-20 15:28:33,666 - __main__ - INFO - Queue remaining: 2
  11369. 2025-07-20 15:28:33,667 - __main__ - INFO -
  11370. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11371. ----------------------------------------------------------------------------------
  11372. sglang_input_tokens 501.45 501.45
  11373. sglang_output_tokens 142.17 142.17
  11374. 2025-07-20 15:28:33,667 - __main__ - INFO -
  11375. Worker ID | finished | started
  11376. ----------+----------+--------
  11377. 0 | 47 | 500
  11378. 1 | 0 | 10
  11379. 2025-07-20 15:28:34,206 - sglang - INFO - [2025-07-20 15:28:34 TP0] Decode batch. #running-req: 11, #token: 28067, token usage: 0.74, gen throughput (token/s): 447.94, #queue-req: 452
  11380. 2025-07-20 15:28:34,207 - __main__ - INFO - sglang running req: 11 queue req: 452
  11381. 2025-07-20 15:28:34,873 - sglang - INFO - [2025-07-20 15:28:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 451
  11382. 2025-07-20 15:28:34,873 - __main__ - INFO - sglang running req: 10 queue req: 451
  11383. 2025-07-20 15:28:35,947 - sglang - INFO - [2025-07-20 15:28:35 TP0] Decode batch. #running-req: 11, #token: 29499, token usage: 0.78, gen throughput (token/s): 252.20, #queue-req: 451
  11384. 2025-07-20 15:28:35,947 - __main__ - INFO - sglang running req: 11 queue req: 451
  11385. 2025-07-20 15:28:36,937 - sglang - INFO - [2025-07-20 15:28:36 TP0] Decode batch. #running-req: 11, #token: 29939, token usage: 0.79, gen throughput (token/s): 444.63, #queue-req: 451
  11386. 2025-07-20 15:28:36,937 - __main__ - INFO - sglang running req: 11 queue req: 451
  11387. 2025-07-20 15:28:37,927 - sglang - INFO - [2025-07-20 15:28:37 TP0] Decode batch. #running-req: 11, #token: 30379, token usage: 0.80, gen throughput (token/s): 444.37, #queue-req: 451
  11388. 2025-07-20 15:28:37,927 - __main__ - INFO - sglang running req: 11 queue req: 451
  11389. 2025-07-20 15:28:37,977 - sglang - INFO - [2025-07-20 15:28:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2803, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 450
  11390. 2025-07-20 15:28:37,977 - __main__ - INFO - sglang running req: 10 queue req: 450
  11391. 2025-07-20 15:28:39,732 - sglang - INFO - [2025-07-20 15:28:39 TP0] Decode batch. #running-req: 11, #token: 31717, token usage: 0.83, gen throughput (token/s): 243.22, #queue-req: 450
  11392. 2025-07-20 15:28:39,732 - __main__ - INFO - sglang running req: 11 queue req: 450
  11393. 2025-07-20 15:28:40,724 - sglang - INFO - [2025-07-20 15:28:40 TP0] Decode batch. #running-req: 10, #token: 29980, token usage: 0.79, gen throughput (token/s): 441.39, #queue-req: 450
  11394. 2025-07-20 15:28:40,724 - __main__ - INFO - sglang running req: 10 queue req: 450
  11395. 2025-07-20 15:28:41,684 - sglang - INFO - [2025-07-20 15:28:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 449
  11396. 2025-07-20 15:28:41,684 - __main__ - INFO - sglang running req: 9 queue req: 449
  11397. 2025-07-20 15:28:42,545 - sglang - INFO - [2025-07-20 15:28:42 TP0] Decode batch. #running-req: 10, #token: 30017, token usage: 0.79, gen throughput (token/s): 219.15, #queue-req: 449
  11398. 2025-07-20 15:28:42,545 - __main__ - INFO - sglang running req: 10 queue req: 449
  11399. 2025-07-20 15:28:43,534 - sglang - INFO - [2025-07-20 15:28:43 TP0] Decode batch. #running-req: 10, #token: 30417, token usage: 0.80, gen throughput (token/s): 404.61, #queue-req: 449
  11400. 2025-07-20 15:28:43,534 - __main__ - INFO - sglang running req: 10 queue req: 449
  11401. 2025-07-20 15:28:43,669 - __main__ - INFO - Queue remaining: 2
  11402. 2025-07-20 15:28:43,670 - __main__ - INFO -
  11403. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11404. ----------------------------------------------------------------------------------
  11405. sglang_input_tokens 510.83 510.83
  11406. sglang_output_tokens 142.92 142.92
  11407. 2025-07-20 15:28:43,670 - __main__ - INFO -
  11408. Worker ID | finished | started
  11409. ----------+----------+--------
  11410. 0 | 51 | 500
  11411. 1 | 0 | 10
  11412. 2025-07-20 15:28:44,524 - sglang - INFO - [2025-07-20 15:28:44 TP0] Decode batch. #running-req: 10, #token: 30817, token usage: 0.81, gen throughput (token/s): 403.87, #queue-req: 449
  11413. 2025-07-20 15:28:44,524 - __main__ - INFO - sglang running req: 10 queue req: 449
  11414. 2025-07-20 15:28:44,920 - sglang - INFO - [2025-07-20 15:28:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2412, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 448
  11415. 2025-07-20 15:28:44,920 - __main__ - INFO - sglang running req: 9 queue req: 448
  11416. 2025-07-20 15:28:46,250 - sglang - INFO - [2025-07-20 15:28:46 TP0] Decode batch. #running-req: 10, #token: 30369, token usage: 0.80, gen throughput (token/s): 231.18, #queue-req: 448
  11417. 2025-07-20 15:28:46,250 - __main__ - INFO - sglang running req: 10 queue req: 448
  11418. 2025-07-20 15:28:46,993 - sglang - INFO - [2025-07-20 15:28:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1863, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 447
  11419. 2025-07-20 15:28:46,993 - __main__ - INFO - sglang running req: 9 queue req: 447
  11420. 2025-07-20 15:28:47,766 - sglang - INFO - [2025-07-20 15:28:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2730, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 446
  11421. 2025-07-20 15:28:47,767 - __main__ - INFO - sglang running req: 9 queue req: 446
  11422. 2025-07-20 15:28:48,698 - sglang - INFO - [2025-07-20 15:28:48 TP0] Decode batch. #running-req: 10, #token: 28965, token usage: 0.76, gen throughput (token/s): 162.56, #queue-req: 446
  11423. 2025-07-20 15:28:48,698 - __main__ - INFO - sglang running req: 10 queue req: 446
  11424. 2025-07-20 15:28:49,728 - sglang - INFO - [2025-07-20 15:28:49 TP0] Decode batch. #running-req: 10, #token: 29365, token usage: 0.77, gen throughput (token/s): 388.26, #queue-req: 446
  11425. 2025-07-20 15:28:49,728 - __main__ - INFO - sglang running req: 10 queue req: 446
  11426. 2025-07-20 15:28:50,296 - sglang - INFO - [2025-07-20 15:28:50 TP0] Prefill batch. #new-seq: 1, #new-token: 1611, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 445
  11427. 2025-07-20 15:28:50,296 - __main__ - INFO - sglang running req: 9 queue req: 445
  11428. 2025-07-20 15:28:51,297 - sglang - INFO - [2025-07-20 15:28:51 TP0] Decode batch. #running-req: 10, #token: 28516, token usage: 0.75, gen throughput (token/s): 254.34, #queue-req: 445
  11429. 2025-07-20 15:28:51,297 - __main__ - INFO - sglang running req: 10 queue req: 445
  11430. 2025-07-20 15:28:52,281 - sglang - INFO - [2025-07-20 15:28:52 TP0] Decode batch. #running-req: 10, #token: 28916, token usage: 0.76, gen throughput (token/s): 406.77, #queue-req: 445
  11431. 2025-07-20 15:28:52,281 - __main__ - INFO - sglang running req: 10 queue req: 445
  11432. 2025-07-20 15:28:53,264 - sglang - INFO - [2025-07-20 15:28:53 TP0] Decode batch. #running-req: 10, #token: 29316, token usage: 0.77, gen throughput (token/s): 406.56, #queue-req: 445
  11433. 2025-07-20 15:28:53,265 - __main__ - INFO - sglang running req: 10 queue req: 445
  11434. 2025-07-20 15:28:53,671 - __main__ - INFO - Queue remaining: 2
  11435. 2025-07-20 15:28:53,672 - __main__ - INFO -
  11436. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11437. ----------------------------------------------------------------------------------
  11438. sglang_input_tokens 529.35 529.35
  11439. sglang_output_tokens 149.32 149.32
  11440. 2025-07-20 15:28:53,672 - __main__ - INFO -
  11441. Worker ID | finished | started
  11442. ----------+----------+--------
  11443. 0 | 55 | 500
  11444. 1 | 0 | 10
  11445. 2025-07-20 15:28:54,251 - sglang - INFO - [2025-07-20 15:28:54 TP0] Decode batch. #running-req: 10, #token: 29716, token usage: 0.78, gen throughput (token/s): 405.56, #queue-req: 445
  11446. 2025-07-20 15:28:54,251 - __main__ - INFO - sglang running req: 10 queue req: 445
  11447. 2025-07-20 15:28:55,236 - sglang - INFO - [2025-07-20 15:28:55 TP0] Decode batch. #running-req: 10, #token: 30116, token usage: 0.79, gen throughput (token/s): 406.12, #queue-req: 445
  11448. 2025-07-20 15:28:55,236 - __main__ - INFO - sglang running req: 10 queue req: 445
  11449. 2025-07-20 15:28:55,457 - sglang - INFO - [2025-07-20 15:28:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2399, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 444
  11450. 2025-07-20 15:28:55,457 - __main__ - INFO - sglang running req: 9 queue req: 444
  11451. 2025-07-20 15:28:56,355 - sglang - INFO - [2025-07-20 15:28:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2903, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 443
  11452. 2025-07-20 15:28:56,355 - __main__ - INFO - sglang running req: 9 queue req: 443
  11453. 2025-07-20 15:28:57,804 - sglang - INFO - [2025-07-20 15:28:57 TP0] Decode batch. #running-req: 10, #token: 28631, token usage: 0.75, gen throughput (token/s): 154.97, #queue-req: 443
  11454. 2025-07-20 15:28:57,804 - __main__ - INFO - sglang running req: 10 queue req: 443
  11455. 2025-07-20 15:28:58,592 - sglang - INFO - [2025-07-20 15:28:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2733, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 442
  11456. 2025-07-20 15:28:58,592 - __main__ - INFO - sglang running req: 9 queue req: 442
  11457. 2025-07-20 15:28:59,599 - sglang - INFO - [2025-07-20 15:28:59 TP0] Decode batch. #running-req: 10, #token: 27979, token usage: 0.74, gen throughput (token/s): 222.23, #queue-req: 442
  11458. 2025-07-20 15:28:59,599 - __main__ - INFO - sglang running req: 10 queue req: 442
  11459. 2025-07-20 15:29:00,590 - sglang - INFO - [2025-07-20 15:29:00 TP0] Decode batch. #running-req: 10, #token: 28379, token usage: 0.75, gen throughput (token/s): 403.64, #queue-req: 442
  11460. 2025-07-20 15:29:00,590 - __main__ - INFO - sglang running req: 10 queue req: 442
  11461. 2025-07-20 15:29:01,552 - sglang - INFO - [2025-07-20 15:29:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2739, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 441
  11462. 2025-07-20 15:29:01,552 - __main__ - INFO - sglang running req: 9 queue req: 441
  11463. 2025-07-20 15:29:02,386 - sglang - INFO - [2025-07-20 15:29:02 TP0] Decode batch. #running-req: 10, #token: 29246, token usage: 0.77, gen throughput (token/s): 222.20, #queue-req: 441
  11464. 2025-07-20 15:29:02,387 - __main__ - INFO - sglang running req: 10 queue req: 441
  11465. 2025-07-20 15:29:02,608 - sglang - INFO - [2025-07-20 15:29:02 TP0] Prefill batch. #new-seq: 1, #new-token: 1377, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 440
  11466. 2025-07-20 15:29:02,608 - __main__ - INFO - sglang running req: 9 queue req: 440
  11467. 2025-07-20 15:29:03,674 - __main__ - INFO - Queue remaining: 2
  11468. 2025-07-20 15:29:03,674 - __main__ - INFO -
  11469. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11470. ----------------------------------------------------------------------------------
  11471. sglang_input_tokens 555.46 555.46
  11472. sglang_output_tokens 156.88 156.88
  11473. 2025-07-20 15:29:03,674 - __main__ - INFO -
  11474. Worker ID | finished | started
  11475. ----------+----------+--------
  11476. 0 | 60 | 500
  11477. 1 | 0 | 10
  11478. 2025-07-20 15:29:03,887 - sglang - INFO - [2025-07-20 15:29:03 TP0] Decode batch. #running-req: 10, #token: 29066, token usage: 0.77, gen throughput (token/s): 265.87, #queue-req: 440
  11479. 2025-07-20 15:29:03,887 - __main__ - INFO - sglang running req: 10 queue req: 440
  11480. 2025-07-20 15:29:04,548 - sglang - INFO - [2025-07-20 15:29:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1264, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 439
  11481. 2025-07-20 15:29:04,549 - __main__ - INFO - sglang running req: 9 queue req: 439
  11482. 2025-07-20 15:29:05,403 - sglang - INFO - [2025-07-20 15:29:05 TP0] Decode batch. #running-req: 10, #token: 27787, token usage: 0.73, gen throughput (token/s): 263.14, #queue-req: 439
  11483. 2025-07-20 15:29:05,403 - __main__ - INFO - sglang running req: 10 queue req: 439
  11484. 2025-07-20 15:29:05,722 - sglang - INFO - [2025-07-20 15:29:05 TP0] Prefill batch. #new-seq: 2, #new-token: 4842, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 437
  11485. 2025-07-20 15:29:05,722 - __main__ - INFO - sglang running req: 9 queue req: 437
  11486. 2025-07-20 15:29:07,832 - sglang - INFO - [2025-07-20 15:29:07 TP0] Decode batch. #running-req: 11, #token: 29796, token usage: 0.78, gen throughput (token/s): 175.39, #queue-req: 437
  11487. 2025-07-20 15:29:07,832 - __main__ - INFO - sglang running req: 11 queue req: 437
  11488. 2025-07-20 15:29:08,425 - sglang - INFO - [2025-07-20 15:29:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2796, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 436
  11489. 2025-07-20 15:29:08,425 - __main__ - INFO - sglang running req: 10 queue req: 436
  11490. 2025-07-20 15:29:09,639 - sglang - INFO - [2025-07-20 15:29:09 TP0] Decode batch. #running-req: 11, #token: 31520, token usage: 0.83, gen throughput (token/s): 242.92, #queue-req: 436
  11491. 2025-07-20 15:29:09,639 - __main__ - INFO - sglang running req: 11 queue req: 436
  11492. 2025-07-20 15:29:10,641 - sglang - INFO - [2025-07-20 15:29:10 TP0] Decode batch. #running-req: 11, #token: 31960, token usage: 0.84, gen throughput (token/s): 439.24, #queue-req: 436
  11493. 2025-07-20 15:29:10,641 - __main__ - INFO - sglang running req: 11 queue req: 436
  11494. 2025-07-20 15:29:11,641 - sglang - INFO - [2025-07-20 15:29:11 TP0] Decode batch. #running-req: 10, #token: 30963, token usage: 0.82, gen throughput (token/s): 431.98, #queue-req: 436
  11495. 2025-07-20 15:29:11,641 - __main__ - INFO - sglang running req: 10 queue req: 436
  11496. 2025-07-20 15:29:12,333 - sglang - INFO - [2025-07-20 15:29:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2786, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 435
  11497. 2025-07-20 15:29:12,333 - __main__ - INFO - sglang running req: 9 queue req: 435
  11498. 2025-07-20 15:29:13,441 - sglang - INFO - [2025-07-20 15:29:13 TP0] Decode batch. #running-req: 10, #token: 30400, token usage: 0.80, gen throughput (token/s): 221.64, #queue-req: 435
  11499. 2025-07-20 15:29:13,441 - __main__ - INFO - sglang running req: 10 queue req: 435
  11500. 2025-07-20 15:29:13,676 - __main__ - INFO - Queue remaining: 2
  11501. 2025-07-20 15:29:13,676 - __main__ - INFO -
  11502. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11503. ----------------------------------------------------------------------------------
  11504. sglang_input_tokens 573.53 573.53
  11505. sglang_output_tokens 161.01 161.01
  11506. 2025-07-20 15:29:13,676 - __main__ - INFO -
  11507. Worker ID | finished | started
  11508. ----------+----------+--------
  11509. 0 | 65 | 500
  11510. 1 | 0 | 10
  11511. 2025-07-20 15:29:14,433 - sglang - INFO - [2025-07-20 15:29:14 TP0] Decode batch. #running-req: 9, #token: 27000, token usage: 0.71, gen throughput (token/s): 402.03, #queue-req: 435
  11512. 2025-07-20 15:29:14,434 - __main__ - INFO - sglang running req: 9 queue req: 435
  11513. 2025-07-20 15:29:14,434 - sglang - INFO - [2025-07-20 15:29:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2771, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 434
  11514. 2025-07-20 15:29:14,434 - __main__ - INFO - sglang running req: 9 queue req: 434
  11515. 2025-07-20 15:29:16,242 - sglang - INFO - [2025-07-20 15:29:16 TP0] Decode batch. #running-req: 10, #token: 30171, token usage: 0.79, gen throughput (token/s): 221.21, #queue-req: 434
  11516. 2025-07-20 15:29:16,242 - __main__ - INFO - sglang running req: 10 queue req: 434
  11517. 2025-07-20 15:29:16,859 - sglang - INFO - [2025-07-20 15:29:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2144, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 433
  11518. 2025-07-20 15:29:16,859 - __main__ - INFO - sglang running req: 9 queue req: 433
  11519. 2025-07-20 15:29:17,902 - sglang - INFO - [2025-07-20 15:29:17 TP0] Decode batch. #running-req: 10, #token: 29780, token usage: 0.78, gen throughput (token/s): 240.30, #queue-req: 433
  11520. 2025-07-20 15:29:17,902 - __main__ - INFO - sglang running req: 10 queue req: 433
  11521. 2025-07-20 15:29:18,885 - sglang - INFO - [2025-07-20 15:29:18 TP0] Decode batch. #running-req: 10, #token: 30180, token usage: 0.79, gen throughput (token/s): 406.90, #queue-req: 433
  11522. 2025-07-20 15:29:18,886 - __main__ - INFO - sglang running req: 10 queue req: 433
  11523. 2025-07-20 15:29:19,870 - sglang - INFO - [2025-07-20 15:29:19 TP0] Decode batch. #running-req: 10, #token: 30580, token usage: 0.81, gen throughput (token/s): 406.04, #queue-req: 433
  11524. 2025-07-20 15:29:19,870 - __main__ - INFO - sglang running req: 10 queue req: 433
  11525. 2025-07-20 15:29:20,068 - sglang - INFO - [2025-07-20 15:29:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2166, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 432
  11526. 2025-07-20 15:29:20,068 - __main__ - INFO - sglang running req: 9 queue req: 432
  11527. 2025-07-20 15:29:21,531 - sglang - INFO - [2025-07-20 15:29:21 TP0] Decode batch. #running-req: 10, #token: 29523, token usage: 0.78, gen throughput (token/s): 240.32, #queue-req: 432
  11528. 2025-07-20 15:29:21,531 - __main__ - INFO - sglang running req: 10 queue req: 432
  11529. 2025-07-20 15:29:22,516 - sglang - INFO - [2025-07-20 15:29:22 TP0] Decode batch. #running-req: 10, #token: 29923, token usage: 0.79, gen throughput (token/s): 406.15, #queue-req: 432
  11530. 2025-07-20 15:29:22,516 - __main__ - INFO - sglang running req: 10 queue req: 432
  11531. 2025-07-20 15:29:23,500 - sglang - INFO - [2025-07-20 15:29:23 TP0] Decode batch. #running-req: 10, #token: 30323, token usage: 0.80, gen throughput (token/s): 406.20, #queue-req: 432
  11532. 2025-07-20 15:29:23,501 - __main__ - INFO - sglang running req: 10 queue req: 432
  11533. 2025-07-20 15:29:23,677 - __main__ - INFO - Queue remaining: 2
  11534. 2025-07-20 15:29:23,677 - __main__ - INFO -
  11535. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11536. ----------------------------------------------------------------------------------
  11537. sglang_input_tokens 582.10 582.10
  11538. sglang_output_tokens 163.61 163.61
  11539. 2025-07-20 15:29:23,678 - __main__ - INFO -
  11540. Worker ID | finished | started
  11541. ----------+----------+--------
  11542. 0 | 68 | 500
  11543. 1 | 0 | 10
  11544. 2025-07-20 15:29:24,488 - sglang - INFO - [2025-07-20 15:29:24 TP0] Decode batch. #running-req: 10, #token: 30723, token usage: 0.81, gen throughput (token/s): 405.01, #queue-req: 432
  11545. 2025-07-20 15:29:24,488 - __main__ - INFO - sglang running req: 10 queue req: 432
  11546. 2025-07-20 15:29:25,475 - sglang - INFO - [2025-07-20 15:29:25 TP0] Decode batch. #running-req: 10, #token: 31123, token usage: 0.82, gen throughput (token/s): 405.25, #queue-req: 432
  11547. 2025-07-20 15:29:25,475 - __main__ - INFO - sglang running req: 10 queue req: 432
  11548. 2025-07-20 15:29:26,462 - sglang - INFO - [2025-07-20 15:29:26 TP0] Decode batch. #running-req: 10, #token: 31523, token usage: 0.83, gen throughput (token/s): 405.14, #queue-req: 432
  11549. 2025-07-20 15:29:26,462 - __main__ - INFO - sglang running req: 10 queue req: 432
  11550. 2025-07-20 15:29:26,857 - sglang - INFO - [2025-07-20 15:29:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2675, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 431
  11551. 2025-07-20 15:29:26,857 - __main__ - INFO - sglang running req: 9 queue req: 431
  11552. 2025-07-20 15:29:28,246 - sglang - INFO - [2025-07-20 15:29:28 TP0] Decode batch. #running-req: 10, #token: 30814, token usage: 0.81, gen throughput (token/s): 223.69, #queue-req: 431
  11553. 2025-07-20 15:29:28,246 - __main__ - INFO - sglang running req: 10 queue req: 431
  11554. 2025-07-20 15:29:29,237 - sglang - INFO - [2025-07-20 15:29:29 TP0] Decode batch. #running-req: 10, #token: 31214, token usage: 0.82, gen throughput (token/s): 403.75, #queue-req: 431
  11555. 2025-07-20 15:29:29,237 - __main__ - INFO - sglang running req: 10 queue req: 431
  11556. 2025-07-20 15:29:29,980 - sglang - INFO - [2025-07-20 15:29:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2401, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 430
  11557. 2025-07-20 15:29:29,980 - __main__ - INFO - sglang running req: 9 queue req: 430
  11558. 2025-07-20 15:29:30,967 - sglang - INFO - [2025-07-20 15:29:30 TP0] Decode batch. #running-req: 10, #token: 30789, token usage: 0.81, gen throughput (token/s): 230.63, #queue-req: 430
  11559. 2025-07-20 15:29:30,967 - __main__ - INFO - sglang running req: 10 queue req: 430
  11560. 2025-07-20 15:29:31,041 - sglang - INFO - [2025-07-20 15:29:31 TP0] Prefill batch. #new-seq: 1, #new-token: 2146, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 429
  11561. 2025-07-20 15:29:31,041 - __main__ - INFO - sglang running req: 9 queue req: 429
  11562. 2025-07-20 15:29:31,765 - sglang - INFO - [2025-07-20 15:29:31 TP0] Prefill batch. #new-seq: 1, #new-token: 2047, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 428
  11563. 2025-07-20 15:29:31,766 - __main__ - INFO - sglang running req: 9 queue req: 428
  11564. 2025-07-20 15:29:33,115 - sglang - INFO - [2025-07-20 15:29:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 427
  11565. 2025-07-20 15:29:33,115 - __main__ - INFO - sglang running req: 9 queue req: 427
  11566. 2025-07-20 15:29:33,679 - __main__ - INFO - Queue remaining: 2
  11567. 2025-07-20 15:29:33,680 - __main__ - INFO -
  11568. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11569. ----------------------------------------------------------------------------------
  11570. sglang_input_tokens 608.52 608.52
  11571. sglang_output_tokens 172.68 172.68
  11572. 2025-07-20 15:29:33,680 - __main__ - INFO -
  11573. Worker ID | finished | started
  11574. ----------+----------+--------
  11575. 0 | 73 | 500
  11576. 1 | 0 | 10
  11577. 2025-07-20 15:29:34,121 - sglang - INFO - [2025-07-20 15:29:34 TP0] Decode batch. #running-req: 10, #token: 27860, token usage: 0.73, gen throughput (token/s): 125.88, #queue-req: 427
  11578. 2025-07-20 15:29:34,121 - __main__ - INFO - sglang running req: 10 queue req: 427
  11579. 2025-07-20 15:29:35,102 - sglang - INFO - [2025-07-20 15:29:35 TP0] Decode batch. #running-req: 10, #token: 28260, token usage: 0.74, gen throughput (token/s): 407.78, #queue-req: 427
  11580. 2025-07-20 15:29:35,102 - __main__ - INFO - sglang running req: 10 queue req: 427
  11581. 2025-07-20 15:29:36,084 - sglang - INFO - [2025-07-20 15:29:36 TP0] Decode batch. #running-req: 10, #token: 28660, token usage: 0.75, gen throughput (token/s): 407.21, #queue-req: 427
  11582. 2025-07-20 15:29:36,084 - __main__ - INFO - sglang running req: 10 queue req: 427
  11583. 2025-07-20 15:29:37,042 - sglang - INFO - [2025-07-20 15:29:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 426
  11584. 2025-07-20 15:29:37,042 - __main__ - INFO - sglang running req: 9 queue req: 426
  11585. 2025-07-20 15:29:37,825 - sglang - INFO - [2025-07-20 15:29:37 TP0] Decode batch. #running-req: 10, #token: 27905, token usage: 0.73, gen throughput (token/s): 229.16, #queue-req: 426
  11586. 2025-07-20 15:29:37,825 - __main__ - INFO - sglang running req: 10 queue req: 426
  11587. 2025-07-20 15:29:38,806 - sglang - INFO - [2025-07-20 15:29:38 TP0] Decode batch. #running-req: 10, #token: 28305, token usage: 0.75, gen throughput (token/s): 407.67, #queue-req: 426
  11588. 2025-07-20 15:29:38,806 - __main__ - INFO - sglang running req: 10 queue req: 426
  11589. 2025-07-20 15:29:39,790 - sglang - INFO - [2025-07-20 15:29:39 TP0] Decode batch. #running-req: 10, #token: 28705, token usage: 0.76, gen throughput (token/s): 406.59, #queue-req: 426
  11590. 2025-07-20 15:29:39,790 - __main__ - INFO - sglang running req: 10 queue req: 426
  11591. 2025-07-20 15:29:40,259 - sglang - INFO - [2025-07-20 15:29:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 425
  11592. 2025-07-20 15:29:40,259 - __main__ - INFO - sglang running req: 9 queue req: 425
  11593. 2025-07-20 15:29:41,516 - sglang - INFO - [2025-07-20 15:29:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2746, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 424
  11594. 2025-07-20 15:29:41,516 - __main__ - INFO - sglang running req: 9 queue req: 424
  11595. 2025-07-20 15:29:42,360 - sglang - INFO - [2025-07-20 15:29:42 TP0] Decode batch. #running-req: 10, #token: 27638, token usage: 0.73, gen throughput (token/s): 154.83, #queue-req: 424
  11596. 2025-07-20 15:29:42,361 - __main__ - INFO - sglang running req: 10 queue req: 424
  11597. 2025-07-20 15:29:43,271 - sglang - INFO - [2025-07-20 15:29:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2748, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 9, #queue-req: 423
  11598. 2025-07-20 15:29:43,271 - __main__ - INFO - sglang running req: 9 queue req: 423
  11599. 2025-07-20 15:29:43,682 - __main__ - INFO - Queue remaining: 2
  11600. 2025-07-20 15:29:43,683 - __main__ - INFO -
  11601. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11602. ----------------------------------------------------------------------------------
  11603. sglang_input_tokens 623.73 623.73
  11604. sglang_output_tokens 177.95 177.95
  11605. 2025-07-20 15:29:43,683 - __main__ - INFO -
  11606. Worker ID | finished | started
  11607. ----------+----------+--------
  11608. 0 | 77 | 500
  11609. 1 | 0 | 10
  11610. 2025-07-20 15:29:44,156 - sglang - INFO - [2025-07-20 15:29:44 TP0] Decode batch. #running-req: 10, #token: 27178, token usage: 0.72, gen throughput (token/s): 222.21, #queue-req: 423
  11611. 2025-07-20 15:29:44,156 - __main__ - INFO - sglang running req: 10 queue req: 423
  11612. 2025-07-20 15:29:45,136 - sglang - INFO - [2025-07-20 15:29:45 TP0] Decode batch. #running-req: 10, #token: 27578, token usage: 0.73, gen throughput (token/s): 408.24, #queue-req: 423
  11613. 2025-07-20 15:29:45,136 - __main__ - INFO - sglang running req: 10 queue req: 423
  11614. 2025-07-20 15:29:45,455 - sglang - INFO - [2025-07-20 15:29:45 TP0] Prefill batch. #new-seq: 2, #new-token: 3896, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 421
  11615. 2025-07-20 15:29:45,455 - __main__ - INFO - sglang running req: 9 queue req: 421
  11616. 2025-07-20 15:29:47,390 - sglang - INFO - [2025-07-20 15:29:47 TP0] Decode batch. #running-req: 11, #token: 29010, token usage: 0.76, gen throughput (token/s): 188.98, #queue-req: 421
  11617. 2025-07-20 15:29:47,390 - __main__ - INFO - sglang running req: 11 queue req: 421
  11618. 2025-07-20 15:29:48,377 - sglang - INFO - [2025-07-20 15:29:48 TP0] Decode batch. #running-req: 11, #token: 29450, token usage: 0.78, gen throughput (token/s): 445.92, #queue-req: 421
  11619. 2025-07-20 15:29:48,377 - __main__ - INFO - sglang running req: 11 queue req: 421
  11620. 2025-07-20 15:29:49,363 - sglang - INFO - [2025-07-20 15:29:49 TP0] Decode batch. #running-req: 11, #token: 29890, token usage: 0.79, gen throughput (token/s): 446.11, #queue-req: 421
  11621. 2025-07-20 15:29:49,363 - __main__ - INFO - sglang running req: 11 queue req: 421
  11622. 2025-07-20 15:29:50,351 - sglang - INFO - [2025-07-20 15:29:50 TP0] Decode batch. #running-req: 11, #token: 30330, token usage: 0.80, gen throughput (token/s): 445.24, #queue-req: 421
  11623. 2025-07-20 15:29:50,351 - __main__ - INFO - sglang running req: 11 queue req: 421
  11624. 2025-07-20 15:29:50,748 - sglang - INFO - [2025-07-20 15:29:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 420
  11625. 2025-07-20 15:29:50,748 - __main__ - INFO - sglang running req: 10 queue req: 420
  11626. 2025-07-20 15:29:52,138 - sglang - INFO - [2025-07-20 15:29:52 TP0] Decode batch. #running-req: 11, #token: 30863, token usage: 0.81, gen throughput (token/s): 245.78, #queue-req: 420
  11627. 2025-07-20 15:29:52,138 - __main__ - INFO - sglang running req: 11 queue req: 420
  11628. 2025-07-20 15:29:52,361 - sglang - INFO - [2025-07-20 15:29:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 419
  11629. 2025-07-20 15:29:52,361 - __main__ - INFO - sglang running req: 10 queue req: 419
  11630. 2025-07-20 15:29:53,685 - __main__ - INFO - Queue remaining: 2
  11631. 2025-07-20 15:29:53,685 - __main__ - INFO -
  11632. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11633. ----------------------------------------------------------------------------------
  11634. sglang_input_tokens 624.13 624.13
  11635. sglang_output_tokens 178.09 178.09
  11636. 2025-07-20 15:29:53,686 - __main__ - INFO -
  11637. Worker ID | finished | started
  11638. ----------+----------+--------
  11639. 0 | 80 | 500
  11640. 1 | 0 | 10
  11641. 2025-07-20 15:29:53,742 - sglang - INFO - [2025-07-20 15:29:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 418
  11642. 2025-07-20 15:29:53,742 - __main__ - INFO - sglang running req: 10 queue req: 418
  11643. 2025-07-20 15:29:54,723 - sglang - INFO - [2025-07-20 15:29:54 TP0] Decode batch. #running-req: 11, #token: 32279, token usage: 0.85, gen throughput (token/s): 169.40, #queue-req: 418
  11644. 2025-07-20 15:29:54,723 - __main__ - INFO - sglang running req: 11 queue req: 418
  11645. 2025-07-20 15:29:55,719 - sglang - INFO - [2025-07-20 15:29:55 TP0] Decode batch. #running-req: 11, #token: 32719, token usage: 0.86, gen throughput (token/s): 441.73, #queue-req: 418
  11646. 2025-07-20 15:29:55,719 - __main__ - INFO - sglang running req: 11 queue req: 418
  11647. 2025-07-20 15:29:56,715 - sglang - INFO - [2025-07-20 15:29:56 TP0] Decode batch. #running-req: 11, #token: 33159, token usage: 0.87, gen throughput (token/s): 441.95, #queue-req: 418
  11648. 2025-07-20 15:29:56,715 - __main__ - INFO - sglang running req: 11 queue req: 418
  11649. 2025-07-20 15:29:57,437 - sglang - INFO - [2025-07-20 15:29:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2397, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 417
  11650. 2025-07-20 15:29:57,437 - __main__ - INFO - sglang running req: 10 queue req: 417
  11651. 2025-07-20 15:29:58,464 - sglang - INFO - [2025-07-20 15:29:58 TP0] Decode batch. #running-req: 11, #token: 32508, token usage: 0.86, gen throughput (token/s): 251.00, #queue-req: 417
  11652. 2025-07-20 15:29:58,464 - __main__ - INFO - sglang running req: 11 queue req: 417
  11653. 2025-07-20 15:29:59,461 - sglang - INFO - [2025-07-20 15:29:59 TP0] Decode batch. #running-req: 11, #token: 32948, token usage: 0.87, gen throughput (token/s): 441.17, #queue-req: 417
  11654. 2025-07-20 15:29:59,461 - __main__ - INFO - sglang running req: 11 queue req: 417
  11655. 2025-07-20 15:30:00,459 - sglang - INFO - [2025-07-20 15:30:00 TP0] Decode batch. #running-req: 11, #token: 33388, token usage: 0.88, gen throughput (token/s): 440.95, #queue-req: 417
  11656. 2025-07-20 15:30:00,459 - __main__ - INFO - sglang running req: 11 queue req: 417
  11657. 2025-07-20 15:30:01,457 - sglang - INFO - [2025-07-20 15:30:01 TP0] Decode batch. #running-req: 11, #token: 33828, token usage: 0.89, gen throughput (token/s): 440.77, #queue-req: 417
  11658. 2025-07-20 15:30:01,457 - __main__ - INFO - sglang running req: 11 queue req: 417
  11659. 2025-07-20 15:30:02,449 - sglang - INFO - [2025-07-20 15:30:02 TP0] Decode batch. #running-req: 10, #token: 28074, token usage: 0.74, gen throughput (token/s): 413.26, #queue-req: 417
  11660. 2025-07-20 15:30:02,450 - __main__ - INFO - sglang running req: 10 queue req: 417
  11661. 2025-07-20 15:30:02,474 - sglang - INFO - [2025-07-20 15:30:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2765, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 416
  11662. 2025-07-20 15:30:02,474 - __main__ - INFO - sglang running req: 9 queue req: 416
  11663. 2025-07-20 15:30:03,532 - sglang - INFO - [2025-07-20 15:30:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1862, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 415
  11664. 2025-07-20 15:30:03,532 - __main__ - INFO - sglang running req: 9 queue req: 415
  11665. 2025-07-20 15:30:03,687 - __main__ - INFO - Queue remaining: 2
  11666. 2025-07-20 15:30:03,687 - __main__ - INFO -
  11667. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11668. ----------------------------------------------------------------------------------
  11669. sglang_input_tokens 641.55 662.63
  11670. sglang_output_tokens 183.23 189.25
  11671. 2025-07-20 15:30:03,687 - __main__ - INFO -
  11672. Worker ID | finished | started
  11673. ----------+----------+--------
  11674. 0 | 85 | 500
  11675. 1 | 0 | 10
  11676. 2025-07-20 15:30:04,890 - sglang - INFO - [2025-07-20 15:30:04 TP0] Decode batch. #running-req: 10, #token: 29382, token usage: 0.77, gen throughput (token/s): 163.07, #queue-req: 415
  11677. 2025-07-20 15:30:04,890 - __main__ - INFO - sglang running req: 10 queue req: 415
  11678. 2025-07-20 15:30:05,871 - sglang - INFO - [2025-07-20 15:30:05 TP0] Decode batch. #running-req: 10, #token: 29782, token usage: 0.78, gen throughput (token/s): 407.75, #queue-req: 415
  11679. 2025-07-20 15:30:05,871 - __main__ - INFO - sglang running req: 10 queue req: 415
  11680. 2025-07-20 15:30:06,856 - sglang - INFO - [2025-07-20 15:30:06 TP0] Decode batch. #running-req: 10, #token: 30182, token usage: 0.79, gen throughput (token/s): 406.33, #queue-req: 415
  11681. 2025-07-20 15:30:06,856 - __main__ - INFO - sglang running req: 10 queue req: 415
  11682. 2025-07-20 15:30:07,843 - sglang - INFO - [2025-07-20 15:30:07 TP0] Decode batch. #running-req: 10, #token: 30582, token usage: 0.81, gen throughput (token/s): 405.03, #queue-req: 415
  11683. 2025-07-20 15:30:07,843 - __main__ - INFO - sglang running req: 10 queue req: 415
  11684. 2025-07-20 15:30:07,918 - sglang - INFO - [2025-07-20 15:30:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 414
  11685. 2025-07-20 15:30:07,918 - __main__ - INFO - sglang running req: 9 queue req: 414
  11686. 2025-07-20 15:30:09,586 - sglang - INFO - [2025-07-20 15:30:09 TP0] Decode batch. #running-req: 10, #token: 30044, token usage: 0.79, gen throughput (token/s): 228.89, #queue-req: 414
  11687. 2025-07-20 15:30:09,586 - __main__ - INFO - sglang running req: 10 queue req: 414
  11688. 2025-07-20 15:30:10,627 - sglang - INFO - [2025-07-20 15:30:10 TP0] Decode batch. #running-req: 10, #token: 30444, token usage: 0.80, gen throughput (token/s): 384.31, #queue-req: 414
  11689. 2025-07-20 15:30:10,627 - __main__ - INFO - sglang running req: 10 queue req: 414
  11690. 2025-07-20 15:30:10,948 - sglang - INFO - [2025-07-20 15:30:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2506, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 413
  11691. 2025-07-20 15:30:10,949 - __main__ - INFO - sglang running req: 9 queue req: 413
  11692. 2025-07-20 15:30:12,370 - sglang - INFO - [2025-07-20 15:30:12 TP0] Decode batch. #running-req: 10, #token: 29750, token usage: 0.78, gen throughput (token/s): 228.93, #queue-req: 413
  11693. 2025-07-20 15:30:12,370 - __main__ - INFO - sglang running req: 10 queue req: 413
  11694. 2025-07-20 15:30:13,355 - sglang - INFO - [2025-07-20 15:30:13 TP0] Decode batch. #running-req: 10, #token: 30150, token usage: 0.79, gen throughput (token/s): 406.20, #queue-req: 413
  11695. 2025-07-20 15:30:13,355 - __main__ - INFO - sglang running req: 10 queue req: 413
  11696. 2025-07-20 15:30:13,689 - __main__ - INFO - Queue remaining: 2
  11697. 2025-07-20 15:30:13,689 - __main__ - INFO -
  11698. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11699. ----------------------------------------------------------------------------------
  11700. sglang_input_tokens 637.87 680.10
  11701. sglang_output_tokens 182.93 195.04
  11702. 2025-07-20 15:30:13,689 - __main__ - INFO -
  11703. Worker ID | finished | started
  11704. ----------+----------+--------
  11705. 0 | 87 | 500
  11706. 1 | 0 | 10
  11707. 2025-07-20 15:30:14,071 - sglang - INFO - [2025-07-20 15:30:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2601, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 412
  11708. 2025-07-20 15:30:14,071 - __main__ - INFO - sglang running req: 9 queue req: 412
  11709. 2025-07-20 15:30:15,133 - sglang - INFO - [2025-07-20 15:30:15 TP0] Decode batch. #running-req: 10, #token: 29491, token usage: 0.78, gen throughput (token/s): 224.31, #queue-req: 412
  11710. 2025-07-20 15:30:15,134 - __main__ - INFO - sglang running req: 10 queue req: 412
  11711. 2025-07-20 15:30:15,183 - sglang - INFO - [2025-07-20 15:30:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2203, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 411
  11712. 2025-07-20 15:30:15,184 - __main__ - INFO - sglang running req: 9 queue req: 411
  11713. 2025-07-20 15:30:15,984 - sglang - INFO - [2025-07-20 15:30:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1564, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 410
  11714. 2025-07-20 15:30:15,984 - __main__ - INFO - sglang running req: 9 queue req: 410
  11715. 2025-07-20 15:30:17,426 - sglang - INFO - [2025-07-20 15:30:17 TP0] Decode batch. #running-req: 10, #token: 27380, token usage: 0.72, gen throughput (token/s): 173.57, #queue-req: 410
  11716. 2025-07-20 15:30:17,427 - __main__ - INFO - sglang running req: 10 queue req: 410
  11717. 2025-07-20 15:30:17,918 - sglang - INFO - [2025-07-20 15:30:17 TP0] Prefill batch. #new-seq: 1, #new-token: 2941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 409
  11718. 2025-07-20 15:30:17,918 - __main__ - INFO - sglang running req: 9 queue req: 409
  11719. 2025-07-20 15:30:19,246 - sglang - INFO - [2025-07-20 15:30:19 TP0] Decode batch. #running-req: 10, #token: 28450, token usage: 0.75, gen throughput (token/s): 219.29, #queue-req: 409
  11720. 2025-07-20 15:30:19,246 - __main__ - INFO - sglang running req: 10 queue req: 409
  11721. 2025-07-20 15:30:20,225 - sglang - INFO - [2025-07-20 15:30:20 TP0] Decode batch. #running-req: 10, #token: 25355, token usage: 0.67, gen throughput (token/s): 408.51, #queue-req: 409
  11722. 2025-07-20 15:30:20,225 - __main__ - INFO - sglang running req: 10 queue req: 409
  11723. 2025-07-20 15:30:20,250 - sglang - INFO - [2025-07-20 15:30:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 408
  11724. 2025-07-20 15:30:20,250 - __main__ - INFO - sglang running req: 9 queue req: 408
  11725. 2025-07-20 15:30:21,468 - sglang - INFO - [2025-07-20 15:30:21 TP0] Prefill batch. #new-seq: 2, #new-token: 4154, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 406
  11726. 2025-07-20 15:30:21,468 - __main__ - INFO - sglang running req: 9 queue req: 406
  11727. 2025-07-20 15:30:23,291 - sglang - INFO - [2025-07-20 15:30:23 TP0] Decode batch. #running-req: 11, #token: 29069, token usage: 0.77, gen throughput (token/s): 136.33, #queue-req: 406
  11728. 2025-07-20 15:30:23,291 - __main__ - INFO - sglang running req: 11 queue req: 406
  11729. 2025-07-20 15:30:23,690 - __main__ - INFO - Queue remaining: 2
  11730. 2025-07-20 15:30:23,690 - __main__ - INFO -
  11731. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11732. ----------------------------------------------------------------------------------
  11733. sglang_input_tokens 662.78 728.76
  11734. sglang_output_tokens 190.54 209.51
  11735. 2025-07-20 15:30:23,691 - __main__ - INFO -
  11736. Worker ID | finished | started
  11737. ----------+----------+--------
  11738. 0 | 93 | 500
  11739. 1 | 0 | 10
  11740. 2025-07-20 15:30:24,278 - sglang - INFO - [2025-07-20 15:30:24 TP0] Decode batch. #running-req: 11, #token: 29509, token usage: 0.78, gen throughput (token/s): 445.83, #queue-req: 406
  11741. 2025-07-20 15:30:24,278 - __main__ - INFO - sglang running req: 11 queue req: 406
  11742. 2025-07-20 15:30:25,266 - sglang - INFO - [2025-07-20 15:30:25 TP0] Decode batch. #running-req: 11, #token: 29949, token usage: 0.79, gen throughput (token/s): 445.25, #queue-req: 406
  11743. 2025-07-20 15:30:25,266 - __main__ - INFO - sglang running req: 11 queue req: 406
  11744. 2025-07-20 15:30:25,934 - sglang - INFO - [2025-07-20 15:30:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2035, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 405
  11745. 2025-07-20 15:30:25,935 - __main__ - INFO - sglang running req: 10 queue req: 405
  11746. 2025-07-20 15:30:26,917 - sglang - INFO - [2025-07-20 15:30:26 TP0] Decode batch. #running-req: 11, #token: 30598, token usage: 0.81, gen throughput (token/s): 266.02, #queue-req: 405
  11747. 2025-07-20 15:30:26,917 - __main__ - INFO - sglang running req: 11 queue req: 405
  11748. 2025-07-20 15:30:27,906 - sglang - INFO - [2025-07-20 15:30:27 TP0] Decode batch. #running-req: 11, #token: 31038, token usage: 0.82, gen throughput (token/s): 444.75, #queue-req: 405
  11749. 2025-07-20 15:30:27,906 - __main__ - INFO - sglang running req: 11 queue req: 405
  11750. 2025-07-20 15:30:28,897 - sglang - INFO - [2025-07-20 15:30:28 TP0] Decode batch. #running-req: 11, #token: 31478, token usage: 0.83, gen throughput (token/s): 443.81, #queue-req: 405
  11751. 2025-07-20 15:30:28,897 - __main__ - INFO - sglang running req: 11 queue req: 405
  11752. 2025-07-20 15:30:29,891 - sglang - INFO - [2025-07-20 15:30:29 TP0] Decode batch. #running-req: 11, #token: 31918, token usage: 0.84, gen throughput (token/s): 442.91, #queue-req: 405
  11753. 2025-07-20 15:30:29,891 - __main__ - INFO - sglang running req: 11 queue req: 405
  11754. 2025-07-20 15:30:30,313 - sglang - INFO - [2025-07-20 15:30:30 TP0] Prefill batch. #new-seq: 1, #new-token: 1939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 404
  11755. 2025-07-20 15:30:30,313 - __main__ - INFO - sglang running req: 10 queue req: 404
  11756. 2025-07-20 15:30:31,573 - sglang - INFO - [2025-07-20 15:30:31 TP0] Decode batch. #running-req: 11, #token: 32260, token usage: 0.85, gen throughput (token/s): 260.87, #queue-req: 404
  11757. 2025-07-20 15:30:31,574 - __main__ - INFO - sglang running req: 11 queue req: 404
  11758. 2025-07-20 15:30:32,569 - sglang - INFO - [2025-07-20 15:30:32 TP0] Decode batch. #running-req: 10, #token: 29096, token usage: 0.77, gen throughput (token/s): 440.93, #queue-req: 404
  11759. 2025-07-20 15:30:32,569 - __main__ - INFO - sglang running req: 10 queue req: 404
  11760. 2025-07-20 15:30:32,569 - sglang - INFO - [2025-07-20 15:30:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2746, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 403
  11761. 2025-07-20 15:30:32,570 - __main__ - INFO - sglang running req: 10 queue req: 403
  11762. 2025-07-20 15:30:33,692 - __main__ - INFO - Queue remaining: 2
  11763. 2025-07-20 15:30:33,692 - __main__ - INFO -
  11764. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11765. ----------------------------------------------------------------------------------
  11766. sglang_input_tokens 661.19 749.05
  11767. sglang_output_tokens 188.99 214.10
  11768. 2025-07-20 15:30:33,693 - __main__ - INFO -
  11769. Worker ID | finished | started
  11770. ----------+----------+--------
  11771. 0 | 96 | 500
  11772. 1 | 0 | 10
  11773. 2025-07-20 15:30:34,373 - sglang - INFO - [2025-07-20 15:30:34 TP0] Decode batch. #running-req: 11, #token: 32282, token usage: 0.85, gen throughput (token/s): 243.95, #queue-req: 403
  11774. 2025-07-20 15:30:34,373 - __main__ - INFO - sglang running req: 11 queue req: 403
  11775. 2025-07-20 15:30:35,367 - sglang - INFO - [2025-07-20 15:30:35 TP0] Decode batch. #running-req: 11, #token: 32722, token usage: 0.86, gen throughput (token/s): 442.79, #queue-req: 403
  11776. 2025-07-20 15:30:35,367 - __main__ - INFO - sglang running req: 11 queue req: 403
  11777. 2025-07-20 15:30:36,360 - sglang - INFO - [2025-07-20 15:30:36 TP0] Decode batch. #running-req: 10, #token: 29923, token usage: 0.79, gen throughput (token/s): 433.65, #queue-req: 403
  11778. 2025-07-20 15:30:36,361 - __main__ - INFO - sglang running req: 10 queue req: 403
  11779. 2025-07-20 15:30:36,533 - sglang - INFO - [2025-07-20 15:30:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 402
  11780. 2025-07-20 15:30:36,533 - __main__ - INFO - sglang running req: 9 queue req: 402
  11781. 2025-07-20 15:30:38,184 - sglang - INFO - [2025-07-20 15:30:38 TP0] Decode batch. #running-req: 10, #token: 30043, token usage: 0.79, gen throughput (token/s): 218.75, #queue-req: 402
  11782. 2025-07-20 15:30:38,185 - __main__ - INFO - sglang running req: 10 queue req: 402
  11783. 2025-07-20 15:30:38,603 - sglang - INFO - [2025-07-20 15:30:38 TP0] Prefill batch. #new-seq: 1, #new-token: 2916, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 401
  11784. 2025-07-20 15:30:38,604 - __main__ - INFO - sglang running req: 9 queue req: 401
  11785. 2025-07-20 15:30:39,997 - __main__ - WARNING - JSON decode error on attempt 0 for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12: Unterminated string starting at: line 1 column 125 (char 124)
  11786. 2025-07-20 15:30:40,008 - sglang - INFO - [2025-07-20 15:30:40 TP0] Decode batch. #running-req: 9, #token: 25854, token usage: 0.68, gen throughput (token/s): 218.21, #queue-req: 401
  11787. 2025-07-20 15:30:40,008 - __main__ - INFO - sglang running req: 9 queue req: 401
  11788. 2025-07-20 15:30:40,009 - sglang - INFO - [2025-07-20 15:30:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2748, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 400
  11789. 2025-07-20 15:30:40,009 - __main__ - INFO - sglang running req: 9 queue req: 400
  11790. 2025-07-20 15:30:40,263 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12
  11791. 2025-07-20 15:30:41,804 - sglang - INFO - [2025-07-20 15:30:41 TP0] Decode batch. #running-req: 10, #token: 29002, token usage: 0.76, gen throughput (token/s): 222.74, #queue-req: 401
  11792. 2025-07-20 15:30:41,804 - __main__ - INFO - sglang running req: 10 queue req: 401
  11793. 2025-07-20 15:30:42,783 - sglang - INFO - [2025-07-20 15:30:42 TP0] Decode batch. #running-req: 10, #token: 29402, token usage: 0.77, gen throughput (token/s): 408.78, #queue-req: 401
  11794. 2025-07-20 15:30:42,783 - __main__ - INFO - sglang running req: 10 queue req: 401
  11795. 2025-07-20 15:30:43,028 - sglang - INFO - [2025-07-20 15:30:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 400
  11796. 2025-07-20 15:30:43,028 - __main__ - INFO - sglang running req: 9 queue req: 400
  11797. 2025-07-20 15:30:43,694 - __main__ - INFO - Queue remaining: 2
  11798. 2025-07-20 15:30:43,694 - __main__ - INFO -
  11799. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11800. ----------------------------------------------------------------------------------
  11801. sglang_input_tokens 678.41 791.17
  11802. sglang_output_tokens 195.85 228.40
  11803. 2025-07-20 15:30:43,695 - __main__ - INFO -
  11804. Worker ID | finished | started
  11805. ----------+----------+--------
  11806. 0 | 100 | 500
  11807. 1 | 0 | 10
  11808. 2025-07-20 15:30:44,598 - sglang - INFO - [2025-07-20 15:30:44 TP0] Decode batch. #running-req: 10, #token: 29691, token usage: 0.78, gen throughput (token/s): 219.76, #queue-req: 400
  11809. 2025-07-20 15:30:44,598 - __main__ - INFO - sglang running req: 10 queue req: 400
  11810. 2025-07-20 15:30:44,771 - sglang - INFO - [2025-07-20 15:30:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 399
  11811. 2025-07-20 15:30:44,771 - __main__ - INFO - sglang running req: 9 queue req: 399
  11812. 2025-07-20 15:30:46,317 - sglang - INFO - [2025-07-20 15:30:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1420, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 398
  11813. 2025-07-20 15:30:46,318 - __main__ - INFO - sglang running req: 9 queue req: 398
  11814. 2025-07-20 15:30:46,895 - sglang - INFO - [2025-07-20 15:30:46 TP0] Decode batch. #running-req: 10, #token: 29051, token usage: 0.76, gen throughput (token/s): 173.32, #queue-req: 398
  11815. 2025-07-20 15:30:46,895 - __main__ - INFO - sglang running req: 10 queue req: 398
  11816. 2025-07-20 15:30:47,879 - sglang - INFO - [2025-07-20 15:30:47 TP0] Decode batch. #running-req: 10, #token: 29451, token usage: 0.78, gen throughput (token/s): 406.39, #queue-req: 398
  11817. 2025-07-20 15:30:47,879 - __main__ - INFO - sglang running req: 10 queue req: 398
  11818. 2025-07-20 15:30:48,864 - sglang - INFO - [2025-07-20 15:30:48 TP0] Decode batch. #running-req: 10, #token: 29851, token usage: 0.79, gen throughput (token/s): 406.00, #queue-req: 398
  11819. 2025-07-20 15:30:48,865 - __main__ - INFO - sglang running req: 10 queue req: 398
  11820. 2025-07-20 15:30:49,258 - sglang - INFO - [2025-07-20 15:30:49 TP0] Prefill batch. #new-seq: 2, #new-token: 3316, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 396
  11821. 2025-07-20 15:30:49,258 - __main__ - INFO - sglang running req: 9 queue req: 396
  11822. 2025-07-20 15:30:50,814 - sglang - INFO - [2025-07-20 15:30:50 TP0] Prefill batch. #new-seq: 1, #new-token: 1991, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 395
  11823. 2025-07-20 15:30:50,814 - __main__ - INFO - sglang running req: 10 queue req: 395
  11824. 2025-07-20 15:30:51,645 - sglang - INFO - [2025-07-20 15:30:51 TP0] Decode batch. #running-req: 11, #token: 28561, token usage: 0.75, gen throughput (token/s): 151.74, #queue-req: 395
  11825. 2025-07-20 15:30:51,645 - __main__ - INFO - sglang running req: 11 queue req: 395
  11826. 2025-07-20 15:30:52,633 - sglang - INFO - [2025-07-20 15:30:52 TP0] Decode batch. #running-req: 11, #token: 29001, token usage: 0.76, gen throughput (token/s): 445.62, #queue-req: 395
  11827. 2025-07-20 15:30:52,633 - __main__ - INFO - sglang running req: 11 queue req: 395
  11828. 2025-07-20 15:30:52,781 - sglang - INFO - [2025-07-20 15:30:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2494, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 394
  11829. 2025-07-20 15:30:52,781 - __main__ - INFO - sglang running req: 10 queue req: 394
  11830. 2025-07-20 15:30:53,697 - __main__ - INFO - Queue remaining: 2
  11831. 2025-07-20 15:30:53,697 - __main__ - INFO -
  11832. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11833. ----------------------------------------------------------------------------------
  11834. sglang_input_tokens 692.01 830.11
  11835. sglang_output_tokens 200.23 240.19
  11836. 2025-07-20 15:30:53,697 - __main__ - INFO -
  11837. Worker ID | finished | started
  11838. ----------+----------+--------
  11839. 0 | 105 | 500
  11840. 1 | 0 | 10
  11841. 2025-07-20 15:30:54,376 - sglang - INFO - [2025-07-20 15:30:54 TP0] Decode batch. #running-req: 11, #token: 28675, token usage: 0.75, gen throughput (token/s): 251.85, #queue-req: 394
  11842. 2025-07-20 15:30:54,376 - __main__ - INFO - sglang running req: 11 queue req: 394
  11843. 2025-07-20 15:30:54,795 - sglang - INFO - [2025-07-20 15:30:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2105, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 393
  11844. 2025-07-20 15:30:54,796 - __main__ - INFO - sglang running req: 10 queue req: 393
  11845. 2025-07-20 15:30:55,813 - sglang - INFO - [2025-07-20 15:30:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 392
  11846. 2025-07-20 15:30:55,814 - __main__ - INFO - sglang running req: 10 queue req: 392
  11847. 2025-07-20 15:30:56,875 - sglang - INFO - [2025-07-20 15:30:56 TP0] Decode batch. #running-req: 11, #token: 31009, token usage: 0.82, gen throughput (token/s): 175.26, #queue-req: 392
  11848. 2025-07-20 15:30:56,875 - __main__ - INFO - sglang running req: 11 queue req: 392
  11849. 2025-07-20 15:30:57,865 - sglang - INFO - [2025-07-20 15:30:57 TP0] Decode batch. #running-req: 11, #token: 31449, token usage: 0.83, gen throughput (token/s): 444.28, #queue-req: 392
  11850. 2025-07-20 15:30:57,865 - __main__ - INFO - sglang running req: 11 queue req: 392
  11851. 2025-07-20 15:30:58,913 - sglang - INFO - [2025-07-20 15:30:58 TP0] Decode batch. #running-req: 11, #token: 31889, token usage: 0.84, gen throughput (token/s): 419.80, #queue-req: 392
  11852. 2025-07-20 15:30:58,913 - __main__ - INFO - sglang running req: 11 queue req: 392
  11853. 2025-07-20 15:30:59,911 - sglang - INFO - [2025-07-20 15:30:59 TP0] Decode batch. #running-req: 11, #token: 32329, token usage: 0.85, gen throughput (token/s): 441.24, #queue-req: 392
  11854. 2025-07-20 15:30:59,911 - __main__ - INFO - sglang running req: 11 queue req: 392
  11855. 2025-07-20 15:31:00,961 - sglang - INFO - [2025-07-20 15:31:00 TP0] Decode batch. #running-req: 11, #token: 32769, token usage: 0.86, gen throughput (token/s): 419.07, #queue-req: 392
  11856. 2025-07-20 15:31:00,961 - __main__ - INFO - sglang running req: 11 queue req: 392
  11857. 2025-07-20 15:31:01,959 - sglang - INFO - [2025-07-20 15:31:01 TP0] Decode batch. #running-req: 11, #token: 33209, token usage: 0.87, gen throughput (token/s): 440.84, #queue-req: 392
  11858. 2025-07-20 15:31:01,959 - __main__ - INFO - sglang running req: 11 queue req: 392
  11859. 2025-07-20 15:31:02,956 - sglang - INFO - [2025-07-20 15:31:02 TP0] Decode batch. #running-req: 11, #token: 33649, token usage: 0.89, gen throughput (token/s): 440.90, #queue-req: 392
  11860. 2025-07-20 15:31:02,957 - __main__ - INFO - sglang running req: 11 queue req: 392
  11861. 2025-07-20 15:31:03,481 - sglang - INFO - [2025-07-20 15:31:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2146, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 10, #queue-req: 391
  11862. 2025-07-20 15:31:03,481 - __main__ - INFO - sglang running req: 10 queue req: 391
  11863. 2025-07-20 15:31:03,698 - __main__ - INFO - Queue remaining: 2
  11864. 2025-07-20 15:31:03,698 - __main__ - INFO -
  11865. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11866. ----------------------------------------------------------------------------------
  11867. sglang_input_tokens 688.29 848.59
  11868. sglang_output_tokens 198.10 244.24
  11869. 2025-07-20 15:31:03,698 - __main__ - INFO -
  11870. Worker ID | finished | started
  11871. ----------+----------+--------
  11872. 0 | 108 | 500
  11873. 1 | 0 | 10
  11874. 2025-07-20 15:31:04,625 - sglang - INFO - [2025-07-20 15:31:04 TP0] Decode batch. #running-req: 10, #token: 30265, token usage: 0.80, gen throughput (token/s): 254.16, #queue-req: 391
  11875. 2025-07-20 15:31:04,625 - __main__ - INFO - sglang running req: 10 queue req: 391
  11876. 2025-07-20 15:31:05,607 - sglang - INFO - [2025-07-20 15:31:05 TP0] Decode batch. #running-req: 10, #token: 30665, token usage: 0.81, gen throughput (token/s): 407.03, #queue-req: 391
  11877. 2025-07-20 15:31:05,608 - __main__ - INFO - sglang running req: 10 queue req: 391
  11878. 2025-07-20 15:31:06,595 - sglang - INFO - [2025-07-20 15:31:06 TP0] Decode batch. #running-req: 10, #token: 31065, token usage: 0.82, gen throughput (token/s): 404.96, #queue-req: 391
  11879. 2025-07-20 15:31:06,595 - __main__ - INFO - sglang running req: 10 queue req: 391
  11880. 2025-07-20 15:31:06,819 - sglang - INFO - [2025-07-20 15:31:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 390
  11881. 2025-07-20 15:31:06,819 - __main__ - INFO - sglang running req: 9 queue req: 390
  11882. 2025-07-20 15:31:08,421 - sglang - INFO - [2025-07-20 15:31:08 TP0] Decode batch. #running-req: 10, #token: 31852, token usage: 0.84, gen throughput (token/s): 218.58, #queue-req: 390
  11883. 2025-07-20 15:31:08,421 - __main__ - INFO - sglang running req: 10 queue req: 390
  11884. 2025-07-20 15:31:08,842 - sglang - INFO - [2025-07-20 15:31:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 389
  11885. 2025-07-20 15:31:08,842 - __main__ - INFO - sglang running req: 9 queue req: 389
  11886. 2025-07-20 15:31:10,201 - sglang - INFO - [2025-07-20 15:31:10 TP0] Decode batch. #running-req: 10, #token: 30795, token usage: 0.81, gen throughput (token/s): 224.15, #queue-req: 389
  11887. 2025-07-20 15:31:10,201 - __main__ - INFO - sglang running req: 10 queue req: 389
  11888. 2025-07-20 15:31:10,374 - sglang - INFO - [2025-07-20 15:31:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2601, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 388
  11889. 2025-07-20 15:31:10,374 - __main__ - INFO - sglang running req: 9 queue req: 388
  11890. 2025-07-20 15:31:11,462 - sglang - INFO - [2025-07-20 15:31:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1910, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 387
  11891. 2025-07-20 15:31:11,463 - __main__ - INFO - sglang running req: 9 queue req: 387
  11892. 2025-07-20 15:31:12,410 - sglang - INFO - [2025-07-20 15:31:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 386
  11893. 2025-07-20 15:31:12,410 - __main__ - INFO - sglang running req: 9 queue req: 386
  11894. 2025-07-20 15:31:13,359 - sglang - INFO - [2025-07-20 15:31:13 TP0] Decode batch. #running-req: 10, #token: 28006, token usage: 0.74, gen throughput (token/s): 125.70, #queue-req: 386
  11895. 2025-07-20 15:31:13,359 - __main__ - INFO - sglang running req: 10 queue req: 386
  11896. 2025-07-20 15:31:13,699 - __main__ - INFO - Queue remaining: 2
  11897. 2025-07-20 15:31:13,699 - __main__ - INFO -
  11898. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11899. ----------------------------------------------------------------------------------
  11900. sglang_input_tokens 708.70 897.38
  11901. sglang_output_tokens 203.46 257.63
  11902. 2025-07-20 15:31:13,700 - __main__ - INFO -
  11903. Worker ID | finished | started
  11904. ----------+----------+--------
  11905. 0 | 114 | 500
  11906. 1 | 0 | 10
  11907. 2025-07-20 15:31:14,343 - sglang - INFO - [2025-07-20 15:31:14 TP0] Decode batch. #running-req: 10, #token: 28406, token usage: 0.75, gen throughput (token/s): 406.66, #queue-req: 386
  11908. 2025-07-20 15:31:14,343 - __main__ - INFO - sglang running req: 10 queue req: 386
  11909. 2025-07-20 15:31:14,984 - sglang - INFO - [2025-07-20 15:31:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 385
  11910. 2025-07-20 15:31:14,985 - __main__ - INFO - sglang running req: 9 queue req: 385
  11911. 2025-07-20 15:31:16,121 - sglang - INFO - [2025-07-20 15:31:16 TP0] Decode batch. #running-req: 10, #token: 28182, token usage: 0.74, gen throughput (token/s): 224.38, #queue-req: 385
  11912. 2025-07-20 15:31:16,121 - __main__ - INFO - sglang running req: 10 queue req: 385
  11913. 2025-07-20 15:31:17,056 - sglang - INFO - [2025-07-20 15:31:17 TP0] Prefill batch. #new-seq: 1, #new-token: 2146, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 384
  11914. 2025-07-20 15:31:17,056 - __main__ - INFO - sglang running req: 9 queue req: 384
  11915. 2025-07-20 15:31:17,779 - sglang - INFO - [2025-07-20 15:31:17 TP0] Decode batch. #running-req: 10, #token: 26960, token usage: 0.71, gen throughput (token/s): 240.69, #queue-req: 384
  11916. 2025-07-20 15:31:17,779 - __main__ - INFO - sglang running req: 10 queue req: 384
  11917. 2025-07-20 15:31:18,758 - sglang - INFO - [2025-07-20 15:31:18 TP0] Decode batch. #running-req: 10, #token: 27360, token usage: 0.72, gen throughput (token/s): 408.35, #queue-req: 384
  11918. 2025-07-20 15:31:18,758 - __main__ - INFO - sglang running req: 10 queue req: 384
  11919. 2025-07-20 15:31:19,394 - sglang - INFO - [2025-07-20 15:31:19 TP0] Prefill batch. #new-seq: 2, #new-token: 4498, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 9, #queue-req: 382
  11920. 2025-07-20 15:31:19,394 - __main__ - INFO - sglang running req: 9 queue req: 382
  11921. 2025-07-20 15:31:21,152 - sglang - INFO - [2025-07-20 15:31:21 TP0] Decode batch. #running-req: 11, #token: 29038, token usage: 0.76, gen throughput (token/s): 172.55, #queue-req: 382
  11922. 2025-07-20 15:31:21,152 - __main__ - INFO - sglang running req: 11 queue req: 382
  11923. 2025-07-20 15:31:22,140 - sglang - INFO - [2025-07-20 15:31:22 TP0] Decode batch. #running-req: 11, #token: 29478, token usage: 0.78, gen throughput (token/s): 444.96, #queue-req: 382
  11924. 2025-07-20 15:31:22,141 - __main__ - INFO - sglang running req: 11 queue req: 382
  11925. 2025-07-20 15:31:23,130 - sglang - INFO - [2025-07-20 15:31:23 TP0] Decode batch. #running-req: 11, #token: 29918, token usage: 0.79, gen throughput (token/s): 444.63, #queue-req: 382
  11926. 2025-07-20 15:31:23,130 - __main__ - INFO - sglang running req: 11 queue req: 382
  11927. 2025-07-20 15:31:23,700 - __main__ - INFO - Queue remaining: 2
  11928. 2025-07-20 15:31:23,701 - __main__ - INFO -
  11929. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11930. ----------------------------------------------------------------------------------
  11931. sglang_input_tokens 710.60 923.48
  11932. sglang_output_tokens 204.57 265.86
  11933. 2025-07-20 15:31:23,701 - __main__ - INFO -
  11934. Worker ID | finished | started
  11935. ----------+----------+--------
  11936. 0 | 117 | 500
  11937. 1 | 0 | 10
  11938. 2025-07-20 15:31:24,120 - sglang - INFO - [2025-07-20 15:31:24 TP0] Decode batch. #running-req: 11, #token: 30358, token usage: 0.80, gen throughput (token/s): 444.37, #queue-req: 382
  11939. 2025-07-20 15:31:24,120 - __main__ - INFO - sglang running req: 11 queue req: 382
  11940. 2025-07-20 15:31:25,111 - sglang - INFO - [2025-07-20 15:31:25 TP0] Decode batch. #running-req: 11, #token: 30798, token usage: 0.81, gen throughput (token/s): 444.03, #queue-req: 382
  11941. 2025-07-20 15:31:25,111 - __main__ - INFO - sglang running req: 11 queue req: 382
  11942. 2025-07-20 15:31:25,930 - sglang - INFO - [2025-07-20 15:31:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2719, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 381
  11943. 2025-07-20 15:31:25,930 - __main__ - INFO - sglang running req: 10 queue req: 381
  11944. 2025-07-20 15:31:26,910 - sglang - INFO - [2025-07-20 15:31:26 TP0] Decode batch. #running-req: 11, #token: 30136, token usage: 0.79, gen throughput (token/s): 244.02, #queue-req: 381
  11945. 2025-07-20 15:31:26,910 - __main__ - INFO - sglang running req: 11 queue req: 381
  11946. 2025-07-20 15:31:27,897 - sglang - INFO - [2025-07-20 15:31:27 TP0] Decode batch. #running-req: 11, #token: 30576, token usage: 0.80, gen throughput (token/s): 445.84, #queue-req: 381
  11947. 2025-07-20 15:31:27,897 - __main__ - INFO - sglang running req: 11 queue req: 381
  11948. 2025-07-20 15:31:28,691 - sglang - INFO - [2025-07-20 15:31:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 380
  11949. 2025-07-20 15:31:28,691 - __main__ - INFO - sglang running req: 10 queue req: 380
  11950. 2025-07-20 15:31:29,710 - sglang - INFO - [2025-07-20 15:31:29 TP0] Decode batch. #running-req: 11, #token: 30919, token usage: 0.81, gen throughput (token/s): 242.16, #queue-req: 380
  11951. 2025-07-20 15:31:29,710 - __main__ - INFO - sglang running req: 11 queue req: 380
  11952. 2025-07-20 15:31:29,909 - sglang - INFO - [2025-07-20 15:31:29 TP0] Prefill batch. #new-seq: 1, #new-token: 1271, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 379
  11953. 2025-07-20 15:31:29,909 - __main__ - INFO - sglang running req: 10 queue req: 379
  11954. 2025-07-20 15:31:31,215 - sglang - INFO - [2025-07-20 15:31:31 TP0] Decode batch. #running-req: 11, #token: 29777, token usage: 0.78, gen throughput (token/s): 291.70, #queue-req: 379
  11955. 2025-07-20 15:31:31,215 - __main__ - INFO - sglang running req: 11 queue req: 379
  11956. 2025-07-20 15:31:31,870 - sglang - INFO - [2025-07-20 15:31:31 TP0] Prefill batch. #new-seq: 1, #new-token: 2701, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 378
  11957. 2025-07-20 15:31:31,870 - __main__ - INFO - sglang running req: 10 queue req: 378
  11958. 2025-07-20 15:31:33,031 - sglang - INFO - [2025-07-20 15:31:33 TP0] Decode batch. #running-req: 11, #token: 30798, token usage: 0.81, gen throughput (token/s): 241.75, #queue-req: 378
  11959. 2025-07-20 15:31:33,031 - __main__ - INFO - sglang running req: 11 queue req: 378
  11960. 2025-07-20 15:31:33,607 - sglang - INFO - [2025-07-20 15:31:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2203, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 377
  11961. 2025-07-20 15:31:33,607 - __main__ - INFO - sglang running req: 10 queue req: 377
  11962. 2025-07-20 15:31:33,702 - __main__ - INFO - Queue remaining: 2
  11963. 2025-07-20 15:31:33,703 - __main__ - INFO -
  11964. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11965. ----------------------------------------------------------------------------------
  11966. sglang_input_tokens 720.43 960.27
  11967. sglang_output_tokens 207.03 275.96
  11968. 2025-07-20 15:31:33,703 - __main__ - INFO -
  11969. Worker ID | finished | started
  11970. ----------+----------+--------
  11971. 0 | 122 | 500
  11972. 1 | 0 | 10
  11973. 2025-07-20 15:31:34,385 - sglang - INFO - [2025-07-20 15:31:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 376
  11974. 2025-07-20 15:31:34,385 - __main__ - INFO - sglang running req: 10 queue req: 376
  11975. 2025-07-20 15:31:35,572 - sglang - INFO - [2025-07-20 15:31:35 TP0] Decode batch. #running-req: 11, #token: 29844, token usage: 0.79, gen throughput (token/s): 172.33, #queue-req: 376
  11976. 2025-07-20 15:31:35,573 - __main__ - INFO - sglang running req: 11 queue req: 376
  11977. 2025-07-20 15:31:36,569 - sglang - INFO - [2025-07-20 15:31:36 TP0] Decode batch. #running-req: 11, #token: 30284, token usage: 0.80, gen throughput (token/s): 441.66, #queue-req: 376
  11978. 2025-07-20 15:31:36,569 - __main__ - INFO - sglang running req: 11 queue req: 376
  11979. 2025-07-20 15:31:36,718 - sglang - INFO - [2025-07-20 15:31:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2144, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 375
  11980. 2025-07-20 15:31:36,718 - __main__ - INFO - sglang running req: 10 queue req: 375
  11981. 2025-07-20 15:31:38,234 - sglang - INFO - [2025-07-20 15:31:38 TP0] Decode batch. #running-req: 11, #token: 31439, token usage: 0.83, gen throughput (token/s): 263.67, #queue-req: 375
  11982. 2025-07-20 15:31:38,234 - __main__ - INFO - sglang running req: 11 queue req: 375
  11983. 2025-07-20 15:31:39,226 - sglang - INFO - [2025-07-20 15:31:39 TP0] Decode batch. #running-req: 11, #token: 31879, token usage: 0.84, gen throughput (token/s): 443.31, #queue-req: 375
  11984. 2025-07-20 15:31:39,227 - __main__ - INFO - sglang running req: 11 queue req: 375
  11985. 2025-07-20 15:31:40,220 - sglang - INFO - [2025-07-20 15:31:40 TP0] Decode batch. #running-req: 11, #token: 32319, token usage: 0.85, gen throughput (token/s): 442.71, #queue-req: 375
  11986. 2025-07-20 15:31:40,220 - __main__ - INFO - sglang running req: 11 queue req: 375
  11987. 2025-07-20 15:31:40,444 - sglang - INFO - [2025-07-20 15:31:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2701, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 374
  11988. 2025-07-20 15:31:40,444 - __main__ - INFO - sglang running req: 10 queue req: 374
  11989. 2025-07-20 15:31:42,020 - sglang - INFO - [2025-07-20 15:31:42 TP0] Decode batch. #running-req: 11, #token: 32256, token usage: 0.85, gen throughput (token/s): 243.86, #queue-req: 374
  11990. 2025-07-20 15:31:42,021 - __main__ - INFO - sglang running req: 11 queue req: 374
  11991. 2025-07-20 15:31:42,864 - sglang - INFO - [2025-07-20 15:31:42 TP0] Prefill batch. #new-seq: 1, #new-token: 1804, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 373
  11992. 2025-07-20 15:31:42,864 - __main__ - INFO - sglang running req: 10 queue req: 373
  11993. 2025-07-20 15:31:43,662 - sglang - INFO - [2025-07-20 15:31:43 TP0] Decode batch. #running-req: 11, #token: 31638, token usage: 0.83, gen throughput (token/s): 267.48, #queue-req: 373
  11994. 2025-07-20 15:31:43,662 - __main__ - INFO - sglang running req: 11 queue req: 373
  11995. 2025-07-20 15:31:43,703 - __main__ - INFO - Queue remaining: 2
  11996. 2025-07-20 15:31:43,704 - __main__ - INFO -
  11997. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  11998. ----------------------------------------------------------------------------------
  11999. sglang_input_tokens 723.32 983.69
  12000. sglang_output_tokens 207.65 283.21
  12001. 2025-07-20 15:31:43,704 - __main__ - INFO -
  12002. Worker ID | finished | started
  12003. ----------+----------+--------
  12004. 0 | 126 | 500
  12005. 1 | 0 | 10
  12006. 2025-07-20 15:31:44,655 - sglang - INFO - [2025-07-20 15:31:44 TP0] Decode batch. #running-req: 11, #token: 32078, token usage: 0.84, gen throughput (token/s): 443.07, #queue-req: 373
  12007. 2025-07-20 15:31:44,655 - __main__ - INFO - sglang running req: 11 queue req: 373
  12008. 2025-07-20 15:31:44,929 - sglang - INFO - [2025-07-20 15:31:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 372
  12009. 2025-07-20 15:31:44,930 - __main__ - INFO - sglang running req: 10 queue req: 372
  12010. 2025-07-20 15:31:46,440 - sglang - INFO - [2025-07-20 15:31:46 TP0] Decode batch. #running-req: 11, #token: 31630, token usage: 0.83, gen throughput (token/s): 245.87, #queue-req: 372
  12011. 2025-07-20 15:31:46,440 - __main__ - INFO - sglang running req: 11 queue req: 372
  12012. 2025-07-20 15:31:47,435 - sglang - INFO - [2025-07-20 15:31:47 TP0] Decode batch. #running-req: 11, #token: 32070, token usage: 0.84, gen throughput (token/s): 442.35, #queue-req: 372
  12013. 2025-07-20 15:31:47,435 - __main__ - INFO - sglang running req: 11 queue req: 372
  12014. 2025-07-20 15:31:48,432 - sglang - INFO - [2025-07-20 15:31:48 TP0] Decode batch. #running-req: 11, #token: 32510, token usage: 0.86, gen throughput (token/s): 441.35, #queue-req: 372
  12015. 2025-07-20 15:31:48,432 - __main__ - INFO - sglang running req: 11 queue req: 372
  12016. 2025-07-20 15:31:49,353 - sglang - INFO - [2025-07-20 15:31:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2077, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 371
  12017. 2025-07-20 15:31:49,353 - __main__ - INFO - sglang running req: 10 queue req: 371
  12018. 2025-07-20 15:31:50,112 - sglang - INFO - [2025-07-20 15:31:50 TP0] Decode batch. #running-req: 11, #token: 32156, token usage: 0.85, gen throughput (token/s): 261.26, #queue-req: 371
  12019. 2025-07-20 15:31:50,112 - __main__ - INFO - sglang running req: 11 queue req: 371
  12020. 2025-07-20 15:31:50,187 - sglang - INFO - [2025-07-20 15:31:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2512, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 370
  12021. 2025-07-20 15:31:50,187 - __main__ - INFO - sglang running req: 10 queue req: 370
  12022. 2025-07-20 15:31:51,863 - sglang - INFO - [2025-07-20 15:31:51 TP0] Decode batch. #running-req: 11, #token: 31513, token usage: 0.83, gen throughput (token/s): 250.68, #queue-req: 370
  12023. 2025-07-20 15:31:51,863 - __main__ - INFO - sglang running req: 11 queue req: 370
  12024. 2025-07-20 15:31:52,857 - sglang - INFO - [2025-07-20 15:31:52 TP0] Decode batch. #running-req: 11, #token: 31953, token usage: 0.84, gen throughput (token/s): 442.77, #queue-req: 370
  12025. 2025-07-20 15:31:52,857 - __main__ - INFO - sglang running req: 11 queue req: 370
  12026. 2025-07-20 15:31:53,706 - __main__ - INFO - Queue remaining: 2
  12027. 2025-07-20 15:31:53,706 - __main__ - INFO -
  12028. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12029. ----------------------------------------------------------------------------------
  12030. sglang_input_tokens 724.46 996.42
  12031. sglang_output_tokens 208.07 287.35
  12032. 2025-07-20 15:31:53,706 - __main__ - INFO -
  12033. Worker ID | finished | started
  12034. ----------+----------+--------
  12035. 0 | 129 | 500
  12036. 1 | 0 | 10
  12037. 2025-07-20 15:31:53,851 - sglang - INFO - [2025-07-20 15:31:53 TP0] Decode batch. #running-req: 11, #token: 32393, token usage: 0.85, gen throughput (token/s): 442.62, #queue-req: 370
  12038. 2025-07-20 15:31:53,851 - __main__ - INFO - sglang running req: 11 queue req: 370
  12039. 2025-07-20 15:31:54,846 - sglang - INFO - [2025-07-20 15:31:54 TP0] Decode batch. #running-req: 11, #token: 32833, token usage: 0.86, gen throughput (token/s): 442.12, #queue-req: 370
  12040. 2025-07-20 15:31:54,847 - __main__ - INFO - sglang running req: 11 queue req: 370
  12041. 2025-07-20 15:31:55,840 - sglang - INFO - [2025-07-20 15:31:55 TP0] Decode batch. #running-req: 10, #token: 30122, token usage: 0.79, gen throughput (token/s): 431.74, #queue-req: 370
  12042. 2025-07-20 15:31:55,840 - __main__ - INFO - sglang running req: 10 queue req: 370
  12043. 2025-07-20 15:31:56,826 - sglang - INFO - [2025-07-20 15:31:56 TP0] Decode batch. #running-req: 10, #token: 30522, token usage: 0.80, gen throughput (token/s): 405.77, #queue-req: 370
  12044. 2025-07-20 15:31:56,826 - __main__ - INFO - sglang running req: 10 queue req: 370
  12045. 2025-07-20 15:31:57,295 - sglang - INFO - [2025-07-20 15:31:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2714, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 369
  12046. 2025-07-20 15:31:57,295 - __main__ - INFO - sglang running req: 9 queue req: 369
  12047. 2025-07-20 15:31:58,126 - sglang - INFO - [2025-07-20 15:31:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2543, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 368
  12048. 2025-07-20 15:31:58,127 - __main__ - INFO - sglang running req: 9 queue req: 368
  12049. 2025-07-20 15:31:59,380 - sglang - INFO - [2025-07-20 15:31:59 TP0] Decode batch. #running-req: 10, #token: 30284, token usage: 0.80, gen throughput (token/s): 155.84, #queue-req: 368
  12050. 2025-07-20 15:31:59,380 - __main__ - INFO - sglang running req: 10 queue req: 368
  12051. 2025-07-20 15:32:00,146 - sglang - INFO - [2025-07-20 15:32:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2494, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 367
  12052. 2025-07-20 15:32:00,146 - __main__ - INFO - sglang running req: 9 queue req: 367
  12053. 2025-07-20 15:32:01,162 - sglang - INFO - [2025-07-20 15:32:01 TP0] Decode batch. #running-req: 9, #token: 27349, token usage: 0.72, gen throughput (token/s): 223.34, #queue-req: 367
  12054. 2025-07-20 15:32:01,162 - __main__ - INFO - sglang running req: 9 queue req: 367
  12055. 2025-07-20 15:32:01,162 - sglang - INFO - [2025-07-20 15:32:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2355, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 366
  12056. 2025-07-20 15:32:01,162 - __main__ - INFO - sglang running req: 9 queue req: 366
  12057. 2025-07-20 15:32:02,908 - sglang - INFO - [2025-07-20 15:32:02 TP0] Decode batch. #running-req: 10, #token: 30104, token usage: 0.79, gen throughput (token/s): 229.07, #queue-req: 366
  12058. 2025-07-20 15:32:02,908 - __main__ - INFO - sglang running req: 10 queue req: 366
  12059. 2025-07-20 15:32:03,353 - sglang - INFO - [2025-07-20 15:32:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2836, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 365
  12060. 2025-07-20 15:32:03,354 - __main__ - INFO - sglang running req: 9 queue req: 365
  12061. 2025-07-20 15:32:03,707 - __main__ - INFO - Queue remaining: 2
  12062. 2025-07-20 15:32:03,707 - __main__ - INFO -
  12063. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12064. ----------------------------------------------------------------------------------
  12065. sglang_input_tokens 740.32 1012.26
  12066. sglang_output_tokens 213.39 293.96
  12067. 2025-07-20 15:32:03,707 - __main__ - INFO -
  12068. Worker ID | finished | started
  12069. ----------+----------+--------
  12070. 0 | 135 | 500
  12071. 1 | 0 | 10
  12072. 2025-07-20 15:32:04,724 - sglang - INFO - [2025-07-20 15:32:04 TP0] Decode batch. #running-req: 10, #token: 29767, token usage: 0.78, gen throughput (token/s): 219.74, #queue-req: 365
  12073. 2025-07-20 15:32:04,724 - __main__ - INFO - sglang running req: 10 queue req: 365
  12074. 2025-07-20 15:32:05,705 - sglang - INFO - [2025-07-20 15:32:05 TP0] Decode batch. #running-req: 10, #token: 30167, token usage: 0.79, gen throughput (token/s): 407.66, #queue-req: 365
  12075. 2025-07-20 15:32:05,705 - __main__ - INFO - sglang running req: 10 queue req: 365
  12076. 2025-07-20 15:32:06,380 - __main__ - WARNING - JSON decode error on attempt 0 for scripts/data/11445200MB2C47380T4440125017008.pdf-12: Unterminated string starting at: line 1 column 125 (char 124)
  12077. 2025-07-20 15:32:06,395 - sglang - INFO - [2025-07-20 15:32:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2720, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 364
  12078. 2025-07-20 15:32:06,395 - __main__ - INFO - sglang running req: 9 queue req: 364
  12079. 2025-07-20 15:32:06,596 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008.pdf-12
  12080. 2025-07-20 15:32:07,503 - sglang - INFO - [2025-07-20 15:32:07 TP0] Decode batch. #running-req: 10, #token: 28784, token usage: 0.76, gen throughput (token/s): 221.89, #queue-req: 365
  12081. 2025-07-20 15:32:07,503 - __main__ - INFO - sglang running req: 10 queue req: 365
  12082. 2025-07-20 15:32:07,651 - sglang - INFO - [2025-07-20 15:32:07 TP0] Prefill batch. #new-seq: 2, #new-token: 3524, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 363
  12083. 2025-07-20 15:32:07,651 - __main__ - INFO - sglang running req: 9 queue req: 363
  12084. 2025-07-20 15:32:09,687 - sglang - INFO - [2025-07-20 15:32:09 TP0] Decode batch. #running-req: 11, #token: 29499, token usage: 0.78, gen throughput (token/s): 198.24, #queue-req: 363
  12085. 2025-07-20 15:32:09,688 - __main__ - INFO - sglang running req: 11 queue req: 363
  12086. 2025-07-20 15:32:10,676 - sglang - INFO - [2025-07-20 15:32:10 TP0] Decode batch. #running-req: 11, #token: 29939, token usage: 0.79, gen throughput (token/s): 445.05, #queue-req: 363
  12087. 2025-07-20 15:32:10,676 - __main__ - INFO - sglang running req: 11 queue req: 363
  12088. 2025-07-20 15:32:11,516 - sglang - INFO - [2025-07-20 15:32:11 TP0] Prefill batch. #new-seq: 1, #new-token: 2575, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 362
  12089. 2025-07-20 15:32:11,516 - __main__ - INFO - sglang running req: 10 queue req: 362
  12090. 2025-07-20 15:32:12,452 - sglang - INFO - [2025-07-20 15:32:12 TP0] Decode batch. #running-req: 11, #token: 29388, token usage: 0.77, gen throughput (token/s): 247.19, #queue-req: 362
  12091. 2025-07-20 15:32:12,452 - __main__ - INFO - sglang running req: 11 queue req: 362
  12092. 2025-07-20 15:32:12,772 - sglang - INFO - [2025-07-20 15:32:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2406, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 361
  12093. 2025-07-20 15:32:12,772 - __main__ - INFO - sglang running req: 10 queue req: 361
  12094. 2025-07-20 15:32:13,709 - __main__ - INFO - Queue remaining: 2
  12095. 2025-07-20 15:32:13,709 - __main__ - INFO -
  12096. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12097. ----------------------------------------------------------------------------------
  12098. sglang_input_tokens 746.84 1005.86
  12099. sglang_output_tokens 217.03 294.92
  12100. 2025-07-20 15:32:13,709 - __main__ - INFO -
  12101. Worker ID | finished | started
  12102. ----------+----------+--------
  12103. 0 | 138 | 500
  12104. 1 | 0 | 10
  12105. 2025-07-20 15:32:14,174 - sglang - INFO - [2025-07-20 15:32:14 TP0] Decode batch. #running-req: 11, #token: 29541, token usage: 0.78, gen throughput (token/s): 254.89, #queue-req: 361
  12106. 2025-07-20 15:32:14,174 - __main__ - INFO - sglang running req: 11 queue req: 361
  12107. 2025-07-20 15:32:15,163 - sglang - INFO - [2025-07-20 15:32:15 TP0] Decode batch. #running-req: 11, #token: 29981, token usage: 0.79, gen throughput (token/s): 444.79, #queue-req: 361
  12108. 2025-07-20 15:32:15,164 - __main__ - INFO - sglang running req: 11 queue req: 361
  12109. 2025-07-20 15:32:15,238 - sglang - INFO - [2025-07-20 15:32:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2100, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 360
  12110. 2025-07-20 15:32:15,238 - __main__ - INFO - sglang running req: 10 queue req: 360
  12111. 2025-07-20 15:32:16,842 - sglang - INFO - [2025-07-20 15:32:16 TP0] Decode batch. #running-req: 11, #token: 30966, token usage: 0.82, gen throughput (token/s): 261.56, #queue-req: 360
  12112. 2025-07-20 15:32:16,842 - __main__ - INFO - sglang running req: 11 queue req: 360
  12113. 2025-07-20 15:32:17,735 - sglang - INFO - [2025-07-20 15:32:17 TP0] Prefill batch. #new-seq: 1, #new-token: 2786, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 359
  12114. 2025-07-20 15:32:17,735 - __main__ - INFO - sglang running req: 10 queue req: 359
  12115. 2025-07-20 15:32:18,652 - sglang - INFO - [2025-07-20 15:32:18 TP0] Decode batch. #running-req: 11, #token: 30927, token usage: 0.81, gen throughput (token/s): 243.53, #queue-req: 359
  12116. 2025-07-20 15:32:18,652 - __main__ - INFO - sglang running req: 11 queue req: 359
  12117. 2025-07-20 15:32:19,681 - sglang - INFO - [2025-07-20 15:32:19 TP0] Decode batch. #running-req: 11, #token: 31367, token usage: 0.83, gen throughput (token/s): 424.66, #queue-req: 359
  12118. 2025-07-20 15:32:19,681 - __main__ - INFO - sglang running req: 11 queue req: 359
  12119. 2025-07-20 15:32:20,670 - sglang - INFO - [2025-07-20 15:32:20 TP0] Decode batch. #running-req: 11, #token: 31807, token usage: 0.84, gen throughput (token/s): 444.85, #queue-req: 359
  12120. 2025-07-20 15:32:20,670 - __main__ - INFO - sglang running req: 11 queue req: 359
  12121. 2025-07-20 15:32:21,659 - sglang - INFO - [2025-07-20 15:32:21 TP0] Decode batch. #running-req: 11, #token: 32247, token usage: 0.85, gen throughput (token/s): 444.94, #queue-req: 359
  12122. 2025-07-20 15:32:21,659 - __main__ - INFO - sglang running req: 11 queue req: 359
  12123. 2025-07-20 15:32:21,932 - sglang - INFO - [2025-07-20 15:32:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2768, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 358
  12124. 2025-07-20 15:32:21,933 - __main__ - INFO - sglang running req: 10 queue req: 358
  12125. 2025-07-20 15:32:23,467 - sglang - INFO - [2025-07-20 15:32:23 TP0] Decode batch. #running-req: 11, #token: 32281, token usage: 0.85, gen throughput (token/s): 242.81, #queue-req: 358
  12126. 2025-07-20 15:32:23,467 - __main__ - INFO - sglang running req: 11 queue req: 358
  12127. 2025-07-20 15:32:23,710 - __main__ - INFO - Queue remaining: 2
  12128. 2025-07-20 15:32:23,711 - __main__ - INFO -
  12129. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12130. ----------------------------------------------------------------------------------
  12131. sglang_input_tokens 744.49 997.11
  12132. sglang_output_tokens 215.72 290.57
  12133. 2025-07-20 15:32:23,711 - __main__ - INFO -
  12134. Worker ID | finished | started
  12135. ----------+----------+--------
  12136. 0 | 141 | 500
  12137. 1 | 0 | 10
  12138. 2025-07-20 15:32:24,463 - sglang - INFO - [2025-07-20 15:32:24 TP0] Decode batch. #running-req: 11, #token: 32721, token usage: 0.86, gen throughput (token/s): 441.49, #queue-req: 358
  12139. 2025-07-20 15:32:24,463 - __main__ - INFO - sglang running req: 11 queue req: 358
  12140. 2025-07-20 15:32:25,463 - sglang - INFO - [2025-07-20 15:32:25 TP0] Decode batch. #running-req: 11, #token: 33161, token usage: 0.87, gen throughput (token/s): 440.22, #queue-req: 358
  12141. 2025-07-20 15:32:25,463 - __main__ - INFO - sglang running req: 11 queue req: 358
  12142. 2025-07-20 15:32:25,762 - sglang - INFO - [2025-07-20 15:32:25 TP0] Prefill batch. #new-seq: 1, #new-token: 1751, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 10, #queue-req: 357
  12143. 2025-07-20 15:32:25,763 - __main__ - INFO - sglang running req: 10 queue req: 357
  12144. 2025-07-20 15:32:26,472 - sglang - INFO - [2025-07-20 15:32:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2252, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 356
  12145. 2025-07-20 15:32:26,472 - __main__ - INFO - sglang running req: 10 queue req: 356
  12146. 2025-07-20 15:32:27,795 - sglang - INFO - [2025-07-20 15:32:27 TP0] Decode batch. #running-req: 11, #token: 31748, token usage: 0.84, gen throughput (token/s): 187.79, #queue-req: 356
  12147. 2025-07-20 15:32:27,795 - __main__ - INFO - sglang running req: 11 queue req: 356
  12148. 2025-07-20 15:32:28,069 - sglang - INFO - [2025-07-20 15:32:28 TP0] Prefill batch. #new-seq: 1, #new-token: 1580, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 355
  12149. 2025-07-20 15:32:28,069 - __main__ - INFO - sglang running req: 10 queue req: 355
  12150. 2025-07-20 15:32:28,849 - sglang - INFO - [2025-07-20 15:32:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2805, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 354
  12151. 2025-07-20 15:32:28,849 - __main__ - INFO - sglang running req: 10 queue req: 354
  12152. 2025-07-20 15:32:30,181 - sglang - INFO - [2025-07-20 15:32:30 TP0] Decode batch. #running-req: 11, #token: 30600, token usage: 0.81, gen throughput (token/s): 183.56, #queue-req: 354
  12153. 2025-07-20 15:32:30,181 - __main__ - INFO - sglang running req: 11 queue req: 354
  12154. 2025-07-20 15:32:30,454 - sglang - INFO - [2025-07-20 15:32:30 TP0] Prefill batch. #new-seq: 1, #new-token: 1897, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 353
  12155. 2025-07-20 15:32:30,455 - __main__ - INFO - sglang running req: 10 queue req: 353
  12156. 2025-07-20 15:32:31,822 - sglang - INFO - [2025-07-20 15:32:31 TP0] Decode batch. #running-req: 11, #token: 29391, token usage: 0.77, gen throughput (token/s): 267.45, #queue-req: 353
  12157. 2025-07-20 15:32:31,823 - __main__ - INFO - sglang running req: 11 queue req: 353
  12158. 2025-07-20 15:32:32,811 - sglang - INFO - [2025-07-20 15:32:32 TP0] Decode batch. #running-req: 11, #token: 29831, token usage: 0.79, gen throughput (token/s): 445.18, #queue-req: 353
  12159. 2025-07-20 15:32:32,811 - __main__ - INFO - sglang running req: 11 queue req: 353
  12160. 2025-07-20 15:32:33,712 - __main__ - INFO - Queue remaining: 2
  12161. 2025-07-20 15:32:33,713 - __main__ - INFO -
  12162. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12163. ----------------------------------------------------------------------------------
  12164. sglang_input_tokens 754.69 1024.19
  12165. sglang_output_tokens 218.07 297.92
  12166. 2025-07-20 15:32:33,713 - __main__ - INFO -
  12167. Worker ID | finished | started
  12168. ----------+----------+--------
  12169. 0 | 146 | 500
  12170. 1 | 0 | 10
  12171. 2025-07-20 15:32:33,800 - sglang - INFO - [2025-07-20 15:32:33 TP0] Decode batch. #running-req: 11, #token: 30271, token usage: 0.80, gen throughput (token/s): 444.69, #queue-req: 353
  12172. 2025-07-20 15:32:33,801 - __main__ - INFO - sglang running req: 11 queue req: 353
  12173. 2025-07-20 15:32:34,444 - sglang - INFO - [2025-07-20 15:32:34 TP0] Prefill batch. #new-seq: 1, #new-token: 1937, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 352
  12174. 2025-07-20 15:32:34,444 - __main__ - INFO - sglang running req: 10 queue req: 352
  12175. 2025-07-20 15:32:35,198 - sglang - INFO - [2025-07-20 15:32:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2772, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 351
  12176. 2025-07-20 15:32:35,198 - __main__ - INFO - sglang running req: 10 queue req: 351
  12177. 2025-07-20 15:32:36,279 - sglang - INFO - [2025-07-20 15:32:36 TP0] Decode batch. #running-req: 11, #token: 28525, token usage: 0.75, gen throughput (token/s): 176.73, #queue-req: 351
  12178. 2025-07-20 15:32:36,279 - __main__ - INFO - sglang running req: 11 queue req: 351
  12179. 2025-07-20 15:32:37,265 - sglang - INFO - [2025-07-20 15:32:37 TP0] Decode batch. #running-req: 11, #token: 28965, token usage: 0.76, gen throughput (token/s): 446.33, #queue-req: 351
  12180. 2025-07-20 15:32:37,265 - __main__ - INFO - sglang running req: 11 queue req: 351
  12181. 2025-07-20 15:32:37,734 - sglang - INFO - [2025-07-20 15:32:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2762, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 350
  12182. 2025-07-20 15:32:37,735 - __main__ - INFO - sglang running req: 10 queue req: 350
  12183. 2025-07-20 15:32:39,063 - sglang - INFO - [2025-07-20 15:32:39 TP0] Decode batch. #running-req: 11, #token: 25879, token usage: 0.68, gen throughput (token/s): 244.02, #queue-req: 350
  12184. 2025-07-20 15:32:39,064 - __main__ - INFO - sglang running req: 11 queue req: 350
  12185. 2025-07-20 15:32:39,088 - sglang - INFO - [2025-07-20 15:32:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2746, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 349
  12186. 2025-07-20 15:32:39,088 - __main__ - INFO - sglang running req: 10 queue req: 349
  12187. 2025-07-20 15:32:40,095 - sglang - INFO - [2025-07-20 15:32:40 TP0] Prefill batch. #new-seq: 1, #new-token: 1892, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 348
  12188. 2025-07-20 15:32:40,096 - __main__ - INFO - sglang running req: 10 queue req: 348
  12189. 2025-07-20 15:32:41,511 - sglang - INFO - [2025-07-20 15:32:41 TP0] Decode batch. #running-req: 11, #token: 29098, token usage: 0.77, gen throughput (token/s): 178.99, #queue-req: 348
  12190. 2025-07-20 15:32:41,511 - __main__ - INFO - sglang running req: 11 queue req: 348
  12191. 2025-07-20 15:32:42,497 - sglang - INFO - [2025-07-20 15:32:42 TP0] Decode batch. #running-req: 11, #token: 29538, token usage: 0.78, gen throughput (token/s): 445.91, #queue-req: 348
  12192. 2025-07-20 15:32:42,498 - __main__ - INFO - sglang running req: 11 queue req: 348
  12193. 2025-07-20 15:32:43,481 - sglang - INFO - [2025-07-20 15:32:43 TP0] Decode batch. #running-req: 11, #token: 29978, token usage: 0.79, gen throughput (token/s): 447.04, #queue-req: 348
  12194. 2025-07-20 15:32:43,482 - __main__ - INFO - sglang running req: 11 queue req: 348
  12195. 2025-07-20 15:32:43,714 - __main__ - INFO - Queue remaining: 2
  12196. 2025-07-20 15:32:43,715 - __main__ - INFO -
  12197. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12198. ----------------------------------------------------------------------------------
  12199. sglang_input_tokens 763.76 1017.35
  12200. sglang_output_tokens 220.31 294.41
  12201. 2025-07-20 15:32:43,715 - __main__ - INFO -
  12202. Worker ID | finished | started
  12203. ----------+----------+--------
  12204. 0 | 151 | 500
  12205. 1 | 0 | 10
  12206. 2025-07-20 15:32:44,073 - sglang - INFO - [2025-07-20 15:32:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2225, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 347
  12207. 2025-07-20 15:32:44,073 - __main__ - INFO - sglang running req: 10 queue req: 347
  12208. 2025-07-20 15:32:45,172 - sglang - INFO - [2025-07-20 15:32:45 TP0] Prefill batch. #new-seq: 1, #new-token: 2902, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 346
  12209. 2025-07-20 15:32:45,172 - __main__ - INFO - sglang running req: 10 queue req: 346
  12210. 2025-07-20 15:32:46,033 - sglang - INFO - [2025-07-20 15:32:46 TP0] Decode batch. #running-req: 11, #token: 30627, token usage: 0.81, gen throughput (token/s): 171.70, #queue-req: 346
  12211. 2025-07-20 15:32:46,033 - __main__ - INFO - sglang running req: 11 queue req: 346
  12212. 2025-07-20 15:32:47,024 - sglang - INFO - [2025-07-20 15:32:47 TP0] Decode batch. #running-req: 11, #token: 31067, token usage: 0.82, gen throughput (token/s): 443.73, #queue-req: 346
  12213. 2025-07-20 15:32:47,024 - __main__ - INFO - sglang running req: 11 queue req: 346
  12214. 2025-07-20 15:32:48,016 - sglang - INFO - [2025-07-20 15:32:48 TP0] Decode batch. #running-req: 11, #token: 31507, token usage: 0.83, gen throughput (token/s): 443.81, #queue-req: 346
  12215. 2025-07-20 15:32:48,016 - __main__ - INFO - sglang running req: 11 queue req: 346
  12216. 2025-07-20 15:32:49,009 - sglang - INFO - [2025-07-20 15:32:49 TP0] Decode batch. #running-req: 11, #token: 31947, token usage: 0.84, gen throughput (token/s): 442.97, #queue-req: 346
  12217. 2025-07-20 15:32:49,009 - __main__ - INFO - sglang running req: 11 queue req: 346
  12218. 2025-07-20 15:32:49,183 - sglang - INFO - [2025-07-20 15:32:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2257, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 345
  12219. 2025-07-20 15:32:49,183 - __main__ - INFO - sglang running req: 10 queue req: 345
  12220. 2025-07-20 15:32:50,727 - sglang - INFO - [2025-07-20 15:32:50 TP0] Decode batch. #running-req: 11, #token: 31007, token usage: 0.82, gen throughput (token/s): 255.52, #queue-req: 345
  12221. 2025-07-20 15:32:50,727 - __main__ - INFO - sglang running req: 11 queue req: 345
  12222. 2025-07-20 15:32:51,122 - sglang - INFO - [2025-07-20 15:32:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2519, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 344
  12223. 2025-07-20 15:32:51,123 - __main__ - INFO - sglang running req: 10 queue req: 344
  12224. 2025-07-20 15:32:52,475 - sglang - INFO - [2025-07-20 15:32:52 TP0] Decode batch. #running-req: 11, #token: 31504, token usage: 0.83, gen throughput (token/s): 251.16, #queue-req: 344
  12225. 2025-07-20 15:32:52,475 - __main__ - INFO - sglang running req: 11 queue req: 344
  12226. 2025-07-20 15:32:52,574 - sglang - INFO - [2025-07-20 15:32:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2084, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 343
  12227. 2025-07-20 15:32:52,575 - __main__ - INFO - sglang running req: 10 queue req: 343
  12228. 2025-07-20 15:32:53,717 - __main__ - INFO - Queue remaining: 2
  12229. 2025-07-20 15:32:53,718 - __main__ - INFO -
  12230. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12231. ----------------------------------------------------------------------------------
  12232. sglang_input_tokens 769.97 1011.21
  12233. sglang_output_tokens 221.53 292.11
  12234. 2025-07-20 15:32:53,718 - __main__ - INFO -
  12235. Worker ID | finished | started
  12236. ----------+----------+--------
  12237. 0 | 156 | 500
  12238. 1 | 0 | 10
  12239. 2025-07-20 15:32:54,155 - sglang - INFO - [2025-07-20 15:32:54 TP0] Decode batch. #running-req: 11, #token: 31633, token usage: 0.83, gen throughput (token/s): 261.31, #queue-req: 343
  12240. 2025-07-20 15:32:54,155 - __main__ - INFO - sglang running req: 11 queue req: 343
  12241. 2025-07-20 15:32:54,924 - sglang - INFO - [2025-07-20 15:32:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2398, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 342
  12242. 2025-07-20 15:32:54,924 - __main__ - INFO - sglang running req: 10 queue req: 342
  12243. 2025-07-20 15:32:55,897 - sglang - INFO - [2025-07-20 15:32:55 TP0] Decode batch. #running-req: 11, #token: 30843, token usage: 0.81, gen throughput (token/s): 251.94, #queue-req: 342
  12244. 2025-07-20 15:32:55,898 - __main__ - INFO - sglang running req: 11 queue req: 342
  12245. 2025-07-20 15:32:56,890 - sglang - INFO - [2025-07-20 15:32:56 TP0] Decode batch. #running-req: 11, #token: 31283, token usage: 0.82, gen throughput (token/s): 443.09, #queue-req: 342
  12246. 2025-07-20 15:32:56,891 - __main__ - INFO - sglang running req: 11 queue req: 342
  12247. 2025-07-20 15:32:57,710 - sglang - INFO - [2025-07-20 15:32:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2052, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 341
  12248. 2025-07-20 15:32:57,710 - __main__ - INFO - sglang running req: 10 queue req: 341
  12249. 2025-07-20 15:32:58,567 - sglang - INFO - [2025-07-20 15:32:58 TP0] Decode batch. #running-req: 11, #token: 31379, token usage: 0.83, gen throughput (token/s): 261.78, #queue-req: 341
  12250. 2025-07-20 15:32:58,568 - __main__ - INFO - sglang running req: 11 queue req: 341
  12251. 2025-07-20 15:32:59,484 - sglang - INFO - [2025-07-20 15:32:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2838, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 340
  12252. 2025-07-20 15:32:59,484 - __main__ - INFO - sglang running req: 10 queue req: 340
  12253. 2025-07-20 15:33:00,391 - sglang - INFO - [2025-07-20 15:33:00 TP0] Decode batch. #running-req: 11, #token: 32019, token usage: 0.84, gen throughput (token/s): 240.77, #queue-req: 340
  12254. 2025-07-20 15:33:00,391 - __main__ - INFO - sglang running req: 11 queue req: 340
  12255. 2025-07-20 15:33:01,238 - sglang - INFO - [2025-07-20 15:33:01 TP0] Prefill batch. #new-seq: 1, #new-token: 1881, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 339
  12256. 2025-07-20 15:33:01,239 - __main__ - INFO - sglang running req: 10 queue req: 339
  12257. 2025-07-20 15:33:02,039 - sglang - INFO - [2025-07-20 15:33:02 TP0] Decode batch. #running-req: 11, #token: 30680, token usage: 0.81, gen throughput (token/s): 266.30, #queue-req: 339
  12258. 2025-07-20 15:33:02,039 - __main__ - INFO - sglang running req: 11 queue req: 339
  12259. 2025-07-20 15:33:03,032 - sglang - INFO - [2025-07-20 15:33:03 TP0] Decode batch. #running-req: 11, #token: 31120, token usage: 0.82, gen throughput (token/s): 443.43, #queue-req: 339
  12260. 2025-07-20 15:33:03,032 - __main__ - INFO - sglang running req: 11 queue req: 339
  12261. 2025-07-20 15:33:03,719 - __main__ - INFO - Queue remaining: 2
  12262. 2025-07-20 15:33:03,720 - __main__ - INFO -
  12263. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12264. ----------------------------------------------------------------------------------
  12265. sglang_input_tokens 774.03 1016.18
  12266. sglang_output_tokens 222.38 293.57
  12267. 2025-07-20 15:33:03,720 - __main__ - INFO -
  12268. Worker ID | finished | started
  12269. ----------+----------+--------
  12270. 0 | 160 | 500
  12271. 1 | 0 | 10
  12272. 2025-07-20 15:33:04,025 - sglang - INFO - [2025-07-20 15:33:04 TP0] Decode batch. #running-req: 11, #token: 31560, token usage: 0.83, gen throughput (token/s): 443.02, #queue-req: 339
  12273. 2025-07-20 15:33:04,025 - __main__ - INFO - sglang running req: 11 queue req: 339
  12274. 2025-07-20 15:33:05,018 - sglang - INFO - [2025-07-20 15:33:05 TP0] Decode batch. #running-req: 11, #token: 32000, token usage: 0.84, gen throughput (token/s): 442.84, #queue-req: 339
  12275. 2025-07-20 15:33:05,019 - __main__ - INFO - sglang running req: 11 queue req: 339
  12276. 2025-07-20 15:33:06,082 - sglang - INFO - [2025-07-20 15:33:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2685, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 338
  12277. 2025-07-20 15:33:06,082 - __main__ - INFO - sglang running req: 10 queue req: 338
  12278. 2025-07-20 15:33:06,970 - sglang - INFO - [2025-07-20 15:33:06 TP0] Decode batch. #running-req: 11, #token: 31505, token usage: 0.83, gen throughput (token/s): 224.95, #queue-req: 338
  12279. 2025-07-20 15:33:06,970 - __main__ - INFO - sglang running req: 11 queue req: 338
  12280. 2025-07-20 15:33:07,959 - sglang - INFO - [2025-07-20 15:33:07 TP0] Decode batch. #running-req: 11, #token: 31945, token usage: 0.84, gen throughput (token/s): 444.80, #queue-req: 338
  12281. 2025-07-20 15:33:07,959 - __main__ - INFO - sglang running req: 11 queue req: 338
  12282. 2025-07-20 15:33:08,953 - sglang - INFO - [2025-07-20 15:33:08 TP0] Decode batch. #running-req: 11, #token: 32385, token usage: 0.85, gen throughput (token/s): 442.61, #queue-req: 338
  12283. 2025-07-20 15:33:08,954 - __main__ - INFO - sglang running req: 11 queue req: 338
  12284. 2025-07-20 15:33:09,939 - sglang - INFO - [2025-07-20 15:33:09 TP0] Decode batch. #running-req: 10, #token: 30406, token usage: 0.80, gen throughput (token/s): 408.62, #queue-req: 338
  12285. 2025-07-20 15:33:09,940 - __main__ - INFO - sglang running req: 10 queue req: 338
  12286. 2025-07-20 15:33:10,113 - sglang - INFO - [2025-07-20 15:33:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2891, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 337
  12287. 2025-07-20 15:33:10,113 - __main__ - INFO - sglang running req: 9 queue req: 337
  12288. 2025-07-20 15:33:11,753 - sglang - INFO - [2025-07-20 15:33:11 TP0] Decode batch. #running-req: 9, #token: 26362, token usage: 0.69, gen throughput (token/s): 219.50, #queue-req: 337
  12289. 2025-07-20 15:33:11,753 - __main__ - INFO - sglang running req: 9 queue req: 337
  12290. 2025-07-20 15:33:11,753 - sglang - INFO - [2025-07-20 15:33:11 TP0] Prefill batch. #new-seq: 1, #new-token: 2898, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 336
  12291. 2025-07-20 15:33:11,753 - __main__ - INFO - sglang running req: 9 queue req: 336
  12292. 2025-07-20 15:33:13,565 - sglang - INFO - [2025-07-20 15:33:13 TP0] Decode batch. #running-req: 10, #token: 29660, token usage: 0.78, gen throughput (token/s): 220.70, #queue-req: 336
  12293. 2025-07-20 15:33:13,565 - __main__ - INFO - sglang running req: 10 queue req: 336
  12294. 2025-07-20 15:33:13,713 - sglang - INFO - [2025-07-20 15:33:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2739, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 335
  12295. 2025-07-20 15:33:13,713 - __main__ - INFO - sglang running req: 9 queue req: 335
  12296. 2025-07-20 15:33:13,720 - __main__ - INFO - Queue remaining: 2
  12297. 2025-07-20 15:33:13,721 - __main__ - INFO -
  12298. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12299. ----------------------------------------------------------------------------------
  12300. sglang_input_tokens 783.38 1016.88
  12301. sglang_output_tokens 225.15 293.53
  12302. 2025-07-20 15:33:13,721 - __main__ - INFO -
  12303. Worker ID | finished | started
  12304. ----------+----------+--------
  12305. 0 | 165 | 500
  12306. 1 | 0 | 10
  12307. 2025-07-20 15:33:15,349 - sglang - INFO - [2025-07-20 15:33:15 TP0] Decode batch. #running-req: 10, #token: 30113, token usage: 0.79, gen throughput (token/s): 223.63, #queue-req: 335
  12308. 2025-07-20 15:33:15,350 - __main__ - INFO - sglang running req: 10 queue req: 335
  12309. 2025-07-20 15:33:15,769 - sglang - INFO - [2025-07-20 15:33:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2978, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 334
  12310. 2025-07-20 15:33:15,769 - __main__ - INFO - sglang running req: 9 queue req: 334
  12311. 2025-07-20 15:33:16,856 - sglang - INFO - [2025-07-20 15:33:16 TP0] Prefill batch. #new-seq: 1, #new-token: 1462, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 333
  12312. 2025-07-20 15:33:16,856 - __main__ - INFO - sglang running req: 9 queue req: 333
  12313. 2025-07-20 15:33:17,712 - sglang - INFO - [2025-07-20 15:33:17 TP0] Prefill batch. #new-seq: 2, #new-token: 5319, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.60, #running-req: 9, #queue-req: 331
  12314. 2025-07-20 15:33:17,712 - __main__ - INFO - sglang running req: 9 queue req: 331
  12315. 2025-07-20 15:33:19,285 - sglang - INFO - [2025-07-20 15:33:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2732, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 330
  12316. 2025-07-20 15:33:19,285 - __main__ - INFO - sglang running req: 10 queue req: 330
  12317. 2025-07-20 15:33:20,115 - sglang - INFO - [2025-07-20 15:33:20 TP0] Decode batch. #running-req: 11, #token: 30703, token usage: 0.81, gen throughput (token/s): 83.51, #queue-req: 330
  12318. 2025-07-20 15:33:20,116 - __main__ - INFO - sglang running req: 11 queue req: 330
  12319. 2025-07-20 15:33:21,101 - sglang - INFO - [2025-07-20 15:33:21 TP0] Decode batch. #running-req: 11, #token: 31143, token usage: 0.82, gen throughput (token/s): 446.54, #queue-req: 330
  12320. 2025-07-20 15:33:21,101 - __main__ - INFO - sglang running req: 11 queue req: 330
  12321. 2025-07-20 15:33:22,087 - sglang - INFO - [2025-07-20 15:33:22 TP0] Decode batch. #running-req: 11, #token: 31583, token usage: 0.83, gen throughput (token/s): 446.12, #queue-req: 330
  12322. 2025-07-20 15:33:22,087 - __main__ - INFO - sglang running req: 11 queue req: 330
  12323. 2025-07-20 15:33:23,079 - sglang - INFO - [2025-07-20 15:33:23 TP0] Decode batch. #running-req: 11, #token: 32023, token usage: 0.84, gen throughput (token/s): 443.43, #queue-req: 330
  12324. 2025-07-20 15:33:23,080 - __main__ - INFO - sglang running req: 11 queue req: 330
  12325. 2025-07-20 15:33:23,722 - __main__ - INFO - Queue remaining: 2
  12326. 2025-07-20 15:33:23,722 - __main__ - INFO -
  12327. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12328. ----------------------------------------------------------------------------------
  12329. sglang_input_tokens 786.77 1030.07
  12330. sglang_output_tokens 226.11 296.90
  12331. 2025-07-20 15:33:23,722 - __main__ - INFO -
  12332. Worker ID | finished | started
  12333. ----------+----------+--------
  12334. 0 | 169 | 500
  12335. 1 | 0 | 10
  12336. 2025-07-20 15:33:24,073 - sglang - INFO - [2025-07-20 15:33:24 TP0] Decode batch. #running-req: 10, #token: 29296, token usage: 0.77, gen throughput (token/s): 441.63, #queue-req: 330
  12337. 2025-07-20 15:33:24,073 - __main__ - INFO - sglang running req: 10 queue req: 330
  12338. 2025-07-20 15:33:24,074 - sglang - INFO - [2025-07-20 15:33:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2829, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 329
  12339. 2025-07-20 15:33:24,074 - __main__ - INFO - sglang running req: 10 queue req: 329
  12340. 2025-07-20 15:33:25,894 - sglang - INFO - [2025-07-20 15:33:25 TP0] Decode batch. #running-req: 11, #token: 32565, token usage: 0.86, gen throughput (token/s): 241.66, #queue-req: 329
  12341. 2025-07-20 15:33:25,894 - __main__ - INFO - sglang running req: 11 queue req: 329
  12342. 2025-07-20 15:33:26,896 - sglang - INFO - [2025-07-20 15:33:26 TP0] Decode batch. #running-req: 11, #token: 33005, token usage: 0.87, gen throughput (token/s): 442.15, #queue-req: 329
  12343. 2025-07-20 15:33:26,897 - __main__ - INFO - sglang running req: 11 queue req: 329
  12344. 2025-07-20 15:33:27,884 - sglang - INFO - [2025-07-20 15:33:27 TP0] Decode batch. #running-req: 10, #token: 31689, token usage: 0.83, gen throughput (token/s): 425.29, #queue-req: 329
  12345. 2025-07-20 15:33:27,884 - __main__ - INFO - sglang running req: 10 queue req: 329
  12346. 2025-07-20 15:33:28,872 - sglang - INFO - [2025-07-20 15:33:28 TP0] Decode batch. #running-req: 10, #token: 32089, token usage: 0.84, gen throughput (token/s): 404.87, #queue-req: 329
  12347. 2025-07-20 15:33:28,872 - __main__ - INFO - sglang running req: 10 queue req: 329
  12348. 2025-07-20 15:33:29,861 - sglang - INFO - [2025-07-20 15:33:29 TP0] Decode batch. #running-req: 10, #token: 32489, token usage: 0.86, gen throughput (token/s): 404.47, #queue-req: 329
  12349. 2025-07-20 15:33:29,861 - __main__ - INFO - sglang running req: 10 queue req: 329
  12350. 2025-07-20 15:33:30,754 - sglang - INFO - [2025-07-20 15:33:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2573, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 328
  12351. 2025-07-20 15:33:30,755 - __main__ - INFO - sglang running req: 9 queue req: 328
  12352. 2025-07-20 15:33:31,638 - sglang - INFO - [2025-07-20 15:33:31 TP0] Decode batch. #running-req: 10, #token: 31745, token usage: 0.84, gen throughput (token/s): 224.54, #queue-req: 328
  12353. 2025-07-20 15:33:31,638 - __main__ - INFO - sglang running req: 10 queue req: 328
  12354. 2025-07-20 15:33:32,662 - sglang - INFO - [2025-07-20 15:33:32 TP0] Decode batch. #running-req: 10, #token: 32145, token usage: 0.85, gen throughput (token/s): 390.48, #queue-req: 328
  12355. 2025-07-20 15:33:32,662 - __main__ - INFO - sglang running req: 10 queue req: 328
  12356. 2025-07-20 15:33:33,654 - sglang - INFO - [2025-07-20 15:33:33 TP0] Decode batch. #running-req: 10, #token: 32545, token usage: 0.86, gen throughput (token/s): 403.28, #queue-req: 328
  12357. 2025-07-20 15:33:33,654 - __main__ - INFO - sglang running req: 10 queue req: 328
  12358. 2025-07-20 15:33:33,724 - __main__ - INFO - Queue remaining: 2
  12359. 2025-07-20 15:33:33,724 - __main__ - INFO -
  12360. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12361. ----------------------------------------------------------------------------------
  12362. sglang_input_tokens 784.51 999.24
  12363. sglang_output_tokens 225.47 288.94
  12364. 2025-07-20 15:33:33,724 - __main__ - INFO -
  12365. Worker ID | finished | started
  12366. ----------+----------+--------
  12367. 0 | 172 | 500
  12368. 1 | 0 | 10
  12369. 2025-07-20 15:33:34,643 - sglang - INFO - [2025-07-20 15:33:34 TP0] Decode batch. #running-req: 10, #token: 32945, token usage: 0.87, gen throughput (token/s): 404.34, #queue-req: 328
  12370. 2025-07-20 15:33:34,643 - __main__ - INFO - sglang running req: 10 queue req: 328
  12371. 2025-07-20 15:33:35,064 - sglang - INFO - [2025-07-20 15:33:35 TP0] Prefill batch. #new-seq: 1, #new-token: 1640, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 9, #queue-req: 327
  12372. 2025-07-20 15:33:35,064 - __main__ - INFO - sglang running req: 9 queue req: 327
  12373. 2025-07-20 15:33:36,210 - sglang - INFO - [2025-07-20 15:33:36 TP0] Decode batch. #running-req: 10, #token: 31481, token usage: 0.83, gen throughput (token/s): 254.60, #queue-req: 327
  12374. 2025-07-20 15:33:36,211 - __main__ - INFO - sglang running req: 10 queue req: 327
  12375. 2025-07-20 15:33:37,196 - sglang - INFO - [2025-07-20 15:33:37 TP0] Decode batch. #running-req: 10, #token: 31881, token usage: 0.84, gen throughput (token/s): 405.65, #queue-req: 327
  12376. 2025-07-20 15:33:37,197 - __main__ - INFO - sglang running req: 10 queue req: 327
  12377. 2025-07-20 15:33:38,186 - sglang - INFO - [2025-07-20 15:33:38 TP0] Decode batch. #running-req: 10, #token: 32281, token usage: 0.85, gen throughput (token/s): 404.24, #queue-req: 327
  12378. 2025-07-20 15:33:38,186 - __main__ - INFO - sglang running req: 10 queue req: 327
  12379. 2025-07-20 15:33:39,178 - sglang - INFO - [2025-07-20 15:33:39 TP0] Decode batch. #running-req: 10, #token: 32681, token usage: 0.86, gen throughput (token/s): 403.35, #queue-req: 327
  12380. 2025-07-20 15:33:39,178 - __main__ - INFO - sglang running req: 10 queue req: 327
  12381. 2025-07-20 15:33:40,170 - sglang - INFO - [2025-07-20 15:33:40 TP0] Decode batch. #running-req: 10, #token: 33081, token usage: 0.87, gen throughput (token/s): 403.31, #queue-req: 327
  12382. 2025-07-20 15:33:40,170 - __main__ - INFO - sglang running req: 10 queue req: 327
  12383. 2025-07-20 15:33:40,666 - sglang - INFO - [2025-07-20 15:33:40 TP0] Prefill batch. #new-seq: 1, #new-token: 1618, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 9, #queue-req: 326
  12384. 2025-07-20 15:33:40,666 - __main__ - INFO - sglang running req: 9 queue req: 326
  12385. 2025-07-20 15:33:41,369 - sglang - INFO - [2025-07-20 15:33:41 TP0] Prefill batch. #new-seq: 1, #new-token: 1855, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 325
  12386. 2025-07-20 15:33:41,369 - __main__ - INFO - sglang running req: 9 queue req: 325
  12387. 2025-07-20 15:33:42,387 - sglang - INFO - [2025-07-20 15:33:42 TP0] Decode batch. #running-req: 10, #token: 29612, token usage: 0.78, gen throughput (token/s): 179.46, #queue-req: 325
  12388. 2025-07-20 15:33:42,387 - __main__ - INFO - sglang running req: 10 queue req: 325
  12389. 2025-07-20 15:33:43,321 - sglang - INFO - [2025-07-20 15:33:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2625, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 324
  12390. 2025-07-20 15:33:43,321 - __main__ - INFO - sglang running req: 9 queue req: 324
  12391. 2025-07-20 15:33:43,725 - __main__ - INFO - Queue remaining: 2
  12392. 2025-07-20 15:33:43,726 - __main__ - INFO -
  12393. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12394. ----------------------------------------------------------------------------------
  12395. sglang_input_tokens 790.38 1004.70
  12396. sglang_output_tokens 227.50 292.34
  12397. 2025-07-20 15:33:43,726 - __main__ - INFO -
  12398. Worker ID | finished | started
  12399. ----------+----------+--------
  12400. 0 | 176 | 500
  12401. 1 | 0 | 10
  12402. 2025-07-20 15:33:44,155 - sglang - INFO - [2025-07-20 15:33:44 TP0] Decode batch. #running-req: 10, #token: 29198, token usage: 0.77, gen throughput (token/s): 225.75, #queue-req: 324
  12403. 2025-07-20 15:33:44,155 - __main__ - INFO - sglang running req: 10 queue req: 324
  12404. 2025-07-20 15:33:44,769 - sglang - INFO - [2025-07-20 15:33:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2697, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 323
  12405. 2025-07-20 15:33:44,769 - __main__ - INFO - sglang running req: 9 queue req: 323
  12406. 2025-07-20 15:33:45,645 - sglang - INFO - [2025-07-20 15:33:45 TP0] Prefill batch. #new-seq: 1, #new-token: 2785, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 322
  12407. 2025-07-20 15:33:45,646 - __main__ - INFO - sglang running req: 9 queue req: 322
  12408. 2025-07-20 15:33:46,747 - sglang - INFO - [2025-07-20 15:33:46 TP0] Decode batch. #running-req: 10, #token: 26069, token usage: 0.69, gen throughput (token/s): 153.55, #queue-req: 322
  12409. 2025-07-20 15:33:46,747 - __main__ - INFO - sglang running req: 10 queue req: 322
  12410. 2025-07-20 15:33:46,771 - sglang - INFO - [2025-07-20 15:33:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2779, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 321
  12411. 2025-07-20 15:33:46,771 - __main__ - INFO - sglang running req: 9 queue req: 321
  12412. 2025-07-20 15:33:47,897 - sglang - INFO - [2025-07-20 15:33:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2202, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 320
  12413. 2025-07-20 15:33:47,897 - __main__ - INFO - sglang running req: 9 queue req: 320
  12414. 2025-07-20 15:33:49,254 - sglang - INFO - [2025-07-20 15:33:49 TP0] Decode batch. #running-req: 10, #token: 27594, token usage: 0.73, gen throughput (token/s): 158.71, #queue-req: 320
  12415. 2025-07-20 15:33:49,255 - __main__ - INFO - sglang running req: 10 queue req: 320
  12416. 2025-07-20 15:33:50,285 - sglang - INFO - [2025-07-20 15:33:50 TP0] Decode batch. #running-req: 10, #token: 27994, token usage: 0.74, gen throughput (token/s): 388.19, #queue-req: 320
  12417. 2025-07-20 15:33:50,285 - __main__ - INFO - sglang running req: 10 queue req: 320
  12418. 2025-07-20 15:33:51,261 - sglang - INFO - [2025-07-20 15:33:51 TP0] Decode batch. #running-req: 10, #token: 28394, token usage: 0.75, gen throughput (token/s): 409.60, #queue-req: 320
  12419. 2025-07-20 15:33:51,262 - __main__ - INFO - sglang running req: 10 queue req: 320
  12420. 2025-07-20 15:33:52,241 - sglang - INFO - [2025-07-20 15:33:52 TP0] Decode batch. #running-req: 10, #token: 28794, token usage: 0.76, gen throughput (token/s): 408.30, #queue-req: 320
  12421. 2025-07-20 15:33:52,241 - __main__ - INFO - sglang running req: 10 queue req: 320
  12422. 2025-07-20 15:33:52,683 - sglang - INFO - [2025-07-20 15:33:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 319
  12423. 2025-07-20 15:33:52,683 - __main__ - INFO - sglang running req: 9 queue req: 319
  12424. 2025-07-20 15:33:53,728 - __main__ - INFO - Queue remaining: 2
  12425. 2025-07-20 15:33:53,728 - __main__ - INFO -
  12426. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12427. ----------------------------------------------------------------------------------
  12428. sglang_input_tokens 802.07 1020.27
  12429. sglang_output_tokens 230.23 294.95
  12430. 2025-07-20 15:33:53,728 - __main__ - INFO -
  12431. Worker ID | finished | started
  12432. ----------+----------+--------
  12433. 0 | 182 | 500
  12434. 1 | 0 | 10
  12435. 2025-07-20 15:33:53,730 - sglang - INFO - [2025-07-20 15:33:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2854, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 318
  12436. 2025-07-20 15:33:53,731 - __main__ - INFO - sglang running req: 9 queue req: 318
  12437. 2025-07-20 15:33:54,803 - sglang - INFO - [2025-07-20 15:33:54 TP0] Decode batch. #running-req: 10, #token: 29421, token usage: 0.77, gen throughput (token/s): 155.34, #queue-req: 318
  12438. 2025-07-20 15:33:54,803 - __main__ - INFO - sglang running req: 10 queue req: 318
  12439. 2025-07-20 15:33:55,785 - sglang - INFO - [2025-07-20 15:33:55 TP0] Decode batch. #running-req: 10, #token: 29821, token usage: 0.79, gen throughput (token/s): 407.34, #queue-req: 318
  12440. 2025-07-20 15:33:55,786 - __main__ - INFO - sglang running req: 10 queue req: 318
  12441. 2025-07-20 15:33:56,671 - sglang - INFO - [2025-07-20 15:33:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 317
  12442. 2025-07-20 15:33:56,671 - __main__ - INFO - sglang running req: 9 queue req: 317
  12443. 2025-07-20 15:33:57,594 - sglang - INFO - [2025-07-20 15:33:57 TP0] Decode batch. #running-req: 10, #token: 29257, token usage: 0.77, gen throughput (token/s): 220.61, #queue-req: 317
  12444. 2025-07-20 15:33:57,594 - __main__ - INFO - sglang running req: 10 queue req: 317
  12445. 2025-07-20 15:33:58,530 - sglang - INFO - [2025-07-20 15:33:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2161, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 316
  12446. 2025-07-20 15:33:58,530 - __main__ - INFO - sglang running req: 9 queue req: 316
  12447. 2025-07-20 15:33:59,248 - sglang - INFO - [2025-07-20 15:33:59 TP0] Decode batch. #running-req: 10, #token: 29550, token usage: 0.78, gen throughput (token/s): 241.24, #queue-req: 316
  12448. 2025-07-20 15:33:59,248 - __main__ - INFO - sglang running req: 10 queue req: 316
  12449. 2025-07-20 15:34:00,230 - sglang - INFO - [2025-07-20 15:34:00 TP0] Decode batch. #running-req: 10, #token: 29950, token usage: 0.79, gen throughput (token/s): 407.32, #queue-req: 316
  12450. 2025-07-20 15:34:00,230 - __main__ - INFO - sglang running req: 10 queue req: 316
  12451. 2025-07-20 15:34:01,216 - sglang - INFO - [2025-07-20 15:34:01 TP0] Decode batch. #running-req: 10, #token: 30350, token usage: 0.80, gen throughput (token/s): 405.78, #queue-req: 316
  12452. 2025-07-20 15:34:01,216 - __main__ - INFO - sglang running req: 10 queue req: 316
  12453. 2025-07-20 15:34:02,202 - sglang - INFO - [2025-07-20 15:34:02 TP0] Decode batch. #running-req: 10, #token: 30750, token usage: 0.81, gen throughput (token/s): 405.73, #queue-req: 316
  12454. 2025-07-20 15:34:02,202 - __main__ - INFO - sglang running req: 10 queue req: 316
  12455. 2025-07-20 15:34:03,188 - sglang - INFO - [2025-07-20 15:34:03 TP0] Decode batch. #running-req: 10, #token: 31150, token usage: 0.82, gen throughput (token/s): 405.54, #queue-req: 316
  12456. 2025-07-20 15:34:03,188 - __main__ - INFO - sglang running req: 10 queue req: 316
  12457. 2025-07-20 15:34:03,729 - __main__ - INFO - Queue remaining: 2
  12458. 2025-07-20 15:34:03,730 - __main__ - INFO -
  12459. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12460. ----------------------------------------------------------------------------------
  12461. sglang_input_tokens 796.00 1017.58
  12462. sglang_output_tokens 228.53 293.85
  12463. 2025-07-20 15:34:03,730 - __main__ - INFO -
  12464. Worker ID | finished | started
  12465. ----------+----------+--------
  12466. 0 | 184 | 500
  12467. 1 | 0 | 10
  12468. 2025-07-20 15:34:04,174 - sglang - INFO - [2025-07-20 15:34:04 TP0] Decode batch. #running-req: 10, #token: 31550, token usage: 0.83, gen throughput (token/s): 405.49, #queue-req: 316
  12469. 2025-07-20 15:34:04,175 - __main__ - INFO - sglang running req: 10 queue req: 316
  12470. 2025-07-20 15:34:05,091 - sglang - INFO - [2025-07-20 15:34:05 TP0] Prefill batch. #new-seq: 1, #new-token: 2511, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 315
  12471. 2025-07-20 15:34:05,091 - __main__ - INFO - sglang running req: 9 queue req: 315
  12472. 2025-07-20 15:34:05,919 - sglang - INFO - [2025-07-20 15:34:05 TP0] Decode batch. #running-req: 10, #token: 31237, token usage: 0.82, gen throughput (token/s): 228.72, #queue-req: 315
  12473. 2025-07-20 15:34:05,919 - __main__ - INFO - sglang running req: 10 queue req: 315
  12474. 2025-07-20 15:34:06,903 - sglang - INFO - [2025-07-20 15:34:06 TP0] Decode batch. #running-req: 10, #token: 31637, token usage: 0.83, gen throughput (token/s): 406.47, #queue-req: 315
  12475. 2025-07-20 15:34:06,903 - __main__ - INFO - sglang running req: 10 queue req: 315
  12476. 2025-07-20 15:34:07,076 - sglang - INFO - [2025-07-20 15:34:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2648, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 314
  12477. 2025-07-20 15:34:07,076 - __main__ - INFO - sglang running req: 9 queue req: 314
  12478. 2025-07-20 15:34:08,675 - sglang - INFO - [2025-07-20 15:34:08 TP0] Decode batch. #running-req: 10, #token: 31890, token usage: 0.84, gen throughput (token/s): 225.17, #queue-req: 314
  12479. 2025-07-20 15:34:08,675 - __main__ - INFO - sglang running req: 10 queue req: 314
  12480. 2025-07-20 15:34:09,667 - sglang - INFO - [2025-07-20 15:34:09 TP0] Decode batch. #running-req: 10, #token: 32290, token usage: 0.85, gen throughput (token/s): 403.09, #queue-req: 314
  12481. 2025-07-20 15:34:09,668 - __main__ - INFO - sglang running req: 10 queue req: 314
  12482. 2025-07-20 15:34:10,670 - sglang - INFO - [2025-07-20 15:34:10 TP0] Decode batch. #running-req: 10, #token: 32690, token usage: 0.86, gen throughput (token/s): 398.84, #queue-req: 314
  12483. 2025-07-20 15:34:10,671 - __main__ - INFO - sglang running req: 10 queue req: 314
  12484. 2025-07-20 15:34:11,665 - sglang - INFO - [2025-07-20 15:34:11 TP0] Decode batch. #running-req: 10, #token: 33090, token usage: 0.87, gen throughput (token/s): 402.02, #queue-req: 314
  12485. 2025-07-20 15:34:11,665 - __main__ - INFO - sglang running req: 10 queue req: 314
  12486. 2025-07-20 15:34:12,663 - sglang - INFO - [2025-07-20 15:34:12 TP0] Decode batch. #running-req: 10, #token: 33490, token usage: 0.88, gen throughput (token/s): 400.87, #queue-req: 314
  12487. 2025-07-20 15:34:12,663 - __main__ - INFO - sglang running req: 10 queue req: 314
  12488. 2025-07-20 15:34:13,310 - sglang - INFO - [2025-07-20 15:34:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2513, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 9, #queue-req: 313
  12489. 2025-07-20 15:34:13,310 - __main__ - INFO - sglang running req: 9 queue req: 313
  12490. 2025-07-20 15:34:13,731 - __main__ - INFO - Queue remaining: 2
  12491. 2025-07-20 15:34:13,732 - __main__ - INFO -
  12492. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12493. ----------------------------------------------------------------------------------
  12494. sglang_input_tokens 795.22 987.39
  12495. sglang_output_tokens 228.08 286.21
  12496. 2025-07-20 15:34:13,732 - __main__ - INFO -
  12497. Worker ID | finished | started
  12498. ----------+----------+--------
  12499. 0 | 187 | 500
  12500. 1 | 0 | 10
  12501. 2025-07-20 15:34:14,409 - sglang - INFO - [2025-07-20 15:34:14 TP0] Decode batch. #running-req: 10, #token: 32865, token usage: 0.87, gen throughput (token/s): 228.52, #queue-req: 313
  12502. 2025-07-20 15:34:14,409 - __main__ - INFO - sglang running req: 10 queue req: 313
  12503. 2025-07-20 15:34:14,483 - sglang - INFO - [2025-07-20 15:34:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2013, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 312
  12504. 2025-07-20 15:34:14,483 - __main__ - INFO - sglang running req: 9 queue req: 312
  12505. 2025-07-20 15:34:15,828 - sglang - INFO - [2025-07-20 15:34:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1530, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 311
  12506. 2025-07-20 15:34:15,828 - __main__ - INFO - sglang running req: 9 queue req: 311
  12507. 2025-07-20 15:34:16,613 - sglang - INFO - [2025-07-20 15:34:16 TP0] Decode batch. #running-req: 10, #token: 29519, token usage: 0.78, gen throughput (token/s): 180.59, #queue-req: 311
  12508. 2025-07-20 15:34:16,613 - __main__ - INFO - sglang running req: 10 queue req: 311
  12509. 2025-07-20 15:34:16,809 - sglang - INFO - [2025-07-20 15:34:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2211, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 310
  12510. 2025-07-20 15:34:16,809 - __main__ - INFO - sglang running req: 9 queue req: 310
  12511. 2025-07-20 15:34:18,318 - sglang - INFO - [2025-07-20 15:34:18 TP0] Decode batch. #running-req: 10, #token: 29399, token usage: 0.77, gen throughput (token/s): 233.96, #queue-req: 310
  12512. 2025-07-20 15:34:18,319 - __main__ - INFO - sglang running req: 10 queue req: 310
  12513. 2025-07-20 15:34:19,355 - sglang - INFO - [2025-07-20 15:34:19 TP0] Decode batch. #running-req: 10, #token: 29799, token usage: 0.78, gen throughput (token/s): 385.98, #queue-req: 310
  12514. 2025-07-20 15:34:19,355 - __main__ - INFO - sglang running req: 10 queue req: 310
  12515. 2025-07-20 15:34:20,351 - sglang - INFO - [2025-07-20 15:34:20 TP0] Decode batch. #running-req: 10, #token: 30199, token usage: 0.79, gen throughput (token/s): 401.40, #queue-req: 310
  12516. 2025-07-20 15:34:20,352 - __main__ - INFO - sglang running req: 10 queue req: 310
  12517. 2025-07-20 15:34:21,370 - sglang - INFO - [2025-07-20 15:34:21 TP0] Decode batch. #running-req: 10, #token: 30599, token usage: 0.81, gen throughput (token/s): 392.54, #queue-req: 310
  12518. 2025-07-20 15:34:21,370 - __main__ - INFO - sglang running req: 10 queue req: 310
  12519. 2025-07-20 15:34:21,617 - sglang - INFO - [2025-07-20 15:34:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2581, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 309
  12520. 2025-07-20 15:34:21,617 - __main__ - INFO - sglang running req: 9 queue req: 309
  12521. 2025-07-20 15:34:22,989 - sglang - INFO - [2025-07-20 15:34:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2861, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 308
  12522. 2025-07-20 15:34:22,989 - __main__ - INFO - sglang running req: 9 queue req: 308
  12523. 2025-07-20 15:34:23,733 - __main__ - INFO - Queue remaining: 2
  12524. 2025-07-20 15:34:23,734 - __main__ - INFO -
  12525. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12526. ----------------------------------------------------------------------------------
  12527. sglang_input_tokens 804.17 1004.08
  12528. sglang_output_tokens 231.18 292.01
  12529. 2025-07-20 15:34:23,734 - __main__ - INFO -
  12530. Worker ID | finished | started
  12531. ----------+----------+--------
  12532. 0 | 192 | 500
  12533. 1 | 0 | 10
  12534. 2025-07-20 15:34:23,964 - sglang - INFO - [2025-07-20 15:34:23 TP0] Decode batch. #running-req: 10, #token: 29355, token usage: 0.77, gen throughput (token/s): 153.42, #queue-req: 308
  12535. 2025-07-20 15:34:23,964 - __main__ - INFO - sglang running req: 10 queue req: 308
  12536. 2025-07-20 15:34:24,038 - sglang - INFO - [2025-07-20 15:34:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2070, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 307
  12537. 2025-07-20 15:34:24,038 - __main__ - INFO - sglang running req: 9 queue req: 307
  12538. 2025-07-20 15:34:25,623 - sglang - INFO - [2025-07-20 15:34:25 TP0] Decode batch. #running-req: 10, #token: 28197, token usage: 0.74, gen throughput (token/s): 240.53, #queue-req: 307
  12539. 2025-07-20 15:34:25,623 - __main__ - INFO - sglang running req: 10 queue req: 307
  12540. 2025-07-20 15:34:26,334 - sglang - INFO - [2025-07-20 15:34:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2258, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 306
  12541. 2025-07-20 15:34:26,334 - __main__ - INFO - sglang running req: 9 queue req: 306
  12542. 2025-07-20 15:34:27,329 - sglang - INFO - [2025-07-20 15:34:27 TP0] Decode batch. #running-req: 10, #token: 29047, token usage: 0.76, gen throughput (token/s): 233.88, #queue-req: 306
  12543. 2025-07-20 15:34:27,329 - __main__ - INFO - sglang running req: 10 queue req: 306
  12544. 2025-07-20 15:34:28,315 - sglang - INFO - [2025-07-20 15:34:28 TP0] Decode batch. #running-req: 10, #token: 29447, token usage: 0.78, gen throughput (token/s): 405.65, #queue-req: 306
  12545. 2025-07-20 15:34:28,316 - __main__ - INFO - sglang running req: 10 queue req: 306
  12546. 2025-07-20 15:34:29,296 - sglang - INFO - [2025-07-20 15:34:29 TP0] Decode batch. #running-req: 10, #token: 29847, token usage: 0.79, gen throughput (token/s): 407.73, #queue-req: 306
  12547. 2025-07-20 15:34:29,297 - __main__ - INFO - sglang running req: 10 queue req: 306
  12548. 2025-07-20 15:34:29,370 - sglang - INFO - [2025-07-20 15:34:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2209, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 305
  12549. 2025-07-20 15:34:29,370 - __main__ - INFO - sglang running req: 9 queue req: 305
  12550. 2025-07-20 15:34:30,289 - sglang - INFO - [2025-07-20 15:34:30 TP0] Prefill batch. #new-seq: 2, #new-token: 4820, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.63, #running-req: 9, #queue-req: 303
  12551. 2025-07-20 15:34:30,290 - __main__ - INFO - sglang running req: 9 queue req: 303
  12552. 2025-07-20 15:34:32,436 - sglang - INFO - [2025-07-20 15:34:32 TP0] Decode batch. #running-req: 11, #token: 28999, token usage: 0.76, gen throughput (token/s): 135.99, #queue-req: 303
  12553. 2025-07-20 15:34:32,436 - __main__ - INFO - sglang running req: 11 queue req: 303
  12554. 2025-07-20 15:34:33,487 - sglang - INFO - [2025-07-20 15:34:33 TP0] Decode batch. #running-req: 11, #token: 29439, token usage: 0.77, gen throughput (token/s): 418.63, #queue-req: 303
  12555. 2025-07-20 15:34:33,488 - __main__ - INFO - sglang running req: 11 queue req: 303
  12556. 2025-07-20 15:34:33,734 - __main__ - INFO - Queue remaining: 2
  12557. 2025-07-20 15:34:33,735 - __main__ - INFO -
  12558. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12559. ----------------------------------------------------------------------------------
  12560. sglang_input_tokens 807.14 1018.64
  12561. sglang_output_tokens 234.06 299.89
  12562. 2025-07-20 15:34:33,735 - __main__ - INFO -
  12563. Worker ID | finished | started
  12564. ----------+----------+--------
  12565. 0 | 196 | 500
  12566. 1 | 0 | 10
  12567. 2025-07-20 15:34:34,579 - sglang - INFO - [2025-07-20 15:34:34 TP0] Decode batch. #running-req: 11, #token: 29879, token usage: 0.79, gen throughput (token/s): 402.92, #queue-req: 303
  12568. 2025-07-20 15:34:34,580 - __main__ - INFO - sglang running req: 11 queue req: 303
  12569. 2025-07-20 15:34:35,567 - sglang - INFO - [2025-07-20 15:34:35 TP0] Decode batch. #running-req: 11, #token: 30319, token usage: 0.80, gen throughput (token/s): 445.39, #queue-req: 303
  12570. 2025-07-20 15:34:35,568 - __main__ - INFO - sglang running req: 11 queue req: 303
  12571. 2025-07-20 15:34:35,938 - sglang - INFO - [2025-07-20 15:34:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2439, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 302
  12572. 2025-07-20 15:34:35,938 - __main__ - INFO - sglang running req: 10 queue req: 302
  12573. 2025-07-20 15:34:37,306 - sglang - INFO - [2025-07-20 15:34:37 TP0] Decode batch. #running-req: 11, #token: 30096, token usage: 0.79, gen throughput (token/s): 252.55, #queue-req: 302
  12574. 2025-07-20 15:34:37,306 - __main__ - INFO - sglang running req: 11 queue req: 302
  12575. 2025-07-20 15:34:37,454 - sglang - INFO - [2025-07-20 15:34:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2664, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 301
  12576. 2025-07-20 15:34:37,454 - __main__ - INFO - sglang running req: 10 queue req: 301
  12577. 2025-07-20 15:34:38,874 - sglang - INFO - [2025-07-20 15:34:38 TP0] Prefill batch. #new-seq: 1, #new-token: 1794, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 300
  12578. 2025-07-20 15:34:38,874 - __main__ - INFO - sglang running req: 10 queue req: 300
  12579. 2025-07-20 15:34:39,571 - sglang - INFO - [2025-07-20 15:34:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2775, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 299
  12580. 2025-07-20 15:34:39,571 - __main__ - INFO - sglang running req: 10 queue req: 299
  12581. 2025-07-20 15:34:40,551 - sglang - INFO - [2025-07-20 15:34:40 TP0] Decode batch. #running-req: 11, #token: 28845, token usage: 0.76, gen throughput (token/s): 134.66, #queue-req: 299
  12582. 2025-07-20 15:34:40,551 - __main__ - INFO - sglang running req: 11 queue req: 299
  12583. 2025-07-20 15:34:41,535 - sglang - INFO - [2025-07-20 15:34:41 TP0] Decode batch. #running-req: 11, #token: 29285, token usage: 0.77, gen throughput (token/s): 446.92, #queue-req: 299
  12584. 2025-07-20 15:34:41,535 - __main__ - INFO - sglang running req: 11 queue req: 299
  12585. 2025-07-20 15:34:42,522 - sglang - INFO - [2025-07-20 15:34:42 TP0] Decode batch. #running-req: 11, #token: 29725, token usage: 0.78, gen throughput (token/s): 445.72, #queue-req: 299
  12586. 2025-07-20 15:34:42,523 - __main__ - INFO - sglang running req: 11 queue req: 299
  12587. 2025-07-20 15:34:43,510 - sglang - INFO - [2025-07-20 15:34:43 TP0] Decode batch. #running-req: 11, #token: 30165, token usage: 0.79, gen throughput (token/s): 445.44, #queue-req: 299
  12588. 2025-07-20 15:34:43,511 - __main__ - INFO - sglang running req: 11 queue req: 299
  12589. 2025-07-20 15:34:43,737 - __main__ - INFO - Queue remaining: 2
  12590. 2025-07-20 15:34:43,738 - __main__ - INFO -
  12591. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12592. ----------------------------------------------------------------------------------
  12593. sglang_input_tokens 809.36 1014.52
  12594. sglang_output_tokens 234.57 297.32
  12595. 2025-07-20 15:34:43,738 - __main__ - INFO -
  12596. Worker ID | finished | started
  12597. ----------+----------+--------
  12598. 0 | 200 | 500
  12599. 1 | 0 | 10
  12600. 2025-07-20 15:34:44,127 - sglang - INFO - [2025-07-20 15:34:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1674, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 298
  12601. 2025-07-20 15:34:44,127 - __main__ - INFO - sglang running req: 10 queue req: 298
  12602. 2025-07-20 15:34:45,097 - sglang - INFO - [2025-07-20 15:34:45 TP0] Decode batch. #running-req: 11, #token: 29667, token usage: 0.78, gen throughput (token/s): 276.64, #queue-req: 298
  12603. 2025-07-20 15:34:45,097 - __main__ - INFO - sglang running req: 11 queue req: 298
  12604. 2025-07-20 15:34:46,083 - sglang - INFO - [2025-07-20 15:34:46 TP0] Decode batch. #running-req: 11, #token: 30107, token usage: 0.79, gen throughput (token/s): 446.18, #queue-req: 298
  12605. 2025-07-20 15:34:46,084 - __main__ - INFO - sglang running req: 11 queue req: 298
  12606. 2025-07-20 15:34:46,998 - sglang - INFO - [2025-07-20 15:34:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2773, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 297
  12607. 2025-07-20 15:34:46,998 - __main__ - INFO - sglang running req: 10 queue req: 297
  12608. 2025-07-20 15:34:47,877 - sglang - INFO - [2025-07-20 15:34:47 TP0] Decode batch. #running-req: 11, #token: 30494, token usage: 0.80, gen throughput (token/s): 244.73, #queue-req: 297
  12609. 2025-07-20 15:34:47,877 - __main__ - INFO - sglang running req: 11 queue req: 297
  12610. 2025-07-20 15:34:48,866 - sglang - INFO - [2025-07-20 15:34:48 TP0] Decode batch. #running-req: 11, #token: 30934, token usage: 0.81, gen throughput (token/s): 445.05, #queue-req: 297
  12611. 2025-07-20 15:34:48,866 - __main__ - INFO - sglang running req: 11 queue req: 297
  12612. 2025-07-20 15:34:49,411 - sglang - INFO - [2025-07-20 15:34:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2649, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 296
  12613. 2025-07-20 15:34:49,411 - __main__ - INFO - sglang running req: 10 queue req: 296
  12614. 2025-07-20 15:34:50,643 - sglang - INFO - [2025-07-20 15:34:50 TP0] Decode batch. #running-req: 11, #token: 30710, token usage: 0.81, gen throughput (token/s): 247.03, #queue-req: 296
  12615. 2025-07-20 15:34:50,643 - __main__ - INFO - sglang running req: 11 queue req: 296
  12616. 2025-07-20 15:34:51,607 - sglang - INFO - [2025-07-20 15:34:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2762, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 295
  12617. 2025-07-20 15:34:51,607 - __main__ - INFO - sglang running req: 10 queue req: 295
  12618. 2025-07-20 15:34:52,437 - sglang - INFO - [2025-07-20 15:34:52 TP0] Decode batch. #running-req: 11, #token: 31107, token usage: 0.82, gen throughput (token/s): 244.69, #queue-req: 295
  12619. 2025-07-20 15:34:52,437 - __main__ - INFO - sglang running req: 11 queue req: 295
  12620. 2025-07-20 15:34:53,379 - sglang - INFO - [2025-07-20 15:34:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2448, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 294
  12621. 2025-07-20 15:34:53,379 - __main__ - INFO - sglang running req: 10 queue req: 294
  12622. 2025-07-20 15:34:53,739 - __main__ - INFO - Queue remaining: 2
  12623. 2025-07-20 15:34:53,739 - __main__ - INFO -
  12624. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12625. ----------------------------------------------------------------------------------
  12626. sglang_input_tokens 814.05 1004.03
  12627. sglang_output_tokens 235.40 292.72
  12628. 2025-07-20 15:34:53,740 - __main__ - INFO -
  12629. Worker ID | finished | started
  12630. ----------+----------+--------
  12631. 0 | 205 | 500
  12632. 1 | 0 | 10
  12633. 2025-07-20 15:34:54,181 - sglang - INFO - [2025-07-20 15:34:54 TP0] Decode batch. #running-req: 11, #token: 31794, token usage: 0.84, gen throughput (token/s): 251.75, #queue-req: 294
  12634. 2025-07-20 15:34:54,181 - __main__ - INFO - sglang running req: 11 queue req: 294
  12635. 2025-07-20 15:34:55,174 - sglang - INFO - [2025-07-20 15:34:55 TP0] Decode batch. #running-req: 11, #token: 28527, token usage: 0.75, gen throughput (token/s): 442.99, #queue-req: 294
  12636. 2025-07-20 15:34:55,174 - __main__ - INFO - sglang running req: 11 queue req: 294
  12637. 2025-07-20 15:34:55,199 - sglang - INFO - [2025-07-20 15:34:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 293
  12638. 2025-07-20 15:34:55,199 - __main__ - INFO - sglang running req: 10 queue req: 293
  12639. 2025-07-20 15:34:56,497 - sglang - INFO - [2025-07-20 15:34:56 TP0] Prefill batch. #new-seq: 1, #new-token: 1271, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 292
  12640. 2025-07-20 15:34:56,497 - __main__ - INFO - sglang running req: 10 queue req: 292
  12641. 2025-07-20 15:34:57,423 - sglang - INFO - [2025-07-20 15:34:57 TP0] Decode batch. #running-req: 11, #token: 30721, token usage: 0.81, gen throughput (token/s): 194.72, #queue-req: 292
  12642. 2025-07-20 15:34:57,424 - __main__ - INFO - sglang running req: 11 queue req: 292
  12643. 2025-07-20 15:34:58,414 - sglang - INFO - [2025-07-20 15:34:58 TP0] Decode batch. #running-req: 11, #token: 31161, token usage: 0.82, gen throughput (token/s): 444.31, #queue-req: 292
  12644. 2025-07-20 15:34:58,414 - __main__ - INFO - sglang running req: 11 queue req: 292
  12645. 2025-07-20 15:34:59,403 - sglang - INFO - [2025-07-20 15:34:59 TP0] Decode batch. #running-req: 11, #token: 31601, token usage: 0.83, gen throughput (token/s): 444.59, #queue-req: 292
  12646. 2025-07-20 15:34:59,404 - __main__ - INFO - sglang running req: 11 queue req: 292
  12647. 2025-07-20 15:35:00,410 - sglang - INFO - [2025-07-20 15:35:00 TP0] Decode batch. #running-req: 11, #token: 32041, token usage: 0.84, gen throughput (token/s): 436.93, #queue-req: 292
  12648. 2025-07-20 15:35:00,410 - __main__ - INFO - sglang running req: 11 queue req: 292
  12649. 2025-07-20 15:35:00,500 - sglang - INFO - [2025-07-20 15:35:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 291
  12650. 2025-07-20 15:35:00,500 - __main__ - INFO - sglang running req: 10 queue req: 291
  12651. 2025-07-20 15:35:01,576 - sglang - INFO - [2025-07-20 15:35:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 290
  12652. 2025-07-20 15:35:01,576 - __main__ - INFO - sglang running req: 10 queue req: 290
  12653. 2025-07-20 15:35:02,944 - sglang - INFO - [2025-07-20 15:35:02 TP0] Decode batch. #running-req: 11, #token: 32668, token usage: 0.86, gen throughput (token/s): 172.90, #queue-req: 290
  12654. 2025-07-20 15:35:02,944 - __main__ - INFO - sglang running req: 11 queue req: 290
  12655. 2025-07-20 15:35:03,740 - __main__ - INFO - Queue remaining: 2
  12656. 2025-07-20 15:35:03,741 - __main__ - INFO -
  12657. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12658. ----------------------------------------------------------------------------------
  12659. sglang_input_tokens 814.11 1017.64
  12660. sglang_output_tokens 235.06 296.45
  12661. 2025-07-20 15:35:03,741 - __main__ - INFO -
  12662. Worker ID | finished | started
  12663. ----------+----------+--------
  12664. 0 | 209 | 500
  12665. 1 | 0 | 10
  12666. 2025-07-20 15:35:03,939 - sglang - INFO - [2025-07-20 15:35:03 TP0] Decode batch. #running-req: 11, #token: 33108, token usage: 0.87, gen throughput (token/s): 441.85, #queue-req: 290
  12667. 2025-07-20 15:35:03,940 - __main__ - INFO - sglang running req: 11 queue req: 290
  12668. 2025-07-20 15:35:04,937 - sglang - INFO - [2025-07-20 15:35:04 TP0] Decode batch. #running-req: 11, #token: 33548, token usage: 0.88, gen throughput (token/s): 441.12, #queue-req: 290
  12669. 2025-07-20 15:35:04,937 - __main__ - INFO - sglang running req: 11 queue req: 290
  12670. 2025-07-20 15:35:05,934 - sglang - INFO - [2025-07-20 15:35:05 TP0] Decode batch. #running-req: 11, #token: 33988, token usage: 0.89, gen throughput (token/s): 441.40, #queue-req: 290
  12671. 2025-07-20 15:35:05,934 - __main__ - INFO - sglang running req: 11 queue req: 290
  12672. 2025-07-20 15:35:06,406 - sglang - INFO - [2025-07-20 15:35:06 TP0] Prefill batch. #new-seq: 1, #new-token: 1377, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.81, #running-req: 10, #queue-req: 289
  12673. 2025-07-20 15:35:06,407 - __main__ - INFO - sglang running req: 10 queue req: 289
  12674. 2025-07-20 15:35:07,422 - sglang - INFO - [2025-07-20 15:35:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2245, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 288
  12675. 2025-07-20 15:35:07,422 - __main__ - INFO - sglang running req: 10 queue req: 288
  12676. 2025-07-20 15:35:08,170 - sglang - INFO - [2025-07-20 15:35:08 TP0] Decode batch. #running-req: 11, #token: 30992, token usage: 0.82, gen throughput (token/s): 195.83, #queue-req: 288
  12677. 2025-07-20 15:35:08,171 - __main__ - INFO - sglang running req: 11 queue req: 288
  12678. 2025-07-20 15:35:08,716 - sglang - INFO - [2025-07-20 15:35:08 TP0] Prefill batch. #new-seq: 1, #new-token: 1945, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 287
  12679. 2025-07-20 15:35:08,716 - __main__ - INFO - sglang running req: 10 queue req: 287
  12680. 2025-07-20 15:35:09,814 - sglang - INFO - [2025-07-20 15:35:09 TP0] Decode batch. #running-req: 11, #token: 30051, token usage: 0.79, gen throughput (token/s): 267.10, #queue-req: 287
  12681. 2025-07-20 15:35:09,814 - __main__ - INFO - sglang running req: 11 queue req: 287
  12682. 2025-07-20 15:35:10,358 - sglang - INFO - [2025-07-20 15:35:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2913, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 286
  12683. 2025-07-20 15:35:10,358 - __main__ - INFO - sglang running req: 10 queue req: 286
  12684. 2025-07-20 15:35:11,634 - sglang - INFO - [2025-07-20 15:35:11 TP0] Decode batch. #running-req: 11, #token: 29865, token usage: 0.79, gen throughput (token/s): 241.14, #queue-req: 286
  12685. 2025-07-20 15:35:11,635 - __main__ - INFO - sglang running req: 11 queue req: 286
  12686. 2025-07-20 15:35:12,475 - sglang - INFO - [2025-07-20 15:35:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2254, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 285
  12687. 2025-07-20 15:35:12,475 - __main__ - INFO - sglang running req: 10 queue req: 285
  12688. 2025-07-20 15:35:13,348 - sglang - INFO - [2025-07-20 15:35:13 TP0] Decode batch. #running-req: 11, #token: 31047, token usage: 0.82, gen throughput (token/s): 256.15, #queue-req: 285
  12689. 2025-07-20 15:35:13,348 - __main__ - INFO - sglang running req: 11 queue req: 285
  12690. 2025-07-20 15:35:13,742 - __main__ - INFO - Queue remaining: 2
  12691. 2025-07-20 15:35:13,743 - __main__ - INFO -
  12692. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12693. ----------------------------------------------------------------------------------
  12694. sglang_input_tokens 819.86 1014.03
  12695. sglang_output_tokens 237.29 295.29
  12696. 2025-07-20 15:35:13,743 - __main__ - INFO -
  12697. Worker ID | finished | started
  12698. ----------+----------+--------
  12699. 0 | 214 | 500
  12700. 1 | 0 | 10
  12701. 2025-07-20 15:35:14,338 - sglang - INFO - [2025-07-20 15:35:14 TP0] Decode batch. #running-req: 11, #token: 31487, token usage: 0.83, gen throughput (token/s): 444.65, #queue-req: 285
  12702. 2025-07-20 15:35:14,338 - __main__ - INFO - sglang running req: 11 queue req: 285
  12703. 2025-07-20 15:35:15,327 - sglang - INFO - [2025-07-20 15:35:15 TP0] Decode batch. #running-req: 11, #token: 31927, token usage: 0.84, gen throughput (token/s): 444.73, #queue-req: 285
  12704. 2025-07-20 15:35:15,327 - __main__ - INFO - sglang running req: 11 queue req: 285
  12705. 2025-07-20 15:35:16,321 - sglang - INFO - [2025-07-20 15:35:16 TP0] Decode batch. #running-req: 11, #token: 32367, token usage: 0.85, gen throughput (token/s): 442.81, #queue-req: 285
  12706. 2025-07-20 15:35:16,321 - __main__ - INFO - sglang running req: 11 queue req: 285
  12707. 2025-07-20 15:35:17,335 - sglang - INFO - [2025-07-20 15:35:17 TP0] Decode batch. #running-req: 11, #token: 32807, token usage: 0.86, gen throughput (token/s): 433.69, #queue-req: 285
  12708. 2025-07-20 15:35:17,336 - __main__ - INFO - sglang running req: 11 queue req: 285
  12709. 2025-07-20 15:35:18,083 - sglang - INFO - [2025-07-20 15:35:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2520, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 284
  12710. 2025-07-20 15:35:18,083 - __main__ - INFO - sglang running req: 10 queue req: 284
  12711. 2025-07-20 15:35:19,089 - sglang - INFO - [2025-07-20 15:35:19 TP0] Decode batch. #running-req: 11, #token: 32310, token usage: 0.85, gen throughput (token/s): 250.33, #queue-req: 284
  12712. 2025-07-20 15:35:19,089 - __main__ - INFO - sglang running req: 11 queue req: 284
  12713. 2025-07-20 15:35:19,585 - sglang - INFO - [2025-07-20 15:35:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2443, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 283
  12714. 2025-07-20 15:35:19,585 - __main__ - INFO - sglang running req: 10 queue req: 283
  12715. 2025-07-20 15:35:20,439 - sglang - INFO - [2025-07-20 15:35:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2675, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 282
  12716. 2025-07-20 15:35:20,439 - __main__ - INFO - sglang running req: 10 queue req: 282
  12717. 2025-07-20 15:35:21,622 - sglang - INFO - [2025-07-20 15:35:21 TP0] Decode batch. #running-req: 11, #token: 30645, token usage: 0.81, gen throughput (token/s): 172.92, #queue-req: 282
  12718. 2025-07-20 15:35:21,622 - __main__ - INFO - sglang running req: 11 queue req: 282
  12719. 2025-07-20 15:35:22,537 - sglang - INFO - [2025-07-20 15:35:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2776, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 281
  12720. 2025-07-20 15:35:22,537 - __main__ - INFO - sglang running req: 10 queue req: 281
  12721. 2025-07-20 15:35:23,417 - sglang - INFO - [2025-07-20 15:35:23 TP0] Decode batch. #running-req: 11, #token: 30614, token usage: 0.81, gen throughput (token/s): 244.55, #queue-req: 281
  12722. 2025-07-20 15:35:23,418 - __main__ - INFO - sglang running req: 11 queue req: 281
  12723. 2025-07-20 15:35:23,744 - __main__ - INFO - Queue remaining: 2
  12724. 2025-07-20 15:35:23,745 - __main__ - INFO -
  12725. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12726. ----------------------------------------------------------------------------------
  12727. sglang_input_tokens 823.72 1000.82
  12728. sglang_output_tokens 238.76 291.81
  12729. 2025-07-20 15:35:23,745 - __main__ - INFO -
  12730. Worker ID | finished | started
  12731. ----------+----------+--------
  12732. 0 | 218 | 500
  12733. 1 | 0 | 10
  12734. 2025-07-20 15:35:24,407 - sglang - INFO - [2025-07-20 15:35:24 TP0] Decode batch. #running-req: 11, #token: 31054, token usage: 0.82, gen throughput (token/s): 444.43, #queue-req: 281
  12735. 2025-07-20 15:35:24,407 - __main__ - INFO - sglang running req: 11 queue req: 281
  12736. 2025-07-20 15:35:25,398 - sglang - INFO - [2025-07-20 15:35:25 TP0] Decode batch. #running-req: 11, #token: 31494, token usage: 0.83, gen throughput (token/s): 444.01, #queue-req: 281
  12737. 2025-07-20 15:35:25,398 - __main__ - INFO - sglang running req: 11 queue req: 281
  12738. 2025-07-20 15:35:26,217 - sglang - INFO - [2025-07-20 15:35:26 TP0] Prefill batch. #new-seq: 1, #new-token: 1999, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 280
  12739. 2025-07-20 15:35:26,217 - __main__ - INFO - sglang running req: 10 queue req: 280
  12740. 2025-07-20 15:35:27,046 - sglang - INFO - [2025-07-20 15:35:27 TP0] Decode batch. #running-req: 11, #token: 30585, token usage: 0.81, gen throughput (token/s): 266.46, #queue-req: 280
  12741. 2025-07-20 15:35:27,046 - __main__ - INFO - sglang running req: 11 queue req: 280
  12742. 2025-07-20 15:35:28,035 - sglang - INFO - [2025-07-20 15:35:28 TP0] Decode batch. #running-req: 11, #token: 31025, token usage: 0.82, gen throughput (token/s): 444.72, #queue-req: 280
  12743. 2025-07-20 15:35:28,035 - __main__ - INFO - sglang running req: 11 queue req: 280
  12744. 2025-07-20 15:35:28,579 - sglang - INFO - [2025-07-20 15:35:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2120, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 279
  12745. 2025-07-20 15:35:28,579 - __main__ - INFO - sglang running req: 10 queue req: 279
  12746. 2025-07-20 15:35:29,495 - sglang - INFO - [2025-07-20 15:35:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 278
  12747. 2025-07-20 15:35:29,495 - __main__ - INFO - sglang running req: 10 queue req: 278
  12748. 2025-07-20 15:35:30,481 - sglang - INFO - [2025-07-20 15:35:30 TP0] Decode batch. #running-req: 11, #token: 30303, token usage: 0.80, gen throughput (token/s): 179.09, #queue-req: 278
  12749. 2025-07-20 15:35:30,481 - __main__ - INFO - sglang running req: 11 queue req: 278
  12750. 2025-07-20 15:35:31,184 - sglang - INFO - [2025-07-20 15:35:31 TP0] Prefill batch. #new-seq: 1, #new-token: 2368, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 277
  12751. 2025-07-20 15:35:31,184 - __main__ - INFO - sglang running req: 10 queue req: 277
  12752. 2025-07-20 15:35:32,252 - sglang - INFO - [2025-07-20 15:35:32 TP0] Decode batch. #running-req: 11, #token: 30238, token usage: 0.80, gen throughput (token/s): 247.89, #queue-req: 277
  12753. 2025-07-20 15:35:32,252 - __main__ - INFO - sglang running req: 11 queue req: 277
  12754. 2025-07-20 15:35:32,425 - sglang - INFO - [2025-07-20 15:35:32 TP0] Prefill batch. #new-seq: 2, #new-token: 4893, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 275
  12755. 2025-07-20 15:35:32,425 - __main__ - INFO - sglang running req: 9 queue req: 275
  12756. 2025-07-20 15:35:33,746 - __main__ - INFO - Queue remaining: 2
  12757. 2025-07-20 15:35:33,747 - __main__ - INFO -
  12758. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12759. ----------------------------------------------------------------------------------
  12760. sglang_input_tokens 832.30 1035.52
  12761. sglang_output_tokens 241.61 304.06
  12762. 2025-07-20 15:35:33,747 - __main__ - INFO -
  12763. Worker ID | finished | started
  12764. ----------+----------+--------
  12765. 0 | 224 | 500
  12766. 1 | 0 | 10
  12767. 2025-07-20 15:35:34,711 - sglang - INFO - [2025-07-20 15:35:34 TP0] Decode batch. #running-req: 11, #token: 29797, token usage: 0.78, gen throughput (token/s): 178.13, #queue-req: 275
  12768. 2025-07-20 15:35:34,711 - __main__ - INFO - sglang running req: 11 queue req: 275
  12769. 2025-07-20 15:35:35,700 - sglang - INFO - [2025-07-20 15:35:35 TP0] Decode batch. #running-req: 11, #token: 30237, token usage: 0.80, gen throughput (token/s): 444.72, #queue-req: 275
  12770. 2025-07-20 15:35:35,700 - __main__ - INFO - sglang running req: 11 queue req: 275
  12771. 2025-07-20 15:35:36,690 - sglang - INFO - [2025-07-20 15:35:36 TP0] Decode batch. #running-req: 11, #token: 30677, token usage: 0.81, gen throughput (token/s): 444.43, #queue-req: 275
  12772. 2025-07-20 15:35:36,690 - __main__ - INFO - sglang running req: 11 queue req: 275
  12773. 2025-07-20 15:35:37,682 - sglang - INFO - [2025-07-20 15:35:37 TP0] Decode batch. #running-req: 11, #token: 31117, token usage: 0.82, gen throughput (token/s): 443.73, #queue-req: 275
  12774. 2025-07-20 15:35:37,682 - __main__ - INFO - sglang running req: 11 queue req: 275
  12775. 2025-07-20 15:35:38,677 - sglang - INFO - [2025-07-20 15:35:38 TP0] Decode batch. #running-req: 11, #token: 31557, token usage: 0.83, gen throughput (token/s): 441.90, #queue-req: 275
  12776. 2025-07-20 15:35:38,678 - __main__ - INFO - sglang running req: 11 queue req: 275
  12777. 2025-07-20 15:35:39,672 - sglang - INFO - [2025-07-20 15:35:39 TP0] Decode batch. #running-req: 11, #token: 31997, token usage: 0.84, gen throughput (token/s): 442.33, #queue-req: 275
  12778. 2025-07-20 15:35:39,672 - __main__ - INFO - sglang running req: 11 queue req: 275
  12779. 2025-07-20 15:35:40,667 - sglang - INFO - [2025-07-20 15:35:40 TP0] Decode batch. #running-req: 11, #token: 32437, token usage: 0.85, gen throughput (token/s): 442.26, #queue-req: 275
  12780. 2025-07-20 15:35:40,667 - __main__ - INFO - sglang running req: 11 queue req: 275
  12781. 2025-07-20 15:35:41,680 - sglang - INFO - [2025-07-20 15:35:41 TP0] Decode batch. #running-req: 11, #token: 32877, token usage: 0.87, gen throughput (token/s): 434.35, #queue-req: 275
  12782. 2025-07-20 15:35:41,680 - __main__ - INFO - sglang running req: 11 queue req: 275
  12783. 2025-07-20 15:35:42,684 - sglang - INFO - [2025-07-20 15:35:42 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 274
  12784. 2025-07-20 15:35:42,684 - __main__ - INFO - sglang running req: 10 queue req: 274
  12785. 2025-07-20 15:35:43,563 - sglang - INFO - [2025-07-20 15:35:43 TP0] Decode batch. #running-req: 11, #token: 31753, token usage: 0.84, gen throughput (token/s): 233.12, #queue-req: 274
  12786. 2025-07-20 15:35:43,563 - __main__ - INFO - sglang running req: 11 queue req: 274
  12787. 2025-07-20 15:35:43,687 - sglang - INFO - [2025-07-20 15:35:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2353, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 273
  12788. 2025-07-20 15:35:43,688 - __main__ - INFO - sglang running req: 10 queue req: 273
  12789. 2025-07-20 15:35:43,748 - __main__ - INFO - Queue remaining: 2
  12790. 2025-07-20 15:35:43,748 - __main__ - INFO -
  12791. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12792. ----------------------------------------------------------------------------------
  12793. sglang_input_tokens 827.86 1002.29
  12794. sglang_output_tokens 240.40 292.39
  12795. 2025-07-20 15:35:43,749 - __main__ - INFO -
  12796. Worker ID | finished | started
  12797. ----------+----------+--------
  12798. 0 | 226 | 500
  12799. 1 | 0 | 10
  12800. 2025-07-20 15:35:45,303 - sglang - INFO - [2025-07-20 15:35:45 TP0] Decode batch. #running-req: 11, #token: 31331, token usage: 0.82, gen throughput (token/s): 252.27, #queue-req: 273
  12801. 2025-07-20 15:35:45,304 - __main__ - INFO - sglang running req: 11 queue req: 273
  12802. 2025-07-20 15:35:46,024 - sglang - INFO - [2025-07-20 15:35:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 272
  12803. 2025-07-20 15:35:46,024 - __main__ - INFO - sglang running req: 10 queue req: 272
  12804. 2025-07-20 15:35:47,128 - sglang - INFO - [2025-07-20 15:35:47 TP0] Decode batch. #running-req: 11, #token: 32076, token usage: 0.84, gen throughput (token/s): 240.53, #queue-req: 272
  12805. 2025-07-20 15:35:47,129 - __main__ - INFO - sglang running req: 11 queue req: 272
  12806. 2025-07-20 15:35:48,124 - sglang - INFO - [2025-07-20 15:35:48 TP0] Decode batch. #running-req: 11, #token: 32516, token usage: 0.86, gen throughput (token/s): 442.15, #queue-req: 272
  12807. 2025-07-20 15:35:48,124 - __main__ - INFO - sglang running req: 11 queue req: 272
  12808. 2025-07-20 15:35:48,448 - sglang - INFO - [2025-07-20 15:35:48 TP0] Prefill batch. #new-seq: 1, #new-token: 2494, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 271
  12809. 2025-07-20 15:35:48,448 - __main__ - INFO - sglang running req: 10 queue req: 271
  12810. 2025-07-20 15:35:49,804 - sglang - INFO - [2025-07-20 15:35:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 270
  12811. 2025-07-20 15:35:49,804 - __main__ - INFO - sglang running req: 10 queue req: 270
  12812. 2025-07-20 15:35:50,731 - sglang - INFO - [2025-07-20 15:35:50 TP0] Decode batch. #running-req: 11, #token: 32027, token usage: 0.84, gen throughput (token/s): 167.96, #queue-req: 270
  12813. 2025-07-20 15:35:50,732 - __main__ - INFO - sglang running req: 11 queue req: 270
  12814. 2025-07-20 15:35:51,724 - sglang - INFO - [2025-07-20 15:35:51 TP0] Decode batch. #running-req: 11, #token: 32467, token usage: 0.85, gen throughput (token/s): 443.35, #queue-req: 270
  12815. 2025-07-20 15:35:51,724 - __main__ - INFO - sglang running req: 11 queue req: 270
  12816. 2025-07-20 15:35:52,296 - sglang - INFO - [2025-07-20 15:35:52 TP0] Prefill batch. #new-seq: 1, #new-token: 1462, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 269
  12817. 2025-07-20 15:35:52,296 - __main__ - INFO - sglang running req: 10 queue req: 269
  12818. 2025-07-20 15:35:53,282 - sglang - INFO - [2025-07-20 15:35:53 TP0] Decode batch. #running-req: 11, #token: 31608, token usage: 0.83, gen throughput (token/s): 281.84, #queue-req: 269
  12819. 2025-07-20 15:35:53,282 - __main__ - INFO - sglang running req: 11 queue req: 269
  12820. 2025-07-20 15:35:53,381 - sglang - INFO - [2025-07-20 15:35:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 268
  12821. 2025-07-20 15:35:53,382 - __main__ - INFO - sglang running req: 10 queue req: 268
  12822. 2025-07-20 15:35:53,750 - __main__ - INFO - Queue remaining: 2
  12823. 2025-07-20 15:35:53,750 - __main__ - INFO -
  12824. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12825. ----------------------------------------------------------------------------------
  12826. sglang_input_tokens 833.40 1003.15
  12827. sglang_output_tokens 241.85 291.81
  12828. 2025-07-20 15:35:53,750 - __main__ - INFO -
  12829. Worker ID | finished | started
  12830. ----------+----------+--------
  12831. 0 | 231 | 500
  12832. 1 | 0 | 10
  12833. 2025-07-20 15:35:54,903 - sglang - INFO - [2025-07-20 15:35:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2303, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 267
  12834. 2025-07-20 15:35:54,903 - __main__ - INFO - sglang running req: 10 queue req: 267
  12835. 2025-07-20 15:35:55,756 - sglang - INFO - [2025-07-20 15:35:55 TP0] Decode batch. #running-req: 11, #token: 29744, token usage: 0.78, gen throughput (token/s): 177.01, #queue-req: 267
  12836. 2025-07-20 15:35:55,757 - __main__ - INFO - sglang running req: 11 queue req: 267
  12837. 2025-07-20 15:35:56,747 - sglang - INFO - [2025-07-20 15:35:56 TP0] Decode batch. #running-req: 11, #token: 30184, token usage: 0.79, gen throughput (token/s): 443.98, #queue-req: 267
  12838. 2025-07-20 15:35:56,747 - __main__ - INFO - sglang running req: 11 queue req: 267
  12839. 2025-07-20 15:35:57,069 - sglang - INFO - [2025-07-20 15:35:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2411, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 266
  12840. 2025-07-20 15:35:57,069 - __main__ - INFO - sglang running req: 10 queue req: 266
  12841. 2025-07-20 15:35:58,475 - sglang - INFO - [2025-07-20 15:35:58 TP0] Decode batch. #running-req: 11, #token: 30202, token usage: 0.80, gen throughput (token/s): 254.09, #queue-req: 266
  12842. 2025-07-20 15:35:58,475 - __main__ - INFO - sglang running req: 11 queue req: 266
  12843. 2025-07-20 15:35:59,464 - sglang - INFO - [2025-07-20 15:35:59 TP0] Decode batch. #running-req: 11, #token: 30642, token usage: 0.81, gen throughput (token/s): 444.68, #queue-req: 266
  12844. 2025-07-20 15:35:59,464 - __main__ - INFO - sglang running req: 11 queue req: 266
  12845. 2025-07-20 15:35:59,637 - sglang - INFO - [2025-07-20 15:35:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2764, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 265
  12846. 2025-07-20 15:35:59,638 - __main__ - INFO - sglang running req: 10 queue req: 265
  12847. 2025-07-20 15:36:01,265 - sglang - INFO - [2025-07-20 15:36:01 TP0] Decode batch. #running-req: 11, #token: 30360, token usage: 0.80, gen throughput (token/s): 243.71, #queue-req: 265
  12848. 2025-07-20 15:36:01,266 - __main__ - INFO - sglang running req: 11 queue req: 265
  12849. 2025-07-20 15:36:02,259 - sglang - INFO - [2025-07-20 15:36:02 TP0] Decode batch. #running-req: 11, #token: 30800, token usage: 0.81, gen throughput (token/s): 442.91, #queue-req: 265
  12850. 2025-07-20 15:36:02,259 - __main__ - INFO - sglang running req: 11 queue req: 265
  12851. 2025-07-20 15:36:02,781 - sglang - INFO - [2025-07-20 15:36:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2684, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 264
  12852. 2025-07-20 15:36:02,781 - __main__ - INFO - sglang running req: 10 queue req: 264
  12853. 2025-07-20 15:36:03,597 - sglang - INFO - [2025-07-20 15:36:03 TP0] Prefill batch. #new-seq: 1, #new-token: 1977, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 263
  12854. 2025-07-20 15:36:03,598 - __main__ - INFO - sglang running req: 10 queue req: 263
  12855. 2025-07-20 15:36:03,751 - __main__ - INFO - Queue remaining: 2
  12856. 2025-07-20 15:36:03,752 - __main__ - INFO -
  12857. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12858. ----------------------------------------------------------------------------------
  12859. sglang_input_tokens 838.08 1032.06
  12860. sglang_output_tokens 243.57 302.53
  12861. 2025-07-20 15:36:03,752 - __main__ - INFO -
  12862. Worker ID | finished | started
  12863. ----------+----------+--------
  12864. 0 | 236 | 500
  12865. 1 | 0 | 10
  12866. 2025-07-20 15:36:04,702 - sglang - INFO - [2025-07-20 15:36:04 TP0] Decode batch. #running-req: 11, #token: 30560, token usage: 0.80, gen throughput (token/s): 179.28, #queue-req: 263
  12867. 2025-07-20 15:36:04,702 - __main__ - INFO - sglang running req: 11 queue req: 263
  12868. 2025-07-20 15:36:04,826 - sglang - INFO - [2025-07-20 15:36:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 262
  12869. 2025-07-20 15:36:04,827 - __main__ - INFO - sglang running req: 10 queue req: 262
  12870. 2025-07-20 15:36:06,531 - sglang - INFO - [2025-07-20 15:36:06 TP0] Decode batch. #running-req: 11, #token: 31257, token usage: 0.82, gen throughput (token/s): 239.99, #queue-req: 262
  12871. 2025-07-20 15:36:06,531 - __main__ - INFO - sglang running req: 11 queue req: 262
  12872. 2025-07-20 15:36:07,225 - sglang - INFO - [2025-07-20 15:36:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2387, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 261
  12873. 2025-07-20 15:36:07,226 - __main__ - INFO - sglang running req: 10 queue req: 261
  12874. 2025-07-20 15:36:08,275 - sglang - INFO - [2025-07-20 15:36:08 TP0] Decode batch. #running-req: 11, #token: 31209, token usage: 0.82, gen throughput (token/s): 251.81, #queue-req: 261
  12875. 2025-07-20 15:36:08,275 - __main__ - INFO - sglang running req: 11 queue req: 261
  12876. 2025-07-20 15:36:09,269 - sglang - INFO - [2025-07-20 15:36:09 TP0] Decode batch. #running-req: 11, #token: 31649, token usage: 0.83, gen throughput (token/s): 442.74, #queue-req: 261
  12877. 2025-07-20 15:36:09,269 - __main__ - INFO - sglang running req: 11 queue req: 261
  12878. 2025-07-20 15:36:10,264 - sglang - INFO - [2025-07-20 15:36:10 TP0] Decode batch. #running-req: 11, #token: 32089, token usage: 0.84, gen throughput (token/s): 441.86, #queue-req: 261
  12879. 2025-07-20 15:36:10,265 - __main__ - INFO - sglang running req: 11 queue req: 261
  12880. 2025-07-20 15:36:11,260 - sglang - INFO - [2025-07-20 15:36:11 TP0] Decode batch. #running-req: 11, #token: 32529, token usage: 0.86, gen throughput (token/s): 442.02, #queue-req: 261
  12881. 2025-07-20 15:36:11,260 - __main__ - INFO - sglang running req: 11 queue req: 261
  12882. 2025-07-20 15:36:12,131 - sglang - INFO - [2025-07-20 15:36:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2136, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 260
  12883. 2025-07-20 15:36:12,132 - __main__ - INFO - sglang running req: 10 queue req: 260
  12884. 2025-07-20 15:36:12,928 - sglang - INFO - [2025-07-20 15:36:12 TP0] Decode batch. #running-req: 11, #token: 32002, token usage: 0.84, gen throughput (token/s): 263.03, #queue-req: 260
  12885. 2025-07-20 15:36:12,929 - __main__ - INFO - sglang running req: 11 queue req: 260
  12886. 2025-07-20 15:36:13,753 - __main__ - INFO - Queue remaining: 2
  12887. 2025-07-20 15:36:13,753 - __main__ - INFO -
  12888. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12889. ----------------------------------------------------------------------------------
  12890. sglang_input_tokens 836.11 1006.76
  12891. sglang_output_tokens 242.24 294.29
  12892. 2025-07-20 15:36:13,753 - __main__ - INFO -
  12893. Worker ID | finished | started
  12894. ----------+----------+--------
  12895. 0 | 239 | 500
  12896. 1 | 0 | 10
  12897. 2025-07-20 15:36:13,924 - sglang - INFO - [2025-07-20 15:36:13 TP0] Decode batch. #running-req: 11, #token: 32442, token usage: 0.85, gen throughput (token/s): 442.08, #queue-req: 260
  12898. 2025-07-20 15:36:13,924 - __main__ - INFO - sglang running req: 11 queue req: 260
  12899. 2025-07-20 15:36:14,918 - sglang - INFO - [2025-07-20 15:36:14 TP0] Decode batch. #running-req: 11, #token: 32882, token usage: 0.87, gen throughput (token/s): 442.48, #queue-req: 260
  12900. 2025-07-20 15:36:14,919 - __main__ - INFO - sglang running req: 11 queue req: 260
  12901. 2025-07-20 15:36:14,993 - sglang - INFO - [2025-07-20 15:36:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2197, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 259
  12902. 2025-07-20 15:36:14,994 - __main__ - INFO - sglang running req: 10 queue req: 259
  12903. 2025-07-20 15:36:16,466 - sglang - INFO - [2025-07-20 15:36:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2720, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 258
  12904. 2025-07-20 15:36:16,466 - __main__ - INFO - sglang running req: 10 queue req: 258
  12905. 2025-07-20 15:36:17,449 - sglang - INFO - [2025-07-20 15:36:17 TP0] Decode batch. #running-req: 11, #token: 32134, token usage: 0.85, gen throughput (token/s): 173.09, #queue-req: 258
  12906. 2025-07-20 15:36:17,449 - __main__ - INFO - sglang running req: 11 queue req: 258
  12907. 2025-07-20 15:36:18,444 - sglang - INFO - [2025-07-20 15:36:18 TP0] Decode batch. #running-req: 11, #token: 32574, token usage: 0.86, gen throughput (token/s): 442.36, #queue-req: 258
  12908. 2025-07-20 15:36:18,444 - __main__ - INFO - sglang running req: 11 queue req: 258
  12909. 2025-07-20 15:36:19,440 - sglang - INFO - [2025-07-20 15:36:19 TP0] Decode batch. #running-req: 11, #token: 33014, token usage: 0.87, gen throughput (token/s): 441.51, #queue-req: 258
  12910. 2025-07-20 15:36:19,440 - __main__ - INFO - sglang running req: 11 queue req: 258
  12911. 2025-07-20 15:36:19,515 - sglang - INFO - [2025-07-20 15:36:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 257
  12912. 2025-07-20 15:36:19,516 - __main__ - INFO - sglang running req: 10 queue req: 257
  12913. 2025-07-20 15:36:21,272 - sglang - INFO - [2025-07-20 15:36:21 TP0] Decode batch. #running-req: 11, #token: 32639, token usage: 0.86, gen throughput (token/s): 239.63, #queue-req: 257
  12914. 2025-07-20 15:36:21,272 - __main__ - INFO - sglang running req: 11 queue req: 257
  12915. 2025-07-20 15:36:22,274 - sglang - INFO - [2025-07-20 15:36:22 TP0] Decode batch. #running-req: 10, #token: 30564, token usage: 0.80, gen throughput (token/s): 429.16, #queue-req: 257
  12916. 2025-07-20 15:36:22,274 - __main__ - INFO - sglang running req: 10 queue req: 257
  12917. 2025-07-20 15:36:22,595 - sglang - INFO - [2025-07-20 15:36:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 256
  12918. 2025-07-20 15:36:22,595 - __main__ - INFO - sglang running req: 9 queue req: 256
  12919. 2025-07-20 15:36:23,756 - __main__ - INFO - Queue remaining: 2
  12920. 2025-07-20 15:36:23,756 - __main__ - INFO -
  12921. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12922. ----------------------------------------------------------------------------------
  12923. sglang_input_tokens 842.21 1013.41
  12924. sglang_output_tokens 243.98 295.25
  12925. 2025-07-20 15:36:23,756 - __main__ - INFO -
  12926. Worker ID | finished | started
  12927. ----------+----------+--------
  12928. 0 | 244 | 500
  12929. 1 | 0 | 10
  12930. 2025-07-20 15:36:23,988 - sglang - INFO - [2025-07-20 15:36:23 TP0] Decode batch. #running-req: 10, #token: 29411, token usage: 0.77, gen throughput (token/s): 232.84, #queue-req: 256
  12931. 2025-07-20 15:36:23,988 - __main__ - INFO - sglang running req: 10 queue req: 256
  12932. 2025-07-20 15:36:24,727 - sglang - INFO - [2025-07-20 15:36:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 255
  12933. 2025-07-20 15:36:24,727 - __main__ - INFO - sglang running req: 9 queue req: 255
  12934. 2025-07-20 15:36:25,725 - sglang - INFO - [2025-07-20 15:36:25 TP0] Decode batch. #running-req: 10, #token: 28923, token usage: 0.76, gen throughput (token/s): 229.69, #queue-req: 255
  12935. 2025-07-20 15:36:25,725 - __main__ - INFO - sglang running req: 10 queue req: 255
  12936. 2025-07-20 15:36:25,971 - sglang - INFO - [2025-07-20 15:36:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2787, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 254
  12937. 2025-07-20 15:36:25,972 - __main__ - INFO - sglang running req: 9 queue req: 254
  12938. 2025-07-20 15:36:27,520 - sglang - INFO - [2025-07-20 15:36:27 TP0] Decode batch. #running-req: 10, #token: 29181, token usage: 0.77, gen throughput (token/s): 222.22, #queue-req: 254
  12939. 2025-07-20 15:36:27,521 - __main__ - INFO - sglang running req: 10 queue req: 254
  12940. 2025-07-20 15:36:28,508 - sglang - INFO - [2025-07-20 15:36:28 TP0] Decode batch. #running-req: 10, #token: 29581, token usage: 0.78, gen throughput (token/s): 405.02, #queue-req: 254
  12941. 2025-07-20 15:36:28,508 - __main__ - INFO - sglang running req: 10 queue req: 254
  12942. 2025-07-20 15:36:29,492 - sglang - INFO - [2025-07-20 15:36:29 TP0] Decode batch. #running-req: 10, #token: 29981, token usage: 0.79, gen throughput (token/s): 406.45, #queue-req: 254
  12943. 2025-07-20 15:36:29,492 - __main__ - INFO - sglang running req: 10 queue req: 254
  12944. 2025-07-20 15:36:30,479 - sglang - INFO - [2025-07-20 15:36:30 TP0] Decode batch. #running-req: 10, #token: 30381, token usage: 0.80, gen throughput (token/s): 405.15, #queue-req: 254
  12945. 2025-07-20 15:36:30,479 - __main__ - INFO - sglang running req: 10 queue req: 254
  12946. 2025-07-20 15:36:30,627 - sglang - INFO - [2025-07-20 15:36:30 TP0] Prefill batch. #new-seq: 2, #new-token: 4872, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 8, #queue-req: 252
  12947. 2025-07-20 15:36:30,627 - __main__ - INFO - sglang running req: 8 queue req: 252
  12948. 2025-07-20 15:36:32,938 - sglang - INFO - [2025-07-20 15:36:32 TP0] Decode batch. #running-req: 10, #token: 28541, token usage: 0.75, gen throughput (token/s): 161.86, #queue-req: 252
  12949. 2025-07-20 15:36:32,938 - __main__ - INFO - sglang running req: 10 queue req: 252
  12950. 2025-07-20 15:36:33,750 - sglang - INFO - [2025-07-20 15:36:33 TP0] Prefill batch. #new-seq: 2, #new-token: 4028, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 250
  12951. 2025-07-20 15:36:33,750 - __main__ - INFO - sglang running req: 9 queue req: 250
  12952. 2025-07-20 15:36:33,757 - __main__ - INFO - Queue remaining: 2
  12953. 2025-07-20 15:36:33,757 - __main__ - INFO -
  12954. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12955. ----------------------------------------------------------------------------------
  12956. sglang_input_tokens 848.88 1020.25
  12957. sglang_output_tokens 246.07 298.15
  12958. 2025-07-20 15:36:33,757 - __main__ - INFO -
  12959. Worker ID | finished | started
  12960. ----------+----------+--------
  12961. 0 | 249 | 500
  12962. 1 | 0 | 10
  12963. 2025-07-20 15:36:35,256 - sglang - INFO - [2025-07-20 15:36:35 TP0] Decode batch. #running-req: 11, #token: 29288, token usage: 0.77, gen throughput (token/s): 175.14, #queue-req: 250
  12964. 2025-07-20 15:36:35,257 - __main__ - INFO - sglang running req: 11 queue req: 250
  12965. 2025-07-20 15:36:36,084 - sglang - INFO - [2025-07-20 15:36:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2557, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 249
  12966. 2025-07-20 15:36:36,084 - __main__ - INFO - sglang running req: 10 queue req: 249
  12967. 2025-07-20 15:36:37,116 - sglang - INFO - [2025-07-20 15:36:37 TP0] Decode batch. #running-req: 11, #token: 29515, token usage: 0.78, gen throughput (token/s): 236.05, #queue-req: 249
  12968. 2025-07-20 15:36:37,116 - __main__ - INFO - sglang running req: 11 queue req: 249
  12969. 2025-07-20 15:36:38,106 - sglang - INFO - [2025-07-20 15:36:38 TP0] Decode batch. #running-req: 11, #token: 29955, token usage: 0.79, gen throughput (token/s): 444.40, #queue-req: 249
  12970. 2025-07-20 15:36:38,106 - __main__ - INFO - sglang running req: 11 queue req: 249
  12971. 2025-07-20 15:36:39,097 - sglang - INFO - [2025-07-20 15:36:39 TP0] Decode batch. #running-req: 11, #token: 30395, token usage: 0.80, gen throughput (token/s): 443.87, #queue-req: 249
  12972. 2025-07-20 15:36:39,097 - __main__ - INFO - sglang running req: 11 queue req: 249
  12973. 2025-07-20 15:36:39,917 - sglang - INFO - [2025-07-20 15:36:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2723, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 248
  12974. 2025-07-20 15:36:39,917 - __main__ - INFO - sglang running req: 10 queue req: 248
  12975. 2025-07-20 15:36:40,898 - sglang - INFO - [2025-07-20 15:36:40 TP0] Decode batch. #running-req: 11, #token: 30691, token usage: 0.81, gen throughput (token/s): 243.79, #queue-req: 248
  12976. 2025-07-20 15:36:40,898 - __main__ - INFO - sglang running req: 11 queue req: 248
  12977. 2025-07-20 15:36:41,890 - sglang - INFO - [2025-07-20 15:36:41 TP0] Decode batch. #running-req: 11, #token: 31131, token usage: 0.82, gen throughput (token/s): 443.71, #queue-req: 248
  12978. 2025-07-20 15:36:41,890 - __main__ - INFO - sglang running req: 11 queue req: 248
  12979. 2025-07-20 15:36:42,883 - sglang - INFO - [2025-07-20 15:36:42 TP0] Decode batch. #running-req: 10, #token: 29855, token usage: 0.79, gen throughput (token/s): 442.03, #queue-req: 248
  12980. 2025-07-20 15:36:42,883 - __main__ - INFO - sglang running req: 10 queue req: 248
  12981. 2025-07-20 15:36:43,758 - __main__ - INFO - Queue remaining: 2
  12982. 2025-07-20 15:36:43,758 - __main__ - INFO -
  12983. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  12984. ----------------------------------------------------------------------------------
  12985. sglang_input_tokens 845.09 1011.62
  12986. sglang_output_tokens 244.79 295.57
  12987. 2025-07-20 15:36:43,758 - __main__ - INFO -
  12988. Worker ID | finished | started
  12989. ----------+----------+--------
  12990. 0 | 252 | 500
  12991. 1 | 0 | 10
  12992. 2025-07-20 15:36:43,870 - sglang - INFO - [2025-07-20 15:36:43 TP0] Decode batch. #running-req: 10, #token: 30255, token usage: 0.80, gen throughput (token/s): 405.17, #queue-req: 248
  12993. 2025-07-20 15:36:43,870 - __main__ - INFO - sglang running req: 10 queue req: 248
  12994. 2025-07-20 15:36:43,994 - sglang - INFO - [2025-07-20 15:36:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2746, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 247
  12995. 2025-07-20 15:36:43,994 - __main__ - INFO - sglang running req: 9 queue req: 247
  12996. 2025-07-20 15:36:45,692 - sglang - INFO - [2025-07-20 15:36:45 TP0] Decode batch. #running-req: 10, #token: 30527, token usage: 0.80, gen throughput (token/s): 219.01, #queue-req: 247
  12997. 2025-07-20 15:36:45,692 - __main__ - INFO - sglang running req: 10 queue req: 247
  12998. 2025-07-20 15:36:46,461 - sglang - INFO - [2025-07-20 15:36:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1102, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 246
  12999. 2025-07-20 15:36:46,461 - __main__ - INFO - sglang running req: 9 queue req: 246
  13000. 2025-07-20 15:36:47,168 - sglang - INFO - [2025-07-20 15:36:47 TP0] Decode batch. #running-req: 10, #token: 28471, token usage: 0.75, gen throughput (token/s): 270.24, #queue-req: 246
  13001. 2025-07-20 15:36:47,168 - __main__ - INFO - sglang running req: 10 queue req: 246
  13002. 2025-07-20 15:36:47,783 - sglang - INFO - [2025-07-20 15:36:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2765, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 245
  13003. 2025-07-20 15:36:47,784 - __main__ - INFO - sglang running req: 9 queue req: 245
  13004. 2025-07-20 15:36:48,964 - sglang - INFO - [2025-07-20 15:36:48 TP0] Decode batch. #running-req: 10, #token: 30500, token usage: 0.80, gen throughput (token/s): 222.25, #queue-req: 245
  13005. 2025-07-20 15:36:48,964 - __main__ - INFO - sglang running req: 10 queue req: 245
  13006. 2025-07-20 15:36:49,707 - sglang - INFO - [2025-07-20 15:36:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2462, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 244
  13007. 2025-07-20 15:36:49,707 - __main__ - INFO - sglang running req: 9 queue req: 244
  13008. 2025-07-20 15:36:50,710 - sglang - INFO - [2025-07-20 15:36:50 TP0] Decode batch. #running-req: 10, #token: 30784, token usage: 0.81, gen throughput (token/s): 228.51, #queue-req: 244
  13009. 2025-07-20 15:36:50,710 - __main__ - INFO - sglang running req: 10 queue req: 244
  13010. 2025-07-20 15:36:51,698 - sglang - INFO - [2025-07-20 15:36:51 TP0] Decode batch. #running-req: 10, #token: 31184, token usage: 0.82, gen throughput (token/s): 404.94, #queue-req: 244
  13011. 2025-07-20 15:36:51,698 - __main__ - INFO - sglang running req: 10 queue req: 244
  13012. 2025-07-20 15:36:51,896 - sglang - INFO - [2025-07-20 15:36:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2658, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 243
  13013. 2025-07-20 15:36:51,896 - __main__ - INFO - sglang running req: 9 queue req: 243
  13014. 2025-07-20 15:36:53,477 - sglang - INFO - [2025-07-20 15:36:53 TP0] Decode batch. #running-req: 10, #token: 30418, token usage: 0.80, gen throughput (token/s): 224.22, #queue-req: 243
  13015. 2025-07-20 15:36:53,478 - __main__ - INFO - sglang running req: 10 queue req: 243
  13016. 2025-07-20 15:36:53,759 - __main__ - INFO - Queue remaining: 2
  13017. 2025-07-20 15:36:53,760 - __main__ - INFO -
  13018. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13019. ----------------------------------------------------------------------------------
  13020. sglang_input_tokens 848.79 1022.97
  13021. sglang_output_tokens 245.35 297.58
  13022. 2025-07-20 15:36:53,760 - __main__ - INFO -
  13023. Worker ID | finished | started
  13024. ----------+----------+--------
  13025. 0 | 257 | 500
  13026. 1 | 0 | 10
  13027. 2025-07-20 15:36:54,272 - sglang - INFO - [2025-07-20 15:36:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 242
  13028. 2025-07-20 15:36:54,273 - __main__ - INFO - sglang running req: 9 queue req: 242
  13029. 2025-07-20 15:36:55,220 - sglang - INFO - [2025-07-20 15:36:55 TP0] Decode batch. #running-req: 10, #token: 30016, token usage: 0.79, gen throughput (token/s): 228.92, #queue-req: 242
  13030. 2025-07-20 15:36:55,220 - __main__ - INFO - sglang running req: 10 queue req: 242
  13031. 2025-07-20 15:36:56,208 - sglang - INFO - [2025-07-20 15:36:56 TP0] Decode batch. #running-req: 10, #token: 30416, token usage: 0.80, gen throughput (token/s): 404.97, #queue-req: 242
  13032. 2025-07-20 15:36:56,208 - __main__ - INFO - sglang running req: 10 queue req: 242
  13033. 2025-07-20 15:36:56,703 - sglang - INFO - [2025-07-20 15:36:56 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 241
  13034. 2025-07-20 15:36:56,703 - __main__ - INFO - sglang running req: 9 queue req: 241
  13035. 2025-07-20 15:36:57,844 - sglang - INFO - [2025-07-20 15:36:57 TP0] Decode batch. #running-req: 10, #token: 29439, token usage: 0.77, gen throughput (token/s): 243.87, #queue-req: 241
  13036. 2025-07-20 15:36:57,844 - __main__ - INFO - sglang running req: 10 queue req: 241
  13037. 2025-07-20 15:36:58,216 - sglang - INFO - [2025-07-20 15:36:58 TP0] Prefill batch. #new-seq: 1, #new-token: 1278, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 240
  13038. 2025-07-20 15:36:58,216 - __main__ - INFO - sglang running req: 9 queue req: 240
  13039. 2025-07-20 15:36:59,338 - sglang - INFO - [2025-07-20 15:36:59 TP0] Decode batch. #running-req: 10, #token: 27934, token usage: 0.74, gen throughput (token/s): 267.14, #queue-req: 240
  13040. 2025-07-20 15:36:59,338 - __main__ - INFO - sglang running req: 10 queue req: 240
  13041. 2025-07-20 15:37:00,319 - sglang - INFO - [2025-07-20 15:37:00 TP0] Decode batch. #running-req: 10, #token: 28334, token usage: 0.75, gen throughput (token/s): 407.60, #queue-req: 240
  13042. 2025-07-20 15:37:00,319 - __main__ - INFO - sglang running req: 10 queue req: 240
  13043. 2025-07-20 15:37:01,304 - sglang - INFO - [2025-07-20 15:37:01 TP0] Decode batch. #running-req: 10, #token: 28734, token usage: 0.76, gen throughput (token/s): 405.99, #queue-req: 240
  13044. 2025-07-20 15:37:01,304 - __main__ - INFO - sglang running req: 10 queue req: 240
  13045. 2025-07-20 15:37:01,822 - sglang - INFO - [2025-07-20 15:37:01 TP0] Prefill batch. #new-seq: 2, #new-token: 4188, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 238
  13046. 2025-07-20 15:37:01,822 - __main__ - INFO - sglang running req: 9 queue req: 238
  13047. 2025-07-20 15:37:03,613 - sglang - INFO - [2025-07-20 15:37:03 TP0] Decode batch. #running-req: 11, #token: 29688, token usage: 0.78, gen throughput (token/s): 181.06, #queue-req: 238
  13048. 2025-07-20 15:37:03,613 - __main__ - INFO - sglang running req: 11 queue req: 238
  13049. 2025-07-20 15:37:03,761 - __main__ - INFO - Queue remaining: 2
  13050. 2025-07-20 15:37:03,761 - __main__ - INFO -
  13051. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13052. ----------------------------------------------------------------------------------
  13053. sglang_input_tokens 851.26 1019.37
  13054. sglang_output_tokens 245.98 295.62
  13055. 2025-07-20 15:37:03,761 - __main__ - INFO -
  13056. Worker ID | finished | started
  13057. ----------+----------+--------
  13058. 0 | 261 | 500
  13059. 1 | 0 | 10
  13060. 2025-07-20 15:37:03,809 - sglang - INFO - [2025-07-20 15:37:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 237
  13061. 2025-07-20 15:37:03,810 - __main__ - INFO - sglang running req: 10 queue req: 237
  13062. 2025-07-20 15:37:05,400 - sglang - INFO - [2025-07-20 15:37:05 TP0] Decode batch. #running-req: 11, #token: 31174, token usage: 0.82, gen throughput (token/s): 245.59, #queue-req: 237
  13063. 2025-07-20 15:37:05,401 - __main__ - INFO - sglang running req: 11 queue req: 237
  13064. 2025-07-20 15:37:06,392 - sglang - INFO - [2025-07-20 15:37:06 TP0] Decode batch. #running-req: 11, #token: 31614, token usage: 0.83, gen throughput (token/s): 443.83, #queue-req: 237
  13065. 2025-07-20 15:37:06,392 - __main__ - INFO - sglang running req: 11 queue req: 237
  13066. 2025-07-20 15:37:06,516 - sglang - INFO - [2025-07-20 15:37:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2906, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 236
  13067. 2025-07-20 15:37:06,516 - __main__ - INFO - sglang running req: 10 queue req: 236
  13068. 2025-07-20 15:37:07,576 - sglang - INFO - [2025-07-20 15:37:07 TP0] Prefill batch. #new-seq: 1, #new-token: 1757, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 235
  13069. 2025-07-20 15:37:07,580 - __main__ - INFO - sglang running req: 10 queue req: 235
  13070. 2025-07-20 15:37:08,935 - sglang - INFO - [2025-07-20 15:37:08 TP0] Decode batch. #running-req: 11, #token: 31180, token usage: 0.82, gen throughput (token/s): 172.22, #queue-req: 235
  13071. 2025-07-20 15:37:08,935 - __main__ - INFO - sglang running req: 11 queue req: 235
  13072. 2025-07-20 15:37:09,537 - __main__ - WARNING - JSON decode error on attempt 0 for scripts/data/11445200MB2D6222364440125017008.pdf-13: Unterminated string starting at: line 1 column 125 (char 124)
  13073. 2025-07-20 15:37:09,556 - sglang - INFO - [2025-07-20 15:37:09 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 234
  13074. 2025-07-20 15:37:09,557 - __main__ - INFO - sglang running req: 10 queue req: 234
  13075. 2025-07-20 15:37:09,786 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-13
  13076. 2025-07-20 15:37:10,581 - sglang - INFO - [2025-07-20 15:37:10 TP0] Decode batch. #running-req: 10, #token: 27946, token usage: 0.74, gen throughput (token/s): 266.07, #queue-req: 235
  13077. 2025-07-20 15:37:10,581 - __main__ - INFO - sglang running req: 10 queue req: 235
  13078. 2025-07-20 15:37:10,582 - sglang - INFO - [2025-07-20 15:37:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2772, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 234
  13079. 2025-07-20 15:37:10,582 - __main__ - INFO - sglang running req: 10 queue req: 234
  13080. 2025-07-20 15:37:12,383 - sglang - INFO - [2025-07-20 15:37:12 TP0] Decode batch. #running-req: 11, #token: 31158, token usage: 0.82, gen throughput (token/s): 244.25, #queue-req: 234
  13081. 2025-07-20 15:37:12,383 - __main__ - INFO - sglang running req: 11 queue req: 234
  13082. 2025-07-20 15:37:13,378 - sglang - INFO - [2025-07-20 15:37:13 TP0] Decode batch. #running-req: 11, #token: 31598, token usage: 0.83, gen throughput (token/s): 442.14, #queue-req: 234
  13083. 2025-07-20 15:37:13,378 - __main__ - INFO - sglang running req: 11 queue req: 234
  13084. 2025-07-20 15:37:13,762 - __main__ - INFO - Queue remaining: 2
  13085. 2025-07-20 15:37:13,763 - __main__ - INFO -
  13086. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13087. ----------------------------------------------------------------------------------
  13088. sglang_input_tokens 853.20 1025.23
  13089. sglang_output_tokens 246.04 293.57
  13090. 2025-07-20 15:37:13,763 - __main__ - INFO -
  13091. Worker ID | finished | started
  13092. ----------+----------+--------
  13093. 0 | 265 | 500
  13094. 1 | 0 | 10
  13095. 2025-07-20 15:37:14,373 - sglang - INFO - [2025-07-20 15:37:14 TP0] Decode batch. #running-req: 11, #token: 32038, token usage: 0.84, gen throughput (token/s): 442.14, #queue-req: 234
  13096. 2025-07-20 15:37:14,373 - __main__ - INFO - sglang running req: 11 queue req: 234
  13097. 2025-07-20 15:37:15,366 - sglang - INFO - [2025-07-20 15:37:15 TP0] Decode batch. #running-req: 11, #token: 32478, token usage: 0.85, gen throughput (token/s): 443.00, #queue-req: 234
  13098. 2025-07-20 15:37:15,366 - __main__ - INFO - sglang running req: 11 queue req: 234
  13099. 2025-07-20 15:37:15,939 - sglang - INFO - [2025-07-20 15:37:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2188, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 233
  13100. 2025-07-20 15:37:15,939 - __main__ - INFO - sglang running req: 10 queue req: 233
  13101. 2025-07-20 15:37:17,087 - sglang - INFO - [2025-07-20 15:37:17 TP0] Decode batch. #running-req: 11, #token: 31502, token usage: 0.83, gen throughput (token/s): 255.04, #queue-req: 233
  13102. 2025-07-20 15:37:17,088 - __main__ - INFO - sglang running req: 11 queue req: 233
  13103. 2025-07-20 15:37:18,081 - sglang - INFO - [2025-07-20 15:37:18 TP0] Decode batch. #running-req: 11, #token: 31942, token usage: 0.84, gen throughput (token/s): 442.94, #queue-req: 233
  13104. 2025-07-20 15:37:18,081 - __main__ - INFO - sglang running req: 11 queue req: 233
  13105. 2025-07-20 15:37:18,579 - sglang - INFO - [2025-07-20 15:37:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1908, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 232
  13106. 2025-07-20 15:37:18,579 - __main__ - INFO - sglang running req: 10 queue req: 232
  13107. 2025-07-20 15:37:19,730 - sglang - INFO - [2025-07-20 15:37:19 TP0] Decode batch. #running-req: 11, #token: 32194, token usage: 0.85, gen throughput (token/s): 266.20, #queue-req: 232
  13108. 2025-07-20 15:37:19,730 - __main__ - INFO - sglang running req: 11 queue req: 232
  13109. 2025-07-20 15:37:20,325 - sglang - INFO - [2025-07-20 15:37:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2606, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 231
  13110. 2025-07-20 15:37:20,325 - __main__ - INFO - sglang running req: 9 queue req: 231
  13111. 2025-07-20 15:37:21,506 - sglang - INFO - [2025-07-20 15:37:21 TP0] Decode batch. #running-req: 10, #token: 29452, token usage: 0.78, gen throughput (token/s): 229.14, #queue-req: 231
  13112. 2025-07-20 15:37:21,506 - __main__ - INFO - sglang running req: 10 queue req: 231
  13113. 2025-07-20 15:37:21,556 - sglang - INFO - [2025-07-20 15:37:21 TP0] Prefill batch. #new-seq: 1, #new-token: 1939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 230
  13114. 2025-07-20 15:37:21,556 - __main__ - INFO - sglang running req: 9 queue req: 230
  13115. 2025-07-20 15:37:23,141 - sglang - INFO - [2025-07-20 15:37:23 TP0] Decode batch. #running-req: 10, #token: 28339, token usage: 0.75, gen throughput (token/s): 244.03, #queue-req: 230
  13116. 2025-07-20 15:37:23,142 - __main__ - INFO - sglang running req: 10 queue req: 230
  13117. 2025-07-20 15:37:23,485 - sglang - INFO - [2025-07-20 15:37:23 TP0] Prefill batch. #new-seq: 1, #new-token: 2593, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 229
  13118. 2025-07-20 15:37:23,485 - __main__ - INFO - sglang running req: 9 queue req: 229
  13119. 2025-07-20 15:37:23,764 - __main__ - INFO - Queue remaining: 2
  13120. 2025-07-20 15:37:23,764 - __main__ - INFO -
  13121. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13122. ----------------------------------------------------------------------------------
  13123. sglang_input_tokens 860.77 1035.29
  13124. sglang_output_tokens 248.19 296.91
  13125. 2025-07-20 15:37:23,764 - __main__ - INFO -
  13126. Worker ID | finished | started
  13127. ----------+----------+--------
  13128. 0 | 271 | 500
  13129. 1 | 0 | 10
  13130. 2025-07-20 15:37:24,911 - sglang - INFO - [2025-07-20 15:37:24 TP0] Decode batch. #running-req: 10, #token: 27966, token usage: 0.74, gen throughput (token/s): 225.48, #queue-req: 229
  13131. 2025-07-20 15:37:24,911 - __main__ - INFO - sglang running req: 10 queue req: 229
  13132. 2025-07-20 15:37:25,500 - sglang - INFO - [2025-07-20 15:37:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2299, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 228
  13133. 2025-07-20 15:37:25,500 - __main__ - INFO - sglang running req: 9 queue req: 228
  13134. 2025-07-20 15:37:26,620 - sglang - INFO - [2025-07-20 15:37:26 TP0] Decode batch. #running-req: 10, #token: 27461, token usage: 0.72, gen throughput (token/s): 233.44, #queue-req: 228
  13135. 2025-07-20 15:37:26,620 - __main__ - INFO - sglang running req: 10 queue req: 228
  13136. 2025-07-20 15:37:27,601 - sglang - INFO - [2025-07-20 15:37:27 TP0] Decode batch. #running-req: 10, #token: 27861, token usage: 0.73, gen throughput (token/s): 407.62, #queue-req: 228
  13137. 2025-07-20 15:37:27,602 - __main__ - INFO - sglang running req: 10 queue req: 228
  13138. 2025-07-20 15:37:28,584 - sglang - INFO - [2025-07-20 15:37:28 TP0] Decode batch. #running-req: 10, #token: 28261, token usage: 0.74, gen throughput (token/s): 407.23, #queue-req: 228
  13139. 2025-07-20 15:37:28,584 - __main__ - INFO - sglang running req: 10 queue req: 228
  13140. 2025-07-20 15:37:28,976 - sglang - INFO - [2025-07-20 15:37:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 227
  13141. 2025-07-20 15:37:28,976 - __main__ - INFO - sglang running req: 9 queue req: 227
  13142. 2025-07-20 15:37:30,321 - sglang - INFO - [2025-07-20 15:37:30 TP0] Decode batch. #running-req: 10, #token: 29004, token usage: 0.76, gen throughput (token/s): 229.66, #queue-req: 227
  13143. 2025-07-20 15:37:30,321 - __main__ - INFO - sglang running req: 10 queue req: 227
  13144. 2025-07-20 15:37:31,305 - sglang - INFO - [2025-07-20 15:37:31 TP0] Decode batch. #running-req: 10, #token: 29404, token usage: 0.77, gen throughput (token/s): 406.34, #queue-req: 227
  13145. 2025-07-20 15:37:31,305 - __main__ - INFO - sglang running req: 10 queue req: 227
  13146. 2025-07-20 15:37:32,298 - sglang - INFO - [2025-07-20 15:37:32 TP0] Decode batch. #running-req: 10, #token: 26151, token usage: 0.69, gen throughput (token/s): 403.10, #queue-req: 227
  13147. 2025-07-20 15:37:32,298 - __main__ - INFO - sglang running req: 10 queue req: 227
  13148. 2025-07-20 15:37:32,322 - sglang - INFO - [2025-07-20 15:37:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2281, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 226
  13149. 2025-07-20 15:37:32,322 - __main__ - INFO - sglang running req: 9 queue req: 226
  13150. 2025-07-20 15:37:33,766 - __main__ - INFO - Queue remaining: 2
  13151. 2025-07-20 15:37:33,767 - __main__ - INFO -
  13152. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13153. ----------------------------------------------------------------------------------
  13154. sglang_input_tokens 858.84 1018.66
  13155. sglang_output_tokens 247.37 292.33
  13156. 2025-07-20 15:37:33,767 - __main__ - INFO -
  13157. Worker ID | finished | started
  13158. ----------+----------+--------
  13159. 0 | 274 | 500
  13160. 1 | 0 | 10
  13161. 2025-07-20 15:37:34,009 - sglang - INFO - [2025-07-20 15:37:34 TP0] Decode batch. #running-req: 10, #token: 28831, token usage: 0.76, gen throughput (token/s): 233.15, #queue-req: 226
  13162. 2025-07-20 15:37:34,009 - __main__ - INFO - sglang running req: 10 queue req: 226
  13163. 2025-07-20 15:37:34,625 - sglang - INFO - [2025-07-20 15:37:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2165, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 225
  13164. 2025-07-20 15:37:34,625 - __main__ - INFO - sglang running req: 9 queue req: 225
  13165. 2025-07-20 15:37:35,666 - sglang - INFO - [2025-07-20 15:37:35 TP0] Decode batch. #running-req: 10, #token: 28062, token usage: 0.74, gen throughput (token/s): 240.84, #queue-req: 225
  13166. 2025-07-20 15:37:35,666 - __main__ - INFO - sglang running req: 10 queue req: 225
  13167. 2025-07-20 15:37:36,475 - sglang - INFO - [2025-07-20 15:37:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2909, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 224
  13168. 2025-07-20 15:37:36,475 - __main__ - INFO - sglang running req: 9 queue req: 224
  13169. 2025-07-20 15:37:37,481 - sglang - INFO - [2025-07-20 15:37:37 TP0] Decode batch. #running-req: 10, #token: 28613, token usage: 0.75, gen throughput (token/s): 219.76, #queue-req: 224
  13170. 2025-07-20 15:37:37,482 - __main__ - INFO - sglang running req: 10 queue req: 224
  13171. 2025-07-20 15:37:37,702 - sglang - INFO - [2025-07-20 15:37:37 TP0] Prefill batch. #new-seq: 1, #new-token: 1745, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 223
  13172. 2025-07-20 15:37:37,702 - __main__ - INFO - sglang running req: 9 queue req: 223
  13173. 2025-07-20 15:37:38,876 - sglang - INFO - [2025-07-20 15:37:38 TP0] Prefill batch. #new-seq: 2, #new-token: 4534, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 9, #queue-req: 221
  13174. 2025-07-20 15:37:38,876 - __main__ - INFO - sglang running req: 9 queue req: 221
  13175. 2025-07-20 15:37:40,491 - sglang - INFO - [2025-07-20 15:37:40 TP0] Decode batch. #running-req: 11, #token: 29110, token usage: 0.77, gen throughput (token/s): 134.89, #queue-req: 221
  13176. 2025-07-20 15:37:40,492 - __main__ - INFO - sglang running req: 11 queue req: 221
  13177. 2025-07-20 15:37:41,480 - sglang - INFO - [2025-07-20 15:37:41 TP0] Decode batch. #running-req: 11, #token: 29550, token usage: 0.78, gen throughput (token/s): 445.12, #queue-req: 221
  13178. 2025-07-20 15:37:41,480 - __main__ - INFO - sglang running req: 11 queue req: 221
  13179. 2025-07-20 15:37:41,678 - sglang - INFO - [2025-07-20 15:37:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2180, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 220
  13180. 2025-07-20 15:37:41,678 - __main__ - INFO - sglang running req: 10 queue req: 220
  13181. 2025-07-20 15:37:43,283 - sglang - INFO - [2025-07-20 15:37:43 TP0] Decode batch. #running-req: 11, #token: 28550, token usage: 0.75, gen throughput (token/s): 243.43, #queue-req: 220
  13182. 2025-07-20 15:37:43,283 - __main__ - INFO - sglang running req: 11 queue req: 220
  13183. 2025-07-20 15:37:43,768 - __main__ - INFO - Queue remaining: 2
  13184. 2025-07-20 15:37:43,769 - __main__ - INFO -
  13185. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13186. ----------------------------------------------------------------------------------
  13187. sglang_input_tokens 863.59 1020.12
  13188. sglang_output_tokens 248.90 293.74
  13189. 2025-07-20 15:37:43,769 - __main__ - INFO -
  13190. Worker ID | finished | started
  13191. ----------+----------+--------
  13192. 0 | 279 | 500
  13193. 1 | 0 | 10
  13194. 2025-07-20 15:37:44,183 - sglang - INFO - [2025-07-20 15:37:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2348, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 219
  13195. 2025-07-20 15:37:44,183 - __main__ - INFO - sglang running req: 10 queue req: 219
  13196. 2025-07-20 15:37:45,077 - sglang - INFO - [2025-07-20 15:37:45 TP0] Decode batch. #running-req: 11, #token: 28548, token usage: 0.75, gen throughput (token/s): 244.78, #queue-req: 219
  13197. 2025-07-20 15:37:45,077 - __main__ - INFO - sglang running req: 11 queue req: 219
  13198. 2025-07-20 15:37:46,060 - sglang - INFO - [2025-07-20 15:37:46 TP0] Decode batch. #running-req: 11, #token: 28988, token usage: 0.76, gen throughput (token/s): 447.24, #queue-req: 219
  13199. 2025-07-20 15:37:46,061 - __main__ - INFO - sglang running req: 11 queue req: 219
  13200. 2025-07-20 15:37:46,506 - sglang - INFO - [2025-07-20 15:37:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2574, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 218
  13201. 2025-07-20 15:37:46,506 - __main__ - INFO - sglang running req: 10 queue req: 218
  13202. 2025-07-20 15:37:47,836 - sglang - INFO - [2025-07-20 15:37:47 TP0] Decode batch. #running-req: 11, #token: 28805, token usage: 0.76, gen throughput (token/s): 247.18, #queue-req: 218
  13203. 2025-07-20 15:37:47,837 - __main__ - INFO - sglang running req: 11 queue req: 218
  13204. 2025-07-20 15:37:48,824 - sglang - INFO - [2025-07-20 15:37:48 TP0] Decode batch. #running-req: 11, #token: 29245, token usage: 0.77, gen throughput (token/s): 445.53, #queue-req: 218
  13205. 2025-07-20 15:37:48,824 - __main__ - INFO - sglang running req: 11 queue req: 218
  13206. 2025-07-20 15:37:49,815 - sglang - INFO - [2025-07-20 15:37:49 TP0] Decode batch. #running-req: 11, #token: 29685, token usage: 0.78, gen throughput (token/s): 444.16, #queue-req: 218
  13207. 2025-07-20 15:37:49,815 - __main__ - INFO - sglang running req: 11 queue req: 218
  13208. 2025-07-20 15:37:50,806 - sglang - INFO - [2025-07-20 15:37:50 TP0] Decode batch. #running-req: 11, #token: 30125, token usage: 0.79, gen throughput (token/s): 443.94, #queue-req: 218
  13209. 2025-07-20 15:37:50,806 - __main__ - INFO - sglang running req: 11 queue req: 218
  13210. 2025-07-20 15:37:51,079 - sglang - INFO - [2025-07-20 15:37:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2971, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 217
  13211. 2025-07-20 15:37:51,079 - __main__ - INFO - sglang running req: 10 queue req: 217
  13212. 2025-07-20 15:37:52,666 - sglang - INFO - [2025-07-20 15:37:52 TP0] Decode batch. #running-req: 11, #token: 30103, token usage: 0.79, gen throughput (token/s): 235.98, #queue-req: 217
  13213. 2025-07-20 15:37:52,666 - __main__ - INFO - sglang running req: 11 queue req: 217
  13214. 2025-07-20 15:37:53,560 - sglang - INFO - [2025-07-20 15:37:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2317, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 216
  13215. 2025-07-20 15:37:53,560 - __main__ - INFO - sglang running req: 10 queue req: 216
  13216. 2025-07-20 15:37:53,770 - __main__ - INFO - Queue remaining: 2
  13217. 2025-07-20 15:37:53,770 - __main__ - INFO -
  13218. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13219. ----------------------------------------------------------------------------------
  13220. sglang_input_tokens 864.37 1015.52
  13221. sglang_output_tokens 248.72 292.27
  13222. 2025-07-20 15:37:53,770 - __main__ - INFO -
  13223. Worker ID | finished | started
  13224. ----------+----------+--------
  13225. 0 | 283 | 500
  13226. 1 | 0 | 10
  13227. 2025-07-20 15:37:54,409 - sglang - INFO - [2025-07-20 15:37:54 TP0] Decode batch. #running-req: 11, #token: 30688, token usage: 0.81, gen throughput (token/s): 251.84, #queue-req: 216
  13228. 2025-07-20 15:37:54,410 - __main__ - INFO - sglang running req: 11 queue req: 216
  13229. 2025-07-20 15:37:55,257 - sglang - INFO - [2025-07-20 15:37:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2014, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 215
  13230. 2025-07-20 15:37:55,257 - __main__ - INFO - sglang running req: 10 queue req: 215
  13231. 2025-07-20 15:37:56,065 - sglang - INFO - [2025-07-20 15:37:56 TP0] Decode batch. #running-req: 11, #token: 30268, token usage: 0.80, gen throughput (token/s): 265.13, #queue-req: 215
  13232. 2025-07-20 15:37:56,065 - __main__ - INFO - sglang running req: 11 queue req: 215
  13233. 2025-07-20 15:37:57,059 - sglang - INFO - [2025-07-20 15:37:57 TP0] Decode batch. #running-req: 11, #token: 30708, token usage: 0.81, gen throughput (token/s): 442.99, #queue-req: 215
  13234. 2025-07-20 15:37:57,059 - __main__ - INFO - sglang running req: 11 queue req: 215
  13235. 2025-07-20 15:37:58,053 - sglang - INFO - [2025-07-20 15:37:58 TP0] Decode batch. #running-req: 11, #token: 31148, token usage: 0.82, gen throughput (token/s): 442.27, #queue-req: 215
  13236. 2025-07-20 15:37:58,053 - __main__ - INFO - sglang running req: 11 queue req: 215
  13237. 2025-07-20 15:37:58,674 - sglang - INFO - [2025-07-20 15:37:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2720, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 214
  13238. 2025-07-20 15:37:58,674 - __main__ - INFO - sglang running req: 10 queue req: 214
  13239. 2025-07-20 15:37:59,853 - sglang - INFO - [2025-07-20 15:37:59 TP0] Decode batch. #running-req: 11, #token: 31503, token usage: 0.83, gen throughput (token/s): 243.96, #queue-req: 214
  13240. 2025-07-20 15:37:59,853 - __main__ - INFO - sglang running req: 11 queue req: 214
  13241. 2025-07-20 15:38:00,547 - sglang - INFO - [2025-07-20 15:38:00 TP0] Prefill batch. #new-seq: 1, #new-token: 1856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 213
  13242. 2025-07-20 15:38:00,548 - __main__ - INFO - sglang running req: 10 queue req: 213
  13243. 2025-07-20 15:38:01,495 - sglang - INFO - [2025-07-20 15:38:01 TP0] Decode batch. #running-req: 11, #token: 31070, token usage: 0.82, gen throughput (token/s): 267.33, #queue-req: 213
  13244. 2025-07-20 15:38:01,495 - __main__ - INFO - sglang running req: 11 queue req: 213
  13245. 2025-07-20 15:38:01,943 - sglang - INFO - [2025-07-20 15:38:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2701, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 212
  13246. 2025-07-20 15:38:01,943 - __main__ - INFO - sglang running req: 10 queue req: 212
  13247. 2025-07-20 15:38:02,800 - sglang - INFO - [2025-07-20 15:38:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2495, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 211
  13248. 2025-07-20 15:38:02,800 - __main__ - INFO - sglang running req: 10 queue req: 211
  13249. 2025-07-20 15:38:03,772 - __main__ - INFO - Queue remaining: 2
  13250. 2025-07-20 15:38:03,773 - __main__ - INFO -
  13251. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13252. ----------------------------------------------------------------------------------
  13253. sglang_input_tokens 867.89 1021.30
  13254. sglang_output_tokens 249.65 294.22
  13255. 2025-07-20 15:38:03,773 - __main__ - INFO -
  13256. Worker ID | finished | started
  13257. ----------+----------+--------
  13258. 0 | 288 | 500
  13259. 1 | 0 | 10
  13260. 2025-07-20 15:38:04,053 - sglang - INFO - [2025-07-20 15:38:04 TP0] Decode batch. #running-req: 11, #token: 30475, token usage: 0.80, gen throughput (token/s): 171.23, #queue-req: 211
  13261. 2025-07-20 15:38:04,053 - __main__ - INFO - sglang running req: 11 queue req: 211
  13262. 2025-07-20 15:38:05,045 - sglang - INFO - [2025-07-20 15:38:05 TP0] Decode batch. #running-req: 11, #token: 30915, token usage: 0.81, gen throughput (token/s): 443.71, #queue-req: 211
  13263. 2025-07-20 15:38:05,045 - __main__ - INFO - sglang running req: 11 queue req: 211
  13264. 2025-07-20 15:38:06,037 - sglang - INFO - [2025-07-20 15:38:06 TP0] Decode batch. #running-req: 11, #token: 31355, token usage: 0.83, gen throughput (token/s): 443.09, #queue-req: 211
  13265. 2025-07-20 15:38:06,038 - __main__ - INFO - sglang running req: 11 queue req: 211
  13266. 2025-07-20 15:38:06,658 - sglang - INFO - [2025-07-20 15:38:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2737, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 210
  13267. 2025-07-20 15:38:06,658 - __main__ - INFO - sglang running req: 10 queue req: 210
  13268. 2025-07-20 15:38:07,836 - sglang - INFO - [2025-07-20 15:38:07 TP0] Decode batch. #running-req: 11, #token: 31655, token usage: 0.83, gen throughput (token/s): 244.10, #queue-req: 210
  13269. 2025-07-20 15:38:07,836 - __main__ - INFO - sglang running req: 11 queue req: 210
  13270. 2025-07-20 15:38:08,828 - sglang - INFO - [2025-07-20 15:38:08 TP0] Decode batch. #running-req: 11, #token: 32095, token usage: 0.84, gen throughput (token/s): 443.25, #queue-req: 210
  13271. 2025-07-20 15:38:08,829 - __main__ - INFO - sglang running req: 11 queue req: 210
  13272. 2025-07-20 15:38:09,277 - sglang - INFO - [2025-07-20 15:38:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2705, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 209
  13273. 2025-07-20 15:38:09,277 - __main__ - INFO - sglang running req: 10 queue req: 209
  13274. 2025-07-20 15:38:10,646 - sglang - INFO - [2025-07-20 15:38:10 TP0] Decode batch. #running-req: 11, #token: 32026, token usage: 0.84, gen throughput (token/s): 241.51, #queue-req: 209
  13275. 2025-07-20 15:38:10,647 - __main__ - INFO - sglang running req: 11 queue req: 209
  13276. 2025-07-20 15:38:11,294 - sglang - INFO - [2025-07-20 15:38:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1818, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 208
  13277. 2025-07-20 15:38:11,294 - __main__ - INFO - sglang running req: 10 queue req: 208
  13278. 2025-07-20 15:38:12,289 - sglang - INFO - [2025-07-20 15:38:12 TP0] Decode batch. #running-req: 11, #token: 31497, token usage: 0.83, gen throughput (token/s): 267.21, #queue-req: 208
  13279. 2025-07-20 15:38:12,290 - __main__ - INFO - sglang running req: 11 queue req: 208
  13280. 2025-07-20 15:38:12,364 - sglang - INFO - [2025-07-20 15:38:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2538, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 207
  13281. 2025-07-20 15:38:12,365 - __main__ - INFO - sglang running req: 10 queue req: 207
  13282. 2025-07-20 15:38:13,774 - __main__ - INFO - Queue remaining: 2
  13283. 2025-07-20 15:38:13,775 - __main__ - INFO -
  13284. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13285. ----------------------------------------------------------------------------------
  13286. sglang_input_tokens 868.87 1018.43
  13287. sglang_output_tokens 249.90 293.19
  13288. 2025-07-20 15:38:13,775 - __main__ - INFO -
  13289. Worker ID | finished | started
  13290. ----------+----------+--------
  13291. 0 | 292 | 500
  13292. 1 | 0 | 10
  13293. 2025-07-20 15:38:14,040 - sglang - INFO - [2025-07-20 15:38:14 TP0] Decode batch. #running-req: 11, #token: 31183, token usage: 0.82, gen throughput (token/s): 250.83, #queue-req: 207
  13294. 2025-07-20 15:38:14,040 - __main__ - INFO - sglang running req: 11 queue req: 207
  13295. 2025-07-20 15:38:14,586 - sglang - INFO - [2025-07-20 15:38:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2867, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 206
  13296. 2025-07-20 15:38:14,586 - __main__ - INFO - sglang running req: 10 queue req: 206
  13297. 2025-07-20 15:38:15,860 - sglang - INFO - [2025-07-20 15:38:15 TP0] Decode batch. #running-req: 11, #token: 30592, token usage: 0.81, gen throughput (token/s): 241.10, #queue-req: 206
  13298. 2025-07-20 15:38:15,860 - __main__ - INFO - sglang running req: 11 queue req: 206
  13299. 2025-07-20 15:38:16,917 - sglang - INFO - [2025-07-20 15:38:16 TP0] Decode batch. #running-req: 11, #token: 31032, token usage: 0.82, gen throughput (token/s): 416.30, #queue-req: 206
  13300. 2025-07-20 15:38:16,917 - __main__ - INFO - sglang running req: 11 queue req: 206
  13301. 2025-07-20 15:38:18,045 - sglang - INFO - [2025-07-20 15:38:18 TP0] Decode batch. #running-req: 11, #token: 31472, token usage: 0.83, gen throughput (token/s): 390.16, #queue-req: 206
  13302. 2025-07-20 15:38:18,045 - __main__ - INFO - sglang running req: 11 queue req: 206
  13303. 2025-07-20 15:38:19,079 - sglang - INFO - [2025-07-20 15:38:19 TP0] Decode batch. #running-req: 11, #token: 31912, token usage: 0.84, gen throughput (token/s): 425.35, #queue-req: 206
  13304. 2025-07-20 15:38:19,080 - __main__ - INFO - sglang running req: 11 queue req: 206
  13305. 2025-07-20 15:38:19,873 - sglang - INFO - [2025-07-20 15:38:19 TP0] Prefill batch. #new-seq: 1, #new-token: 3019, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 205
  13306. 2025-07-20 15:38:19,873 - __main__ - INFO - sglang running req: 9 queue req: 205
  13307. 2025-07-20 15:38:20,945 - sglang - INFO - [2025-07-20 15:38:20 TP0] Decode batch. #running-req: 10, #token: 30291, token usage: 0.80, gen throughput (token/s): 222.49, #queue-req: 205
  13308. 2025-07-20 15:38:20,945 - __main__ - INFO - sglang running req: 10 queue req: 205
  13309. 2025-07-20 15:38:21,931 - sglang - INFO - [2025-07-20 15:38:21 TP0] Decode batch. #running-req: 10, #token: 30691, token usage: 0.81, gen throughput (token/s): 405.56, #queue-req: 205
  13310. 2025-07-20 15:38:21,931 - __main__ - INFO - sglang running req: 10 queue req: 205
  13311. 2025-07-20 15:38:22,916 - sglang - INFO - [2025-07-20 15:38:22 TP0] Decode batch. #running-req: 10, #token: 31091, token usage: 0.82, gen throughput (token/s): 406.01, #queue-req: 205
  13312. 2025-07-20 15:38:22,917 - __main__ - INFO - sglang running req: 10 queue req: 205
  13313. 2025-07-20 15:38:23,777 - __main__ - INFO - Queue remaining: 2
  13314. 2025-07-20 15:38:23,777 - __main__ - INFO -
  13315. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13316. ----------------------------------------------------------------------------------
  13317. sglang_input_tokens 866.51 1002.22
  13318. sglang_output_tokens 249.50 289.29
  13319. 2025-07-20 15:38:23,777 - __main__ - INFO -
  13320. Worker ID | finished | started
  13321. ----------+----------+--------
  13322. 0 | 295 | 500
  13323. 1 | 0 | 10
  13324. 2025-07-20 15:38:23,903 - sglang - INFO - [2025-07-20 15:38:23 TP0] Decode batch. #running-req: 10, #token: 31491, token usage: 0.83, gen throughput (token/s): 405.22, #queue-req: 205
  13325. 2025-07-20 15:38:23,904 - __main__ - INFO - sglang running req: 10 queue req: 205
  13326. 2025-07-20 15:38:24,933 - sglang - INFO - [2025-07-20 15:38:24 TP0] Decode batch. #running-req: 10, #token: 31891, token usage: 0.84, gen throughput (token/s): 388.48, #queue-req: 205
  13327. 2025-07-20 15:38:24,933 - __main__ - INFO - sglang running req: 10 queue req: 205
  13328. 2025-07-20 15:38:25,230 - sglang - INFO - [2025-07-20 15:38:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 9, #queue-req: 204
  13329. 2025-07-20 15:38:25,231 - __main__ - INFO - sglang running req: 9 queue req: 204
  13330. 2025-07-20 15:38:26,758 - sglang - INFO - [2025-07-20 15:38:26 TP0] Decode batch. #running-req: 10, #token: 32718, token usage: 0.86, gen throughput (token/s): 218.67, #queue-req: 204
  13331. 2025-07-20 15:38:26,758 - __main__ - INFO - sglang running req: 10 queue req: 204
  13332. 2025-07-20 15:38:27,753 - sglang - INFO - [2025-07-20 15:38:27 TP0] Decode batch. #running-req: 10, #token: 33118, token usage: 0.87, gen throughput (token/s): 402.00, #queue-req: 204
  13333. 2025-07-20 15:38:27,753 - __main__ - INFO - sglang running req: 10 queue req: 204
  13334. 2025-07-20 15:38:28,026 - sglang - INFO - [2025-07-20 15:38:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2362, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 9, #queue-req: 203
  13335. 2025-07-20 15:38:28,027 - __main__ - INFO - sglang running req: 9 queue req: 203
  13336. 2025-07-20 15:38:28,925 - sglang - INFO - [2025-07-20 15:38:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2766, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 202
  13337. 2025-07-20 15:38:28,925 - __main__ - INFO - sglang running req: 9 queue req: 202
  13338. 2025-07-20 15:38:30,302 - sglang - INFO - [2025-07-20 15:38:30 TP0] Decode batch. #running-req: 10, #token: 31848, token usage: 0.84, gen throughput (token/s): 156.11, #queue-req: 202
  13339. 2025-07-20 15:38:30,302 - __main__ - INFO - sglang running req: 10 queue req: 202
  13340. 2025-07-20 15:38:31,292 - sglang - INFO - [2025-07-20 15:38:31 TP0] Decode batch. #running-req: 10, #token: 32248, token usage: 0.85, gen throughput (token/s): 404.10, #queue-req: 202
  13341. 2025-07-20 15:38:31,292 - __main__ - INFO - sglang running req: 10 queue req: 202
  13342. 2025-07-20 15:38:31,417 - sglang - INFO - [2025-07-20 15:38:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1575, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 201
  13343. 2025-07-20 15:38:31,417 - __main__ - INFO - sglang running req: 9 queue req: 201
  13344. 2025-07-20 15:38:32,865 - sglang - INFO - [2025-07-20 15:38:32 TP0] Decode batch. #running-req: 10, #token: 30695, token usage: 0.81, gen throughput (token/s): 253.57, #queue-req: 201
  13345. 2025-07-20 15:38:32,866 - __main__ - INFO - sglang running req: 10 queue req: 201
  13346. 2025-07-20 15:38:33,779 - __main__ - INFO - Queue remaining: 2
  13347. 2025-07-20 15:38:33,779 - __main__ - INFO -
  13348. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13349. ----------------------------------------------------------------------------------
  13350. sglang_input_tokens 867.82 1012.34
  13351. sglang_output_tokens 249.91 292.32
  13352. 2025-07-20 15:38:33,779 - __main__ - INFO -
  13353. Worker ID | finished | started
  13354. ----------+----------+--------
  13355. 0 | 299 | 500
  13356. 1 | 0 | 10
  13357. 2025-07-20 15:38:33,853 - sglang - INFO - [2025-07-20 15:38:33 TP0] Decode batch. #running-req: 10, #token: 31095, token usage: 0.82, gen throughput (token/s): 404.92, #queue-req: 201
  13358. 2025-07-20 15:38:33,853 - __main__ - INFO - sglang running req: 10 queue req: 201
  13359. 2025-07-20 15:38:34,274 - sglang - INFO - [2025-07-20 15:38:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 200
  13360. 2025-07-20 15:38:34,274 - __main__ - INFO - sglang running req: 9 queue req: 200
  13361. 2025-07-20 15:38:35,596 - sglang - INFO - [2025-07-20 15:38:35 TP0] Decode batch. #running-req: 10, #token: 29794, token usage: 0.78, gen throughput (token/s): 228.90, #queue-req: 200
  13362. 2025-07-20 15:38:35,597 - __main__ - INFO - sglang running req: 10 queue req: 200
  13363. 2025-07-20 15:38:35,843 - sglang - INFO - [2025-07-20 15:38:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2576, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 199
  13364. 2025-07-20 15:38:35,843 - __main__ - INFO - sglang running req: 9 queue req: 199
  13365. 2025-07-20 15:38:37,098 - sglang - INFO - [2025-07-20 15:38:37 TP0] Prefill batch. #new-seq: 2, #new-token: 3615, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 197
  13366. 2025-07-20 15:38:37,098 - __main__ - INFO - sglang running req: 9 queue req: 197
  13367. 2025-07-20 15:38:38,582 - sglang - INFO - [2025-07-20 15:38:38 TP0] Decode batch. #running-req: 11, #token: 29695, token usage: 0.78, gen throughput (token/s): 137.01, #queue-req: 197
  13368. 2025-07-20 15:38:38,582 - __main__ - INFO - sglang running req: 11 queue req: 197
  13369. 2025-07-20 15:38:38,879 - sglang - INFO - [2025-07-20 15:38:38 TP0] Prefill batch. #new-seq: 1, #new-token: 2375, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 196
  13370. 2025-07-20 15:38:38,879 - __main__ - INFO - sglang running req: 10 queue req: 196
  13371. 2025-07-20 15:38:40,318 - sglang - INFO - [2025-07-20 15:38:40 TP0] Decode batch. #running-req: 11, #token: 28906, token usage: 0.76, gen throughput (token/s): 252.78, #queue-req: 196
  13372. 2025-07-20 15:38:40,319 - __main__ - INFO - sglang running req: 11 queue req: 196
  13373. 2025-07-20 15:38:41,256 - sglang - INFO - [2025-07-20 15:38:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2003, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 195
  13374. 2025-07-20 15:38:41,256 - __main__ - INFO - sglang running req: 10 queue req: 195
  13375. 2025-07-20 15:38:41,963 - sglang - INFO - [2025-07-20 15:38:41 TP0] Decode batch. #running-req: 11, #token: 29541, token usage: 0.78, gen throughput (token/s): 266.91, #queue-req: 195
  13376. 2025-07-20 15:38:41,963 - __main__ - INFO - sglang running req: 11 queue req: 195
  13377. 2025-07-20 15:38:42,582 - sglang - INFO - [2025-07-20 15:38:42 TP0] Prefill batch. #new-seq: 1, #new-token: 2867, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 194
  13378. 2025-07-20 15:38:42,582 - __main__ - INFO - sglang running req: 10 queue req: 194
  13379. 2025-07-20 15:38:43,782 - __main__ - INFO - Queue remaining: 2
  13380. 2025-07-20 15:38:43,782 - __main__ - INFO -
  13381. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13382. ----------------------------------------------------------------------------------
  13383. sglang_input_tokens 874.00 1030.66
  13384. sglang_output_tokens 251.58 296.82
  13385. 2025-07-20 15:38:43,782 - __main__ - INFO -
  13386. Worker ID | finished | started
  13387. ----------+----------+--------
  13388. 0 | 305 | 500
  13389. 1 | 0 | 10
  13390. 2025-07-20 15:38:43,786 - sglang - INFO - [2025-07-20 15:38:43 TP0] Decode batch. #running-req: 11, #token: 31446, token usage: 0.83, gen throughput (token/s): 240.80, #queue-req: 194
  13391. 2025-07-20 15:38:43,786 - __main__ - INFO - sglang running req: 11 queue req: 194
  13392. 2025-07-20 15:38:44,779 - sglang - INFO - [2025-07-20 15:38:44 TP0] Decode batch. #running-req: 11, #token: 31886, token usage: 0.84, gen throughput (token/s): 443.23, #queue-req: 194
  13393. 2025-07-20 15:38:44,779 - __main__ - INFO - sglang running req: 11 queue req: 194
  13394. 2025-07-20 15:38:45,770 - sglang - INFO - [2025-07-20 15:38:45 TP0] Decode batch. #running-req: 11, #token: 32326, token usage: 0.85, gen throughput (token/s): 444.20, #queue-req: 194
  13395. 2025-07-20 15:38:45,770 - __main__ - INFO - sglang running req: 11 queue req: 194
  13396. 2025-07-20 15:38:46,366 - sglang - INFO - [2025-07-20 15:38:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2471, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 193
  13397. 2025-07-20 15:38:46,366 - __main__ - INFO - sglang running req: 10 queue req: 193
  13398. 2025-07-20 15:38:47,524 - sglang - INFO - [2025-07-20 15:38:47 TP0] Decode batch. #running-req: 11, #token: 31488, token usage: 0.83, gen throughput (token/s): 250.24, #queue-req: 193
  13399. 2025-07-20 15:38:47,524 - __main__ - INFO - sglang running req: 11 queue req: 193
  13400. 2025-07-20 15:38:48,553 - sglang - INFO - [2025-07-20 15:38:48 TP0] Decode batch. #running-req: 11, #token: 31928, token usage: 0.84, gen throughput (token/s): 427.74, #queue-req: 193
  13401. 2025-07-20 15:38:48,553 - __main__ - INFO - sglang running req: 11 queue req: 193
  13402. 2025-07-20 15:38:49,592 - sglang - INFO - [2025-07-20 15:38:49 TP0] Decode batch. #running-req: 11, #token: 32368, token usage: 0.85, gen throughput (token/s): 423.31, #queue-req: 193
  13403. 2025-07-20 15:38:49,592 - __main__ - INFO - sglang running req: 11 queue req: 193
  13404. 2025-07-20 15:38:50,607 - sglang - INFO - [2025-07-20 15:38:50 TP0] Decode batch. #running-req: 11, #token: 32808, token usage: 0.86, gen throughput (token/s): 433.67, #queue-req: 193
  13405. 2025-07-20 15:38:50,607 - __main__ - INFO - sglang running req: 11 queue req: 193
  13406. 2025-07-20 15:38:51,605 - sglang - INFO - [2025-07-20 15:38:51 TP0] Decode batch. #running-req: 11, #token: 33248, token usage: 0.88, gen throughput (token/s): 440.49, #queue-req: 193
  13407. 2025-07-20 15:38:51,605 - __main__ - INFO - sglang running req: 11 queue req: 193
  13408. 2025-07-20 15:38:52,686 - sglang - INFO - [2025-07-20 15:38:52 TP0] Decode batch. #running-req: 11, #token: 33688, token usage: 0.89, gen throughput (token/s): 407.31, #queue-req: 193
  13409. 2025-07-20 15:38:52,686 - __main__ - INFO - sglang running req: 11 queue req: 193
  13410. 2025-07-20 15:38:53,677 - sglang - INFO - [2025-07-20 15:38:53 TP0] Decode batch. #running-req: 10, #token: 31057, token usage: 0.82, gen throughput (token/s): 416.50, #queue-req: 193
  13411. 2025-07-20 15:38:53,678 - __main__ - INFO - sglang running req: 10 queue req: 193
  13412. 2025-07-20 15:38:53,783 - __main__ - INFO - Queue remaining: 2
  13413. 2025-07-20 15:38:53,783 - __main__ - INFO -
  13414. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13415. ----------------------------------------------------------------------------------
  13416. sglang_input_tokens 869.82 1000.47
  13417. sglang_output_tokens 250.44 288.99
  13418. 2025-07-20 15:38:53,784 - __main__ - INFO -
  13419. Worker ID | finished | started
  13420. ----------+----------+--------
  13421. 0 | 307 | 500
  13422. 1 | 0 | 10
  13423. 2025-07-20 15:38:54,496 - sglang - INFO - [2025-07-20 15:38:54 TP0] Prefill batch. #new-seq: 1, #new-token: 1980, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 192
  13424. 2025-07-20 15:38:54,496 - __main__ - INFO - sglang running req: 9 queue req: 192
  13425. 2025-07-20 15:38:55,252 - sglang - INFO - [2025-07-20 15:38:55 TP0] Prefill batch. #new-seq: 1, #new-token: 1626, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 191
  13426. 2025-07-20 15:38:55,252 - __main__ - INFO - sglang running req: 9 queue req: 191
  13427. 2025-07-20 15:38:55,911 - sglang - INFO - [2025-07-20 15:38:55 TP0] Decode batch. #running-req: 10, #token: 29129, token usage: 0.77, gen throughput (token/s): 178.16, #queue-req: 191
  13428. 2025-07-20 15:38:55,911 - __main__ - INFO - sglang running req: 10 queue req: 191
  13429. 2025-07-20 15:38:56,900 - sglang - INFO - [2025-07-20 15:38:56 TP0] Decode batch. #running-req: 10, #token: 29529, token usage: 0.78, gen throughput (token/s): 404.59, #queue-req: 191
  13430. 2025-07-20 15:38:56,900 - __main__ - INFO - sglang running req: 10 queue req: 191
  13431. 2025-07-20 15:38:57,890 - sglang - INFO - [2025-07-20 15:38:57 TP0] Decode batch. #running-req: 10, #token: 29929, token usage: 0.79, gen throughput (token/s): 404.07, #queue-req: 191
  13432. 2025-07-20 15:38:57,890 - __main__ - INFO - sglang running req: 10 queue req: 191
  13433. 2025-07-20 15:38:58,236 - sglang - INFO - [2025-07-20 15:38:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2667, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 190
  13434. 2025-07-20 15:38:58,236 - __main__ - INFO - sglang running req: 9 queue req: 190
  13435. 2025-07-20 15:38:59,543 - sglang - INFO - [2025-07-20 15:38:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2786, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 189
  13436. 2025-07-20 15:38:59,543 - __main__ - INFO - sglang running req: 9 queue req: 189
  13437. 2025-07-20 15:39:00,474 - sglang - INFO - [2025-07-20 15:39:00 TP0] Decode batch. #running-req: 10, #token: 28938, token usage: 0.76, gen throughput (token/s): 153.98, #queue-req: 189
  13438. 2025-07-20 15:39:00,475 - __main__ - INFO - sglang running req: 10 queue req: 189
  13439. 2025-07-20 15:39:01,457 - sglang - INFO - [2025-07-20 15:39:01 TP0] Decode batch. #running-req: 10, #token: 29338, token usage: 0.77, gen throughput (token/s): 407.00, #queue-req: 189
  13440. 2025-07-20 15:39:01,459 - __main__ - INFO - sglang running req: 10 queue req: 189
  13441. 2025-07-20 15:39:02,443 - sglang - INFO - [2025-07-20 15:39:02 TP0] Decode batch. #running-req: 10, #token: 29738, token usage: 0.78, gen throughput (token/s): 405.60, #queue-req: 189
  13442. 2025-07-20 15:39:02,444 - __main__ - INFO - sglang running req: 10 queue req: 189
  13443. 2025-07-20 15:39:03,036 - sglang - INFO - [2025-07-20 15:39:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 188
  13444. 2025-07-20 15:39:03,037 - __main__ - INFO - sglang running req: 9 queue req: 188
  13445. 2025-07-20 15:39:03,786 - __main__ - INFO - Queue remaining: 2
  13446. 2025-07-20 15:39:03,786 - __main__ - INFO -
  13447. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13448. ----------------------------------------------------------------------------------
  13449. sglang_input_tokens 874.28 1017.92
  13450. sglang_output_tokens 251.69 294.21
  13451. 2025-07-20 15:39:03,786 - __main__ - INFO -
  13452. Worker ID | finished | started
  13453. ----------+----------+--------
  13454. 0 | 312 | 500
  13455. 1 | 0 | 10
  13456. 2025-07-20 15:39:04,158 - sglang - INFO - [2025-07-20 15:39:04 TP0] Decode batch. #running-req: 10, #token: 29137, token usage: 0.77, gen throughput (token/s): 232.71, #queue-req: 188
  13457. 2025-07-20 15:39:04,158 - __main__ - INFO - sglang running req: 10 queue req: 188
  13458. 2025-07-20 15:39:04,946 - sglang - INFO - [2025-07-20 15:39:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2447, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 187
  13459. 2025-07-20 15:39:04,946 - __main__ - INFO - sglang running req: 9 queue req: 187
  13460. 2025-07-20 15:39:05,898 - sglang - INFO - [2025-07-20 15:39:05 TP0] Decode batch. #running-req: 10, #token: 28852, token usage: 0.76, gen throughput (token/s): 229.37, #queue-req: 187
  13461. 2025-07-20 15:39:05,898 - __main__ - INFO - sglang running req: 10 queue req: 187
  13462. 2025-07-20 15:39:06,193 - sglang - INFO - [2025-07-20 15:39:06 TP0] Prefill batch. #new-seq: 1, #new-token: 1862, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 186
  13463. 2025-07-20 15:39:06,194 - __main__ - INFO - sglang running req: 9 queue req: 186
  13464. 2025-07-20 15:39:07,528 - sglang - INFO - [2025-07-20 15:39:07 TP0] Decode batch. #running-req: 10, #token: 27939, token usage: 0.74, gen throughput (token/s): 244.82, #queue-req: 186
  13465. 2025-07-20 15:39:07,528 - __main__ - INFO - sglang running req: 10 queue req: 186
  13466. 2025-07-20 15:39:08,213 - sglang - INFO - [2025-07-20 15:39:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2257, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 185
  13467. 2025-07-20 15:39:08,213 - __main__ - INFO - sglang running req: 9 queue req: 185
  13468. 2025-07-20 15:39:09,234 - sglang - INFO - [2025-07-20 15:39:09 TP0] Decode batch. #running-req: 10, #token: 28261, token usage: 0.74, gen throughput (token/s): 233.75, #queue-req: 185
  13469. 2025-07-20 15:39:09,235 - __main__ - INFO - sglang running req: 10 queue req: 185
  13470. 2025-07-20 15:39:09,408 - sglang - INFO - [2025-07-20 15:39:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2187, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 184
  13471. 2025-07-20 15:39:09,408 - __main__ - INFO - sglang running req: 9 queue req: 184
  13472. 2025-07-20 15:39:10,951 - sglang - INFO - [2025-07-20 15:39:10 TP0] Decode batch. #running-req: 10, #token: 28852, token usage: 0.76, gen throughput (token/s): 232.40, #queue-req: 184
  13473. 2025-07-20 15:39:10,951 - __main__ - INFO - sglang running req: 10 queue req: 184
  13474. 2025-07-20 15:39:11,621 - sglang - INFO - [2025-07-20 15:39:11 TP0] Prefill batch. #new-seq: 1, #new-token: 2684, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 183
  13475. 2025-07-20 15:39:11,621 - __main__ - INFO - sglang running req: 9 queue req: 183
  13476. 2025-07-20 15:39:12,742 - sglang - INFO - [2025-07-20 15:39:12 TP0] Decode batch. #running-req: 10, #token: 28227, token usage: 0.74, gen throughput (token/s): 222.82, #queue-req: 183
  13477. 2025-07-20 15:39:12,742 - __main__ - INFO - sglang running req: 10 queue req: 183
  13478. 2025-07-20 15:39:13,788 - __main__ - INFO - Queue remaining: 2
  13479. 2025-07-20 15:39:13,788 - __main__ - INFO -
  13480. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13481. ----------------------------------------------------------------------------------
  13482. sglang_input_tokens 877.11 1039.08
  13483. sglang_output_tokens 252.45 300.80
  13484. 2025-07-20 15:39:13,789 - __main__ - INFO -
  13485. Worker ID | finished | started
  13486. ----------+----------+--------
  13487. 0 | 317 | 500
  13488. 1 | 0 | 10
  13489. 2025-07-20 15:39:13,895 - sglang - INFO - [2025-07-20 15:39:13 TP0] Decode batch. #running-req: 10, #token: 28627, token usage: 0.75, gen throughput (token/s): 346.91, #queue-req: 183
  13490. 2025-07-20 15:39:13,895 - __main__ - INFO - sglang running req: 10 queue req: 183
  13491. 2025-07-20 15:39:14,967 - sglang - INFO - [2025-07-20 15:39:14 TP0] Decode batch. #running-req: 10, #token: 29027, token usage: 0.76, gen throughput (token/s): 373.23, #queue-req: 183
  13492. 2025-07-20 15:39:14,967 - __main__ - INFO - sglang running req: 10 queue req: 183
  13493. 2025-07-20 15:39:15,949 - sglang - INFO - [2025-07-20 15:39:15 TP0] Decode batch. #running-req: 10, #token: 29427, token usage: 0.77, gen throughput (token/s): 407.03, #queue-req: 183
  13494. 2025-07-20 15:39:15,950 - __main__ - INFO - sglang running req: 10 queue req: 183
  13495. 2025-07-20 15:39:16,269 - sglang - INFO - [2025-07-20 15:39:16 TP0] Prefill batch. #new-seq: 2, #new-token: 3532, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 181
  13496. 2025-07-20 15:39:16,269 - __main__ - INFO - sglang running req: 9 queue req: 181
  13497. 2025-07-20 15:39:18,140 - sglang - INFO - [2025-07-20 15:39:18 TP0] Decode batch. #running-req: 11, #token: 28786, token usage: 0.76, gen throughput (token/s): 194.43, #queue-req: 181
  13498. 2025-07-20 15:39:18,141 - __main__ - INFO - sglang running req: 11 queue req: 181
  13499. 2025-07-20 15:39:18,560 - sglang - INFO - [2025-07-20 15:39:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2730, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 180
  13500. 2025-07-20 15:39:18,561 - __main__ - INFO - sglang running req: 10 queue req: 180
  13501. 2025-07-20 15:39:19,467 - sglang - INFO - [2025-07-20 15:39:19 TP0] Prefill batch. #new-seq: 1, #new-token: 1709, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 179
  13502. 2025-07-20 15:39:19,468 - __main__ - INFO - sglang running req: 10 queue req: 179
  13503. 2025-07-20 15:39:20,554 - sglang - INFO - [2025-07-20 15:39:20 TP0] Decode batch. #running-req: 11, #token: 29097, token usage: 0.77, gen throughput (token/s): 181.47, #queue-req: 179
  13504. 2025-07-20 15:39:20,554 - __main__ - INFO - sglang running req: 11 queue req: 179
  13505. 2025-07-20 15:39:21,542 - sglang - INFO - [2025-07-20 15:39:21 TP0] Decode batch. #running-req: 11, #token: 29537, token usage: 0.78, gen throughput (token/s): 445.12, #queue-req: 179
  13506. 2025-07-20 15:39:21,543 - __main__ - INFO - sglang running req: 11 queue req: 179
  13507. 2025-07-20 15:39:22,062 - sglang - INFO - [2025-07-20 15:39:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1451, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 178
  13508. 2025-07-20 15:39:22,062 - __main__ - INFO - sglang running req: 10 queue req: 178
  13509. 2025-07-20 15:39:23,082 - sglang - INFO - [2025-07-20 15:39:23 TP0] Decode batch. #running-req: 11, #token: 29157, token usage: 0.77, gen throughput (token/s): 285.16, #queue-req: 178
  13510. 2025-07-20 15:39:23,082 - __main__ - INFO - sglang running req: 11 queue req: 178
  13511. 2025-07-20 15:39:23,790 - __main__ - INFO - Queue remaining: 2
  13512. 2025-07-20 15:39:23,791 - __main__ - INFO -
  13513. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13514. ----------------------------------------------------------------------------------
  13515. sglang_input_tokens 876.78 1024.38
  13516. sglang_output_tokens 252.94 297.28
  13517. 2025-07-20 15:39:23,791 - __main__ - INFO -
  13518. Worker ID | finished | started
  13519. ----------+----------+--------
  13520. 0 | 321 | 500
  13521. 1 | 0 | 10
  13522. 2025-07-20 15:39:24,070 - sglang - INFO - [2025-07-20 15:39:24 TP0] Decode batch. #running-req: 11, #token: 29597, token usage: 0.78, gen throughput (token/s): 445.60, #queue-req: 178
  13523. 2025-07-20 15:39:24,070 - __main__ - INFO - sglang running req: 11 queue req: 178
  13524. 2025-07-20 15:39:25,061 - sglang - INFO - [2025-07-20 15:39:25 TP0] Decode batch. #running-req: 11, #token: 30037, token usage: 0.79, gen throughput (token/s): 443.62, #queue-req: 178
  13525. 2025-07-20 15:39:25,061 - __main__ - INFO - sglang running req: 11 queue req: 178
  13526. 2025-07-20 15:39:25,468 - sglang - INFO - [2025-07-20 15:39:25 TP0] Prefill batch. #new-seq: 1, #new-token: 1522, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 177
  13527. 2025-07-20 15:39:25,468 - __main__ - INFO - sglang running req: 10 queue req: 177
  13528. 2025-07-20 15:39:26,601 - sglang - INFO - [2025-07-20 15:39:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2073, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 176
  13529. 2025-07-20 15:39:26,601 - __main__ - INFO - sglang running req: 10 queue req: 176
  13530. 2025-07-20 15:39:27,310 - sglang - INFO - [2025-07-20 15:39:27 TP0] Decode batch. #running-req: 11, #token: 27786, token usage: 0.73, gen throughput (token/s): 194.74, #queue-req: 176
  13531. 2025-07-20 15:39:27,311 - __main__ - INFO - sglang running req: 11 queue req: 176
  13532. 2025-07-20 15:39:28,301 - sglang - INFO - [2025-07-20 15:39:28 TP0] Decode batch. #running-req: 11, #token: 28226, token usage: 0.74, gen throughput (token/s): 444.19, #queue-req: 176
  13533. 2025-07-20 15:39:28,301 - __main__ - INFO - sglang running req: 11 queue req: 176
  13534. 2025-07-20 15:39:29,217 - sglang - INFO - [2025-07-20 15:39:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2387, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 175
  13535. 2025-07-20 15:39:29,218 - __main__ - INFO - sglang running req: 10 queue req: 175
  13536. 2025-07-20 15:39:30,040 - sglang - INFO - [2025-07-20 15:39:30 TP0] Decode batch. #running-req: 11, #token: 28227, token usage: 0.74, gen throughput (token/s): 252.47, #queue-req: 175
  13537. 2025-07-20 15:39:30,040 - __main__ - INFO - sglang running req: 11 queue req: 175
  13538. 2025-07-20 15:39:30,262 - sglang - INFO - [2025-07-20 15:39:30 TP0] Prefill batch. #new-seq: 1, #new-token: 1501, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 174
  13539. 2025-07-20 15:39:30,262 - __main__ - INFO - sglang running req: 10 queue req: 174
  13540. 2025-07-20 15:39:31,589 - sglang - INFO - [2025-07-20 15:39:31 TP0] Decode batch. #running-req: 11, #token: 28489, token usage: 0.75, gen throughput (token/s): 283.29, #queue-req: 174
  13541. 2025-07-20 15:39:31,590 - __main__ - INFO - sglang running req: 11 queue req: 174
  13542. 2025-07-20 15:39:31,738 - sglang - INFO - [2025-07-20 15:39:31 TP0] Prefill batch. #new-seq: 2, #new-token: 4360, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 10, #queue-req: 172
  13543. 2025-07-20 15:39:31,738 - __main__ - INFO - sglang running req: 10 queue req: 172
  13544. 2025-07-20 15:39:33,792 - __main__ - INFO - Queue remaining: 2
  13545. 2025-07-20 15:39:33,792 - __main__ - INFO -
  13546. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13547. ----------------------------------------------------------------------------------
  13548. sglang_input_tokens 879.82 1020.48
  13549. sglang_output_tokens 253.46 291.01
  13550. 2025-07-20 15:39:33,792 - __main__ - INFO -
  13551. Worker ID | finished | started
  13552. ----------+----------+--------
  13553. 0 | 326 | 500
  13554. 1 | 0 | 10
  13555. 2025-07-20 15:39:33,980 - sglang - INFO - [2025-07-20 15:39:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2080, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 11, #queue-req: 171
  13556. 2025-07-20 15:39:33,980 - __main__ - INFO - sglang running req: 11 queue req: 171
  13557. 2025-07-20 15:39:34,690 - sglang - INFO - [2025-07-20 15:39:34 TP0] Decode batch. #running-req: 12, #token: 29679, token usage: 0.78, gen throughput (token/s): 152.25, #queue-req: 171
  13558. 2025-07-20 15:39:34,691 - __main__ - INFO - sglang running req: 12 queue req: 171
  13559. 2025-07-20 15:39:35,088 - sglang - INFO - [2025-07-20 15:39:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2122, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 11, #queue-req: 170
  13560. 2025-07-20 15:39:35,088 - __main__ - INFO - sglang running req: 11 queue req: 170
  13561. 2025-07-20 15:39:36,515 - sglang - INFO - [2025-07-20 15:39:36 TP0] Decode batch. #running-req: 12, #token: 30561, token usage: 0.80, gen throughput (token/s): 262.34, #queue-req: 170
  13562. 2025-07-20 15:39:36,515 - __main__ - INFO - sglang running req: 12 queue req: 170
  13563. 2025-07-20 15:39:36,926 - sglang - INFO - [2025-07-20 15:39:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2279, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 11, #queue-req: 169
  13564. 2025-07-20 15:39:36,927 - __main__ - INFO - sglang running req: 11 queue req: 169
  13565. 2025-07-20 15:39:38,249 - sglang - INFO - [2025-07-20 15:39:38 TP0] Decode batch. #running-req: 12, #token: 30089, token usage: 0.79, gen throughput (token/s): 276.32, #queue-req: 169
  13566. 2025-07-20 15:39:38,249 - __main__ - INFO - sglang running req: 12 queue req: 169
  13567. 2025-07-20 15:39:38,647 - sglang - INFO - [2025-07-20 15:39:38 TP0] Prefill batch. #new-seq: 1, #new-token: 2796, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 11, #queue-req: 168
  13568. 2025-07-20 15:39:38,647 - __main__ - INFO - sglang running req: 11 queue req: 168
  13569. 2025-07-20 15:39:40,063 - sglang - INFO - [2025-07-20 15:39:40 TP0] Decode batch. #running-req: 12, #token: 31697, token usage: 0.83, gen throughput (token/s): 264.06, #queue-req: 168
  13570. 2025-07-20 15:39:40,063 - __main__ - INFO - sglang running req: 12 queue req: 168
  13571. 2025-07-20 15:39:41,070 - sglang - INFO - [2025-07-20 15:39:41 TP0] Decode batch. #running-req: 12, #token: 32177, token usage: 0.85, gen throughput (token/s): 476.58, #queue-req: 168
  13572. 2025-07-20 15:39:41,070 - __main__ - INFO - sglang running req: 12 queue req: 168
  13573. 2025-07-20 15:39:41,999 - sglang - INFO - [2025-07-20 15:39:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2771, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 167
  13574. 2025-07-20 15:39:41,999 - __main__ - INFO - sglang running req: 10 queue req: 167
  13575. 2025-07-20 15:39:42,883 - sglang - INFO - [2025-07-20 15:39:42 TP0] Decode batch. #running-req: 11, #token: 29977, token usage: 0.79, gen throughput (token/s): 248.72, #queue-req: 167
  13576. 2025-07-20 15:39:42,883 - __main__ - INFO - sglang running req: 11 queue req: 167
  13577. 2025-07-20 15:39:43,183 - sglang - INFO - [2025-07-20 15:39:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 166
  13578. 2025-07-20 15:39:43,183 - __main__ - INFO - sglang running req: 10 queue req: 166
  13579. 2025-07-20 15:39:43,793 - __main__ - INFO - Queue remaining: 2
  13580. 2025-07-20 15:39:43,794 - __main__ - INFO -
  13581. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13582. ----------------------------------------------------------------------------------
  13583. sglang_input_tokens 885.80 1036.27
  13584. sglang_output_tokens 254.52 293.79
  13585. 2025-07-20 15:39:43,794 - __main__ - INFO -
  13586. Worker ID | finished | started
  13587. ----------+----------+--------
  13588. 0 | 333 | 500
  13589. 1 | 0 | 10
  13590. 2025-07-20 15:39:44,631 - sglang - INFO - [2025-07-20 15:39:44 TP0] Decode batch. #running-req: 11, #token: 29444, token usage: 0.78, gen throughput (token/s): 251.18, #queue-req: 166
  13591. 2025-07-20 15:39:44,631 - __main__ - INFO - sglang running req: 11 queue req: 166
  13592. 2025-07-20 15:39:44,978 - sglang - INFO - [2025-07-20 15:39:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2675, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 165
  13593. 2025-07-20 15:39:44,978 - __main__ - INFO - sglang running req: 10 queue req: 165
  13594. 2025-07-20 15:39:46,411 - sglang - INFO - [2025-07-20 15:39:46 TP0] Decode batch. #running-req: 11, #token: 30650, token usage: 0.81, gen throughput (token/s): 246.66, #queue-req: 165
  13595. 2025-07-20 15:39:46,411 - __main__ - INFO - sglang running req: 11 queue req: 165
  13596. 2025-07-20 15:39:46,857 - sglang - INFO - [2025-07-20 15:39:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1633, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 164
  13597. 2025-07-20 15:39:46,857 - __main__ - INFO - sglang running req: 10 queue req: 164
  13598. 2025-07-20 15:39:47,986 - sglang - INFO - [2025-07-20 15:39:47 TP0] Decode batch. #running-req: 11, #token: 29602, token usage: 0.78, gen throughput (token/s): 278.75, #queue-req: 164
  13599. 2025-07-20 15:39:47,986 - __main__ - INFO - sglang running req: 11 queue req: 164
  13600. 2025-07-20 15:39:48,978 - sglang - INFO - [2025-07-20 15:39:48 TP0] Decode batch. #running-req: 11, #token: 30042, token usage: 0.79, gen throughput (token/s): 443.30, #queue-req: 164
  13601. 2025-07-20 15:39:48,978 - __main__ - INFO - sglang running req: 11 queue req: 164
  13602. 2025-07-20 15:39:49,999 - sglang - INFO - [2025-07-20 15:39:49 TP0] Decode batch. #running-req: 11, #token: 30482, token usage: 0.80, gen throughput (token/s): 430.98, #queue-req: 164
  13603. 2025-07-20 15:39:50,000 - __main__ - INFO - sglang running req: 11 queue req: 164
  13604. 2025-07-20 15:39:51,045 - sglang - INFO - [2025-07-20 15:39:51 TP0] Decode batch. #running-req: 11, #token: 30922, token usage: 0.81, gen throughput (token/s): 420.89, #queue-req: 164
  13605. 2025-07-20 15:39:51,045 - __main__ - INFO - sglang running req: 11 queue req: 164
  13606. 2025-07-20 15:39:51,569 - sglang - INFO - [2025-07-20 15:39:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2353, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 163
  13607. 2025-07-20 15:39:51,569 - __main__ - INFO - sglang running req: 10 queue req: 163
  13608. 2025-07-20 15:39:52,949 - sglang - INFO - [2025-07-20 15:39:52 TP0] Decode batch. #running-req: 11, #token: 30786, token usage: 0.81, gen throughput (token/s): 230.54, #queue-req: 163
  13609. 2025-07-20 15:39:52,949 - __main__ - INFO - sglang running req: 11 queue req: 163
  13610. 2025-07-20 15:39:53,795 - __main__ - INFO - Queue remaining: 2
  13611. 2025-07-20 15:39:53,796 - __main__ - INFO -
  13612. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13613. ----------------------------------------------------------------------------------
  13614. sglang_input_tokens 883.07 1034.59
  13615. sglang_output_tokens 253.42 292.83
  13616. 2025-07-20 15:39:53,796 - __main__ - INFO -
  13617. Worker ID | finished | started
  13618. ----------+----------+--------
  13619. 0 | 336 | 500
  13620. 1 | 0 | 10
  13621. 2025-07-20 15:39:53,968 - sglang - INFO - [2025-07-20 15:39:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2778, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 162
  13622. 2025-07-20 15:39:53,968 - __main__ - INFO - sglang running req: 10 queue req: 162
  13623. 2025-07-20 15:39:54,802 - sglang - INFO - [2025-07-20 15:39:54 TP0] Decode batch. #running-req: 11, #token: 30372, token usage: 0.80, gen throughput (token/s): 236.95, #queue-req: 162
  13624. 2025-07-20 15:39:54,802 - __main__ - INFO - sglang running req: 11 queue req: 162
  13625. 2025-07-20 15:39:55,793 - sglang - INFO - [2025-07-20 15:39:55 TP0] Decode batch. #running-req: 11, #token: 30812, token usage: 0.81, gen throughput (token/s): 443.85, #queue-req: 162
  13626. 2025-07-20 15:39:55,793 - __main__ - INFO - sglang running req: 11 queue req: 162
  13627. 2025-07-20 15:39:56,786 - sglang - INFO - [2025-07-20 15:39:56 TP0] Decode batch. #running-req: 10, #token: 28571, token usage: 0.75, gen throughput (token/s): 442.00, #queue-req: 162
  13628. 2025-07-20 15:39:56,786 - __main__ - INFO - sglang running req: 10 queue req: 162
  13629. 2025-07-20 15:39:56,787 - sglang - INFO - [2025-07-20 15:39:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2333, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 161
  13630. 2025-07-20 15:39:56,787 - __main__ - INFO - sglang running req: 10 queue req: 161
  13631. 2025-07-20 15:39:58,010 - sglang - INFO - [2025-07-20 15:39:58 TP0] Prefill batch. #new-seq: 1, #new-token: 1972, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 160
  13632. 2025-07-20 15:39:58,010 - __main__ - INFO - sglang running req: 10 queue req: 160
  13633. 2025-07-20 15:39:59,190 - sglang - INFO - [2025-07-20 15:39:59 TP0] Decode batch. #running-req: 11, #token: 31362, token usage: 0.83, gen throughput (token/s): 182.65, #queue-req: 160
  13634. 2025-07-20 15:39:59,190 - __main__ - INFO - sglang running req: 11 queue req: 160
  13635. 2025-07-20 15:39:59,935 - sglang - INFO - [2025-07-20 15:39:59 TP0] Prefill batch. #new-seq: 1, #new-token: 1774, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 159
  13636. 2025-07-20 15:39:59,935 - __main__ - INFO - sglang running req: 10 queue req: 159
  13637. 2025-07-20 15:40:00,792 - sglang - INFO - [2025-07-20 15:40:00 TP0] Decode batch. #running-req: 11, #token: 30798, token usage: 0.81, gen throughput (token/s): 273.92, #queue-req: 159
  13638. 2025-07-20 15:40:00,793 - __main__ - INFO - sglang running req: 11 queue req: 159
  13639. 2025-07-20 15:40:01,782 - sglang - INFO - [2025-07-20 15:40:01 TP0] Decode batch. #running-req: 11, #token: 31238, token usage: 0.82, gen throughput (token/s): 444.65, #queue-req: 159
  13640. 2025-07-20 15:40:01,783 - __main__ - INFO - sglang running req: 11 queue req: 159
  13641. 2025-07-20 15:40:02,775 - sglang - INFO - [2025-07-20 15:40:02 TP0] Decode batch. #running-req: 11, #token: 31678, token usage: 0.83, gen throughput (token/s): 442.95, #queue-req: 159
  13642. 2025-07-20 15:40:02,775 - __main__ - INFO - sglang running req: 11 queue req: 159
  13643. 2025-07-20 15:40:03,573 - sglang - INFO - [2025-07-20 15:40:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2115, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 158
  13644. 2025-07-20 15:40:03,573 - __main__ - INFO - sglang running req: 10 queue req: 158
  13645. 2025-07-20 15:40:03,797 - __main__ - INFO - Queue remaining: 2
  13646. 2025-07-20 15:40:03,797 - __main__ - INFO -
  13647. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13648. ----------------------------------------------------------------------------------
  13649. sglang_input_tokens 885.79 1031.67
  13650. sglang_output_tokens 254.26 293.35
  13651. 2025-07-20 15:40:03,797 - __main__ - INFO -
  13652. Worker ID | finished | started
  13653. ----------+----------+--------
  13654. 0 | 341 | 500
  13655. 1 | 0 | 10
  13656. 2025-07-20 15:40:04,458 - sglang - INFO - [2025-07-20 15:40:04 TP0] Decode batch. #running-req: 11, #token: 30668, token usage: 0.81, gen throughput (token/s): 260.83, #queue-req: 158
  13657. 2025-07-20 15:40:04,458 - __main__ - INFO - sglang running req: 11 queue req: 158
  13658. 2025-07-20 15:40:05,582 - sglang - INFO - [2025-07-20 15:40:05 TP0] Decode batch. #running-req: 11, #token: 31108, token usage: 0.82, gen throughput (token/s): 391.62, #queue-req: 158
  13659. 2025-07-20 15:40:05,582 - __main__ - INFO - sglang running req: 11 queue req: 158
  13660. 2025-07-20 15:40:05,836 - sglang - INFO - [2025-07-20 15:40:05 TP0] Prefill batch. #new-seq: 1, #new-token: 3184, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 157
  13661. 2025-07-20 15:40:05,836 - __main__ - INFO - sglang running req: 10 queue req: 157
  13662. 2025-07-20 15:40:07,503 - sglang - INFO - [2025-07-20 15:40:07 TP0] Decode batch. #running-req: 11, #token: 31660, token usage: 0.83, gen throughput (token/s): 228.45, #queue-req: 157
  13663. 2025-07-20 15:40:07,504 - __main__ - INFO - sglang running req: 11 queue req: 157
  13664. 2025-07-20 15:40:08,494 - sglang - INFO - [2025-07-20 15:40:08 TP0] Decode batch. #running-req: 11, #token: 32100, token usage: 0.85, gen throughput (token/s): 444.28, #queue-req: 157
  13665. 2025-07-20 15:40:08,494 - __main__ - INFO - sglang running req: 11 queue req: 157
  13666. 2025-07-20 15:40:09,485 - sglang - INFO - [2025-07-20 15:40:09 TP0] Decode batch. #running-req: 11, #token: 32540, token usage: 0.86, gen throughput (token/s): 443.89, #queue-req: 157
  13667. 2025-07-20 15:40:09,485 - __main__ - INFO - sglang running req: 11 queue req: 157
  13668. 2025-07-20 15:40:09,585 - sglang - INFO - [2025-07-20 15:40:09 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 10, #queue-req: 156
  13669. 2025-07-20 15:40:09,585 - __main__ - INFO - sglang running req: 10 queue req: 156
  13670. 2025-07-20 15:40:10,807 - sglang - INFO - [2025-07-20 15:40:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2129, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 155
  13671. 2025-07-20 15:40:10,807 - __main__ - INFO - sglang running req: 10 queue req: 155
  13672. 2025-07-20 15:40:11,803 - sglang - INFO - [2025-07-20 15:40:11 TP0] Decode batch. #running-req: 11, #token: 31163, token usage: 0.82, gen throughput (token/s): 188.94, #queue-req: 155
  13673. 2025-07-20 15:40:11,803 - __main__ - INFO - sglang running req: 11 queue req: 155
  13674. 2025-07-20 15:40:12,597 - sglang - INFO - [2025-07-20 15:40:12 TP0] Prefill batch. #new-seq: 1, #new-token: 1367, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 154
  13675. 2025-07-20 15:40:12,598 - __main__ - INFO - sglang running req: 10 queue req: 154
  13676. 2025-07-20 15:40:13,316 - sglang - INFO - [2025-07-20 15:40:13 TP0] Decode batch. #running-req: 11, #token: 29364, token usage: 0.77, gen throughput (token/s): 290.12, #queue-req: 154
  13677. 2025-07-20 15:40:13,317 - __main__ - INFO - sglang running req: 11 queue req: 154
  13678. 2025-07-20 15:40:13,564 - sglang - INFO - [2025-07-20 15:40:13 TP0] Prefill batch. #new-seq: 2, #new-token: 2748, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 10, #queue-req: 152
  13679. 2025-07-20 15:40:13,564 - __main__ - INFO - sglang running req: 10 queue req: 152
  13680. 2025-07-20 15:40:13,798 - __main__ - INFO - Queue remaining: 2
  13681. 2025-07-20 15:40:13,798 - __main__ - INFO -
  13682. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13683. ----------------------------------------------------------------------------------
  13684. sglang_input_tokens 889.52 1033.64
  13685. sglang_output_tokens 255.42 292.94
  13686. 2025-07-20 15:40:13,798 - __main__ - INFO -
  13687. Worker ID | finished | started
  13688. ----------+----------+--------
  13689. 0 | 346 | 500
  13690. 1 | 0 | 10
  13691. 2025-07-20 15:40:15,347 - sglang - INFO - [2025-07-20 15:40:15 TP0] Decode batch. #running-req: 12, #token: 29111, token usage: 0.77, gen throughput (token/s): 230.95, #queue-req: 152
  13692. 2025-07-20 15:40:15,347 - __main__ - INFO - sglang running req: 12 queue req: 152
  13693. 2025-07-20 15:40:15,769 - sglang - INFO - [2025-07-20 15:40:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 11, #queue-req: 151
  13694. 2025-07-20 15:40:15,769 - __main__ - INFO - sglang running req: 11 queue req: 151
  13695. 2025-07-20 15:40:16,773 - sglang - INFO - [2025-07-20 15:40:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2625, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 11, #queue-req: 150
  13696. 2025-07-20 15:40:16,773 - __main__ - INFO - sglang running req: 11 queue req: 150
  13697. 2025-07-20 15:40:17,859 - sglang - INFO - [2025-07-20 15:40:17 TP0] Decode batch. #running-req: 12, #token: 28745, token usage: 0.76, gen throughput (token/s): 190.34, #queue-req: 150
  13698. 2025-07-20 15:40:17,859 - __main__ - INFO - sglang running req: 12 queue req: 150
  13699. 2025-07-20 15:40:18,385 - sglang - INFO - [2025-07-20 15:40:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1818, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 11, #queue-req: 149
  13700. 2025-07-20 15:40:18,385 - __main__ - INFO - sglang running req: 11 queue req: 149
  13701. 2025-07-20 15:40:19,527 - sglang - INFO - [2025-07-20 15:40:19 TP0] Decode batch. #running-req: 12, #token: 27619, token usage: 0.73, gen throughput (token/s): 287.02, #queue-req: 149
  13702. 2025-07-20 15:40:19,527 - __main__ - INFO - sglang running req: 12 queue req: 149
  13703. 2025-07-20 15:40:20,145 - sglang - INFO - [2025-07-20 15:40:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2126, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 11, #queue-req: 148
  13704. 2025-07-20 15:40:20,145 - __main__ - INFO - sglang running req: 11 queue req: 148
  13705. 2025-07-20 15:40:21,189 - sglang - INFO - [2025-07-20 15:40:21 TP0] Decode batch. #running-req: 12, #token: 28713, token usage: 0.76, gen throughput (token/s): 288.17, #queue-req: 148
  13706. 2025-07-20 15:40:21,189 - __main__ - INFO - sglang running req: 12 queue req: 148
  13707. 2025-07-20 15:40:21,412 - sglang - INFO - [2025-07-20 15:40:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2381, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 11, #queue-req: 147
  13708. 2025-07-20 15:40:21,413 - __main__ - INFO - sglang running req: 11 queue req: 147
  13709. 2025-07-20 15:40:22,608 - sglang - INFO - [2025-07-20 15:40:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2369, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 11, #queue-req: 146
  13710. 2025-07-20 15:40:22,608 - __main__ - INFO - sglang running req: 11 queue req: 146
  13711. 2025-07-20 15:40:23,678 - sglang - INFO - [2025-07-20 15:40:23 TP0] Decode batch. #running-req: 12, #token: 28768, token usage: 0.76, gen throughput (token/s): 192.10, #queue-req: 146
  13712. 2025-07-20 15:40:23,678 - __main__ - INFO - sglang running req: 12 queue req: 146
  13713. 2025-07-20 15:40:23,799 - __main__ - INFO - Queue remaining: 2
  13714. 2025-07-20 15:40:23,799 - __main__ - INFO -
  13715. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13716. ----------------------------------------------------------------------------------
  13717. sglang_input_tokens 893.22 1039.33
  13718. sglang_output_tokens 255.97 292.16
  13719. 2025-07-20 15:40:23,799 - __main__ - INFO -
  13720. Worker ID | finished | started
  13721. ----------+----------+--------
  13722. 0 | 352 | 500
  13723. 1 | 0 | 10
  13724. 2025-07-20 15:40:24,590 - sglang - INFO - [2025-07-20 15:40:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2572, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 11, #queue-req: 145
  13725. 2025-07-20 15:40:24,590 - __main__ - INFO - sglang running req: 11 queue req: 145
  13726. 2025-07-20 15:40:25,452 - sglang - INFO - [2025-07-20 15:40:25 TP0] Decode batch. #running-req: 12, #token: 30208, token usage: 0.80, gen throughput (token/s): 269.94, #queue-req: 145
  13727. 2025-07-20 15:40:25,452 - __main__ - INFO - sglang running req: 12 queue req: 145
  13728. 2025-07-20 15:40:25,939 - sglang - INFO - [2025-07-20 15:40:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2150, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 11, #queue-req: 144
  13729. 2025-07-20 15:40:25,939 - __main__ - INFO - sglang running req: 11 queue req: 144
  13730. 2025-07-20 15:40:27,236 - sglang - INFO - [2025-07-20 15:40:27 TP0] Decode batch. #running-req: 12, #token: 31218, token usage: 0.82, gen throughput (token/s): 268.53, #queue-req: 144
  13731. 2025-07-20 15:40:27,236 - __main__ - INFO - sglang running req: 12 queue req: 144
  13732. 2025-07-20 15:40:28,357 - sglang - INFO - [2025-07-20 15:40:28 TP0] Decode batch. #running-req: 12, #token: 31698, token usage: 0.83, gen throughput (token/s): 428.14, #queue-req: 144
  13733. 2025-07-20 15:40:28,357 - __main__ - INFO - sglang running req: 12 queue req: 144
  13734. 2025-07-20 15:40:29,361 - sglang - INFO - [2025-07-20 15:40:29 TP0] Decode batch. #running-req: 11, #token: 30054, token usage: 0.79, gen throughput (token/s): 457.17, #queue-req: 144
  13735. 2025-07-20 15:40:29,361 - __main__ - INFO - sglang running req: 11 queue req: 144
  13736. 2025-07-20 15:40:30,129 - sglang - INFO - [2025-07-20 15:40:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2327, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 143
  13737. 2025-07-20 15:40:30,129 - __main__ - INFO - sglang running req: 10 queue req: 143
  13738. 2025-07-20 15:40:31,101 - sglang - INFO - [2025-07-20 15:40:31 TP0] Decode batch. #running-req: 11, #token: 29131, token usage: 0.77, gen throughput (token/s): 252.33, #queue-req: 143
  13739. 2025-07-20 15:40:31,101 - __main__ - INFO - sglang running req: 11 queue req: 143
  13740. 2025-07-20 15:40:32,086 - sglang - INFO - [2025-07-20 15:40:32 TP0] Decode batch. #running-req: 11, #token: 29571, token usage: 0.78, gen throughput (token/s): 446.58, #queue-req: 143
  13741. 2025-07-20 15:40:32,086 - __main__ - INFO - sglang running req: 11 queue req: 143
  13742. 2025-07-20 15:40:32,581 - sglang - INFO - [2025-07-20 15:40:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2127, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 142
  13743. 2025-07-20 15:40:32,581 - __main__ - INFO - sglang running req: 10 queue req: 142
  13744. 2025-07-20 15:40:33,749 - sglang - INFO - [2025-07-20 15:40:33 TP0] Decode batch. #running-req: 11, #token: 29960, token usage: 0.79, gen throughput (token/s): 264.09, #queue-req: 142
  13745. 2025-07-20 15:40:33,749 - __main__ - INFO - sglang running req: 11 queue req: 142
  13746. 2025-07-20 15:40:33,800 - __main__ - INFO - Queue remaining: 2
  13747. 2025-07-20 15:40:33,800 - __main__ - INFO -
  13748. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13749. ----------------------------------------------------------------------------------
  13750. sglang_input_tokens 893.46 1024.07
  13751. sglang_output_tokens 255.43 284.95
  13752. 2025-07-20 15:40:33,801 - __main__ - INFO -
  13753. Worker ID | finished | started
  13754. ----------+----------+--------
  13755. 0 | 357 | 500
  13756. 1 | 0 | 10
  13757. 2025-07-20 15:40:34,462 - sglang - INFO - [2025-07-20 15:40:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2527, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 141
  13758. 2025-07-20 15:40:34,462 - __main__ - INFO - sglang running req: 10 queue req: 141
  13759. 2025-07-20 15:40:35,583 - sglang - INFO - [2025-07-20 15:40:35 TP0] Decode batch. #running-req: 11, #token: 30056, token usage: 0.79, gen throughput (token/s): 239.34, #queue-req: 141
  13760. 2025-07-20 15:40:35,583 - __main__ - INFO - sglang running req: 11 queue req: 141
  13761. 2025-07-20 15:40:36,752 - sglang - INFO - [2025-07-20 15:40:36 TP0] Decode batch. #running-req: 11, #token: 30496, token usage: 0.80, gen throughput (token/s): 376.38, #queue-req: 141
  13762. 2025-07-20 15:40:36,752 - __main__ - INFO - sglang running req: 11 queue req: 141
  13763. 2025-07-20 15:40:37,524 - sglang - INFO - [2025-07-20 15:40:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2644, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 140
  13764. 2025-07-20 15:40:37,524 - __main__ - INFO - sglang running req: 10 queue req: 140
  13765. 2025-07-20 15:40:38,588 - sglang - INFO - [2025-07-20 15:40:38 TP0] Decode batch. #running-req: 11, #token: 30809, token usage: 0.81, gen throughput (token/s): 239.13, #queue-req: 140
  13766. 2025-07-20 15:40:38,588 - __main__ - INFO - sglang running req: 11 queue req: 140
  13767. 2025-07-20 15:40:39,578 - sglang - INFO - [2025-07-20 15:40:39 TP0] Decode batch. #running-req: 11, #token: 31249, token usage: 0.82, gen throughput (token/s): 444.28, #queue-req: 140
  13768. 2025-07-20 15:40:39,578 - __main__ - INFO - sglang running req: 11 queue req: 140
  13769. 2025-07-20 15:40:40,174 - sglang - INFO - [2025-07-20 15:40:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2495, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 139
  13770. 2025-07-20 15:40:40,175 - __main__ - INFO - sglang running req: 10 queue req: 139
  13771. 2025-07-20 15:40:41,328 - sglang - INFO - [2025-07-20 15:40:41 TP0] Decode batch. #running-req: 11, #token: 30964, token usage: 0.82, gen throughput (token/s): 250.83, #queue-req: 139
  13772. 2025-07-20 15:40:41,328 - __main__ - INFO - sglang running req: 11 queue req: 139
  13773. 2025-07-20 15:40:41,428 - sglang - INFO - [2025-07-20 15:40:41 TP0] Prefill batch. #new-seq: 1, #new-token: 1731, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 138
  13774. 2025-07-20 15:40:41,428 - __main__ - INFO - sglang running req: 10 queue req: 138
  13775. 2025-07-20 15:40:42,770 - sglang - INFO - [2025-07-20 15:40:42 TP0] Prefill batch. #new-seq: 1, #new-token: 1754, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 137
  13776. 2025-07-20 15:40:42,770 - __main__ - INFO - sglang running req: 10 queue req: 137
  13777. 2025-07-20 15:40:43,710 - sglang - INFO - [2025-07-20 15:40:43 TP0] Decode batch. #running-req: 11, #token: 29102, token usage: 0.77, gen throughput (token/s): 183.89, #queue-req: 137
  13778. 2025-07-20 15:40:43,710 - __main__ - INFO - sglang running req: 11 queue req: 137
  13779. 2025-07-20 15:40:43,802 - __main__ - INFO - Queue remaining: 2
  13780. 2025-07-20 15:40:43,802 - __main__ - INFO -
  13781. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13782. ----------------------------------------------------------------------------------
  13783. sglang_input_tokens 896.28 1053.09
  13784. sglang_output_tokens 255.93 291.94
  13785. 2025-07-20 15:40:43,802 - __main__ - INFO -
  13786. Worker ID | finished | started
  13787. ----------+----------+--------
  13788. 0 | 362 | 500
  13789. 1 | 0 | 10
  13790. 2025-07-20 15:40:44,749 - sglang - INFO - [2025-07-20 15:40:44 TP0] Decode batch. #running-req: 11, #token: 29542, token usage: 0.78, gen throughput (token/s): 423.25, #queue-req: 137
  13791. 2025-07-20 15:40:44,750 - __main__ - INFO - sglang running req: 11 queue req: 137
  13792. 2025-07-20 15:40:44,948 - sglang - INFO - [2025-07-20 15:40:44 TP0] Prefill batch. #new-seq: 1, #new-token: 2367, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 136
  13793. 2025-07-20 15:40:44,948 - __main__ - INFO - sglang running req: 10 queue req: 136
  13794. 2025-07-20 15:40:45,871 - sglang - INFO - [2025-07-20 15:40:45 TP0] Prefill batch. #new-seq: 2, #new-token: 3932, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 10, #queue-req: 134
  13795. 2025-07-20 15:40:45,871 - __main__ - INFO - sglang running req: 10 queue req: 134
  13796. 2025-07-20 15:40:47,756 - sglang - INFO - [2025-07-20 15:40:47 TP0] Decode batch. #running-req: 12, #token: 29497, token usage: 0.78, gen throughput (token/s): 154.00, #queue-req: 134
  13797. 2025-07-20 15:40:47,756 - __main__ - INFO - sglang running req: 12 queue req: 134
  13798. 2025-07-20 15:40:47,855 - sglang - INFO - [2025-07-20 15:40:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 11, #queue-req: 133
  13799. 2025-07-20 15:40:47,855 - __main__ - INFO - sglang running req: 11 queue req: 133
  13800. 2025-07-20 15:40:49,578 - sglang - INFO - [2025-07-20 15:40:49 TP0] Decode batch. #running-req: 12, #token: 30052, token usage: 0.79, gen throughput (token/s): 262.86, #queue-req: 133
  13801. 2025-07-20 15:40:49,578 - __main__ - INFO - sglang running req: 12 queue req: 133
  13802. 2025-07-20 15:40:50,570 - sglang - INFO - [2025-07-20 15:40:50 TP0] Decode batch. #running-req: 12, #token: 30532, token usage: 0.80, gen throughput (token/s): 484.09, #queue-req: 133
  13803. 2025-07-20 15:40:50,570 - __main__ - INFO - sglang running req: 12 queue req: 133
  13804. 2025-07-20 15:40:50,972 - sglang - INFO - [2025-07-20 15:40:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2204, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 11, #queue-req: 132
  13805. 2025-07-20 15:40:50,972 - __main__ - INFO - sglang running req: 11 queue req: 132
  13806. 2025-07-20 15:40:52,438 - sglang - INFO - [2025-07-20 15:40:52 TP0] Decode batch. #running-req: 12, #token: 29986, token usage: 0.79, gen throughput (token/s): 256.49, #queue-req: 132
  13807. 2025-07-20 15:40:52,438 - __main__ - INFO - sglang running req: 12 queue req: 132
  13808. 2025-07-20 15:40:53,568 - sglang - INFO - [2025-07-20 15:40:53 TP0] Decode batch. #running-req: 12, #token: 30466, token usage: 0.80, gen throughput (token/s): 424.71, #queue-req: 132
  13809. 2025-07-20 15:40:53,568 - __main__ - INFO - sglang running req: 12 queue req: 132
  13810. 2025-07-20 15:40:53,803 - __main__ - INFO - Queue remaining: 2
  13811. 2025-07-20 15:40:53,804 - __main__ - INFO -
  13812. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13813. ----------------------------------------------------------------------------------
  13814. sglang_input_tokens 897.40 1054.66
  13815. sglang_output_tokens 256.17 292.68
  13816. 2025-07-20 15:40:53,804 - __main__ - INFO -
  13817. Worker ID | finished | started
  13818. ----------+----------+--------
  13819. 0 | 366 | 500
  13820. 1 | 0 | 10
  13821. 2025-07-20 15:40:54,693 - sglang - INFO - [2025-07-20 15:40:54 TP0] Decode batch. #running-req: 12, #token: 29283, token usage: 0.77, gen throughput (token/s): 426.35, #queue-req: 132
  13822. 2025-07-20 15:40:54,694 - __main__ - INFO - sglang running req: 12 queue req: 132
  13823. 2025-07-20 15:40:54,718 - sglang - INFO - [2025-07-20 15:40:54 TP0] Prefill batch. #new-seq: 1, #new-token: 1423, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 11, #queue-req: 131
  13824. 2025-07-20 15:40:54,718 - __main__ - INFO - sglang running req: 11 queue req: 131
  13825. 2025-07-20 15:40:56,215 - sglang - INFO - [2025-07-20 15:40:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2209, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 11, #queue-req: 130
  13826. 2025-07-20 15:40:56,216 - __main__ - INFO - sglang running req: 11 queue req: 130
  13827. 2025-07-20 15:40:56,966 - sglang - INFO - [2025-07-20 15:40:56 TP0] Decode batch. #running-req: 12, #token: 30419, token usage: 0.80, gen throughput (token/s): 210.33, #queue-req: 130
  13828. 2025-07-20 15:40:56,966 - __main__ - INFO - sglang running req: 12 queue req: 130
  13829. 2025-07-20 15:40:57,190 - sglang - INFO - [2025-07-20 15:40:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2313, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 11, #queue-req: 129
  13830. 2025-07-20 15:40:57,190 - __main__ - INFO - sglang running req: 11 queue req: 129
  13831. 2025-07-20 15:40:58,813 - sglang - INFO - [2025-07-20 15:40:58 TP0] Decode batch. #running-req: 12, #token: 30496, token usage: 0.80, gen throughput (token/s): 259.34, #queue-req: 129
  13832. 2025-07-20 15:40:58,813 - __main__ - INFO - sglang running req: 12 queue req: 129
  13833. 2025-07-20 15:40:59,107 - sglang - INFO - [2025-07-20 15:40:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2900, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 11, #queue-req: 128
  13834. 2025-07-20 15:40:59,107 - __main__ - INFO - sglang running req: 11 queue req: 128
  13835. 2025-07-20 15:41:00,820 - sglang - INFO - [2025-07-20 15:41:00 TP0] Decode batch. #running-req: 11, #token: 29574, token usage: 0.78, gen throughput (token/s): 232.20, #queue-req: 128
  13836. 2025-07-20 15:41:00,820 - __main__ - INFO - sglang running req: 11 queue req: 128
  13837. 2025-07-20 15:41:01,911 - sglang - INFO - [2025-07-20 15:41:01 TP0] Decode batch. #running-req: 11, #token: 30014, token usage: 0.79, gen throughput (token/s): 403.40, #queue-req: 128
  13838. 2025-07-20 15:41:01,911 - __main__ - INFO - sglang running req: 11 queue req: 128
  13839. 2025-07-20 15:41:02,949 - sglang - INFO - [2025-07-20 15:41:02 TP0] Decode batch. #running-req: 11, #token: 30454, token usage: 0.80, gen throughput (token/s): 423.59, #queue-req: 128
  13840. 2025-07-20 15:41:02,950 - __main__ - INFO - sglang running req: 11 queue req: 128
  13841. 2025-07-20 15:41:03,447 - sglang - INFO - [2025-07-20 15:41:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2671, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 127
  13842. 2025-07-20 15:41:03,447 - __main__ - INFO - sglang running req: 10 queue req: 127
  13843. 2025-07-20 15:41:03,806 - __main__ - INFO - Queue remaining: 2
  13844. 2025-07-20 15:41:03,806 - __main__ - INFO -
  13845. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13846. ----------------------------------------------------------------------------------
  13847. sglang_input_tokens 899.28 1036.11
  13848. sglang_output_tokens 256.09 284.11
  13849. 2025-07-20 15:41:03,806 - __main__ - INFO -
  13850. Worker ID | finished | started
  13851. ----------+----------+--------
  13852. 0 | 372 | 500
  13853. 1 | 0 | 10
  13854. 2025-07-20 15:41:04,805 - sglang - INFO - [2025-07-20 15:41:04 TP0] Decode batch. #running-req: 11, #token: 31923, token usage: 0.84, gen throughput (token/s): 236.62, #queue-req: 127
  13855. 2025-07-20 15:41:04,805 - __main__ - INFO - sglang running req: 11 queue req: 127
  13856. 2025-07-20 15:41:05,912 - sglang - INFO - [2025-07-20 15:41:05 TP0] Decode batch. #running-req: 11, #token: 32363, token usage: 0.85, gen throughput (token/s): 397.46, #queue-req: 127
  13857. 2025-07-20 15:41:05,912 - __main__ - INFO - sglang running req: 11 queue req: 127
  13858. 2025-07-20 15:41:07,047 - sglang - INFO - [2025-07-20 15:41:07 TP0] Decode batch. #running-req: 11, #token: 32803, token usage: 0.86, gen throughput (token/s): 387.43, #queue-req: 127
  13859. 2025-07-20 15:41:07,047 - __main__ - INFO - sglang running req: 11 queue req: 127
  13860. 2025-07-20 15:41:07,160 - sglang - INFO - [2025-07-20 15:41:07 TP0] Prefill batch. #new-seq: 1, #new-token: 1875, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 126
  13861. 2025-07-20 15:41:07,160 - __main__ - INFO - sglang running req: 10 queue req: 126
  13862. 2025-07-20 15:41:08,738 - sglang - INFO - [2025-07-20 15:41:08 TP0] Decode batch. #running-req: 11, #token: 32195, token usage: 0.85, gen throughput (token/s): 259.64, #queue-req: 126
  13863. 2025-07-20 15:41:08,738 - __main__ - INFO - sglang running req: 11 queue req: 126
  13864. 2025-07-20 15:41:09,871 - sglang - INFO - [2025-07-20 15:41:09 TP0] Decode batch. #running-req: 11, #token: 32635, token usage: 0.86, gen throughput (token/s): 388.46, #queue-req: 126
  13865. 2025-07-20 15:41:09,871 - __main__ - INFO - sglang running req: 11 queue req: 126
  13866. 2025-07-20 15:41:09,983 - sglang - INFO - [2025-07-20 15:41:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2011, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 125
  13867. 2025-07-20 15:41:09,983 - __main__ - INFO - sglang running req: 10 queue req: 125
  13868. 2025-07-20 15:41:11,697 - sglang - INFO - [2025-07-20 15:41:11 TP0] Decode batch. #running-req: 11, #token: 31851, token usage: 0.84, gen throughput (token/s): 240.45, #queue-req: 125
  13869. 2025-07-20 15:41:11,697 - __main__ - INFO - sglang running req: 11 queue req: 125
  13870. 2025-07-20 15:41:12,819 - sglang - INFO - [2025-07-20 15:41:12 TP0] Decode batch. #running-req: 11, #token: 32291, token usage: 0.85, gen throughput (token/s): 392.02, #queue-req: 125
  13871. 2025-07-20 15:41:12,819 - __main__ - INFO - sglang running req: 11 queue req: 125
  13872. 2025-07-20 15:41:13,458 - sglang - INFO - [2025-07-20 15:41:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2939, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 124
  13873. 2025-07-20 15:41:13,459 - __main__ - INFO - sglang running req: 10 queue req: 124
  13874. 2025-07-20 15:41:13,808 - __main__ - INFO - Queue remaining: 2
  13875. 2025-07-20 15:41:13,809 - __main__ - INFO -
  13876. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13877. ----------------------------------------------------------------------------------
  13878. sglang_input_tokens 897.64 1037.25
  13879. sglang_output_tokens 255.81 286.63
  13880. 2025-07-20 15:41:13,809 - __main__ - INFO -
  13881. Worker ID | finished | started
  13882. ----------+----------+--------
  13883. 0 | 375 | 500
  13884. 1 | 0 | 10
  13885. 2025-07-20 15:41:14,699 - sglang - INFO - [2025-07-20 15:41:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2150, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 123
  13886. 2025-07-20 15:41:14,699 - __main__ - INFO - sglang running req: 10 queue req: 123
  13887. 2025-07-20 15:41:15,562 - sglang - INFO - [2025-07-20 15:41:15 TP0] Decode batch. #running-req: 10, #token: 28281, token usage: 0.74, gen throughput (token/s): 159.34, #queue-req: 123
  13888. 2025-07-20 15:41:15,562 - __main__ - INFO - sglang running req: 10 queue req: 123
  13889. 2025-07-20 15:41:15,562 - sglang - INFO - [2025-07-20 15:41:15 TP0] Prefill batch. #new-seq: 1, #new-token: 2586, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 122
  13890. 2025-07-20 15:41:15,562 - __main__ - INFO - sglang running req: 10 queue req: 122
  13891. 2025-07-20 15:41:17,561 - sglang - INFO - [2025-07-20 15:41:17 TP0] Decode batch. #running-req: 11, #token: 31307, token usage: 0.82, gen throughput (token/s): 220.05, #queue-req: 122
  13892. 2025-07-20 15:41:17,561 - __main__ - INFO - sglang running req: 11 queue req: 122
  13893. 2025-07-20 15:41:18,656 - sglang - INFO - [2025-07-20 15:41:18 TP0] Decode batch. #running-req: 11, #token: 31747, token usage: 0.84, gen throughput (token/s): 402.10, #queue-req: 122
  13894. 2025-07-20 15:41:18,656 - __main__ - INFO - sglang running req: 11 queue req: 122
  13895. 2025-07-20 15:41:19,658 - sglang - INFO - [2025-07-20 15:41:19 TP0] Decode batch. #running-req: 10, #token: 29940, token usage: 0.79, gen throughput (token/s): 438.00, #queue-req: 122
  13896. 2025-07-20 15:41:19,658 - __main__ - INFO - sglang running req: 10 queue req: 122
  13897. 2025-07-20 15:41:19,658 - sglang - INFO - [2025-07-20 15:41:19 TP0] Prefill batch. #new-seq: 1, #new-token: 1851, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 121
  13898. 2025-07-20 15:41:19,658 - __main__ - INFO - sglang running req: 10 queue req: 121
  13899. 2025-07-20 15:41:20,481 - sglang - INFO - [2025-07-20 15:41:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2162, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 120
  13900. 2025-07-20 15:41:20,482 - __main__ - INFO - sglang running req: 10 queue req: 120
  13901. 2025-07-20 15:41:21,626 - sglang - INFO - [2025-07-20 15:41:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2298, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 119
  13902. 2025-07-20 15:41:21,627 - __main__ - INFO - sglang running req: 10 queue req: 119
  13903. 2025-07-20 15:41:22,707 - sglang - INFO - [2025-07-20 15:41:22 TP0] Decode batch. #running-req: 11, #token: 30013, token usage: 0.79, gen throughput (token/s): 143.62, #queue-req: 119
  13904. 2025-07-20 15:41:22,708 - __main__ - INFO - sglang running req: 11 queue req: 119
  13905. 2025-07-20 15:41:23,479 - sglang - INFO - [2025-07-20 15:41:23 TP0] Prefill batch. #new-seq: 2, #new-token: 4757, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.62, #running-req: 9, #queue-req: 117
  13906. 2025-07-20 15:41:23,479 - __main__ - INFO - sglang running req: 9 queue req: 117
  13907. 2025-07-20 15:41:23,810 - __main__ - INFO - Queue remaining: 2
  13908. 2025-07-20 15:41:23,811 - __main__ - INFO -
  13909. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13910. ----------------------------------------------------------------------------------
  13911. sglang_input_tokens 905.47 1051.12
  13912. sglang_output_tokens 258.13 290.71
  13913. 2025-07-20 15:41:23,811 - __main__ - INFO -
  13914. Worker ID | finished | started
  13915. ----------+----------+--------
  13916. 0 | 382 | 500
  13917. 1 | 0 | 10
  13918. 2025-07-20 15:41:25,161 - sglang - INFO - [2025-07-20 15:41:25 TP0] Decode batch. #running-req: 11, #token: 28565, token usage: 0.75, gen throughput (token/s): 178.49, #queue-req: 117
  13919. 2025-07-20 15:41:25,161 - __main__ - INFO - sglang running req: 11 queue req: 117
  13920. 2025-07-20 15:41:26,149 - sglang - INFO - [2025-07-20 15:41:26 TP0] Decode batch. #running-req: 11, #token: 29005, token usage: 0.76, gen throughput (token/s): 445.71, #queue-req: 117
  13921. 2025-07-20 15:41:26,149 - __main__ - INFO - sglang running req: 11 queue req: 117
  13922. 2025-07-20 15:41:27,136 - sglang - INFO - [2025-07-20 15:41:27 TP0] Decode batch. #running-req: 11, #token: 29445, token usage: 0.78, gen throughput (token/s): 445.45, #queue-req: 117
  13923. 2025-07-20 15:41:27,137 - __main__ - INFO - sglang running req: 11 queue req: 117
  13924. 2025-07-20 15:41:28,133 - sglang - INFO - [2025-07-20 15:41:28 TP0] Decode batch. #running-req: 11, #token: 29885, token usage: 0.79, gen throughput (token/s): 441.40, #queue-req: 117
  13925. 2025-07-20 15:41:28,133 - __main__ - INFO - sglang running req: 11 queue req: 117
  13926. 2025-07-20 15:41:28,776 - sglang - INFO - [2025-07-20 15:41:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2372, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 116
  13927. 2025-07-20 15:41:28,776 - __main__ - INFO - sglang running req: 10 queue req: 116
  13928. 2025-07-20 15:41:29,951 - sglang - INFO - [2025-07-20 15:41:29 TP0] Decode batch. #running-req: 11, #token: 30280, token usage: 0.80, gen throughput (token/s): 241.52, #queue-req: 116
  13929. 2025-07-20 15:41:29,951 - __main__ - INFO - sglang running req: 11 queue req: 116
  13930. 2025-07-20 15:41:30,942 - sglang - INFO - [2025-07-20 15:41:30 TP0] Decode batch. #running-req: 11, #token: 30720, token usage: 0.81, gen throughput (token/s): 444.05, #queue-req: 116
  13931. 2025-07-20 15:41:30,942 - __main__ - INFO - sglang running req: 11 queue req: 116
  13932. 2025-07-20 15:41:31,931 - sglang - INFO - [2025-07-20 15:41:31 TP0] Decode batch. #running-req: 11, #token: 31160, token usage: 0.82, gen throughput (token/s): 444.87, #queue-req: 116
  13933. 2025-07-20 15:41:31,931 - __main__ - INFO - sglang running req: 11 queue req: 116
  13934. 2025-07-20 15:41:32,352 - sglang - INFO - [2025-07-20 15:41:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2282, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 115
  13935. 2025-07-20 15:41:32,352 - __main__ - INFO - sglang running req: 10 queue req: 115
  13936. 2025-07-20 15:41:33,655 - sglang - INFO - [2025-07-20 15:41:33 TP0] Decode batch. #running-req: 11, #token: 31694, token usage: 0.83, gen throughput (token/s): 254.67, #queue-req: 115
  13937. 2025-07-20 15:41:33,655 - __main__ - INFO - sglang running req: 11 queue req: 115
  13938. 2025-07-20 15:41:33,811 - __main__ - INFO - Queue remaining: 2
  13939. 2025-07-20 15:41:33,812 - __main__ - INFO -
  13940. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13941. ----------------------------------------------------------------------------------
  13942. sglang_input_tokens 900.14 1029.45
  13943. sglang_output_tokens 256.43 283.40
  13944. 2025-07-20 15:41:33,812 - __main__ - INFO -
  13945. Worker ID | finished | started
  13946. ----------+----------+--------
  13947. 0 | 384 | 500
  13948. 1 | 0 | 10
  13949. 2025-07-20 15:41:34,027 - sglang - INFO - [2025-07-20 15:41:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 114
  13950. 2025-07-20 15:41:34,028 - __main__ - INFO - sglang running req: 10 queue req: 114
  13951. 2025-07-20 15:41:35,404 - sglang - INFO - [2025-07-20 15:41:35 TP0] Decode batch. #running-req: 11, #token: 30785, token usage: 0.81, gen throughput (token/s): 250.99, #queue-req: 114
  13952. 2025-07-20 15:41:35,404 - __main__ - INFO - sglang running req: 11 queue req: 114
  13953. 2025-07-20 15:41:35,528 - sglang - INFO - [2025-07-20 15:41:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2121, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 113
  13954. 2025-07-20 15:41:35,528 - __main__ - INFO - sglang running req: 10 queue req: 113
  13955. 2025-07-20 15:41:37,064 - sglang - INFO - [2025-07-20 15:41:37 TP0] Decode batch. #running-req: 11, #token: 29850, token usage: 0.79, gen throughput (token/s): 264.46, #queue-req: 113
  13956. 2025-07-20 15:41:37,064 - __main__ - INFO - sglang running req: 11 queue req: 113
  13957. 2025-07-20 15:41:38,055 - sglang - INFO - [2025-07-20 15:41:38 TP0] Decode batch. #running-req: 11, #token: 30290, token usage: 0.80, gen throughput (token/s): 444.07, #queue-req: 113
  13958. 2025-07-20 15:41:38,055 - __main__ - INFO - sglang running req: 11 queue req: 113
  13959. 2025-07-20 15:41:38,846 - sglang - INFO - [2025-07-20 15:41:38 TP0] Prefill batch. #new-seq: 1, #new-token: 1775, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 10, #queue-req: 112
  13960. 2025-07-20 15:41:38,846 - __main__ - INFO - sglang running req: 10 queue req: 112
  13961. 2025-07-20 15:41:39,653 - sglang - INFO - [2025-07-20 15:41:39 TP0] Decode batch. #running-req: 11, #token: 29287, token usage: 0.77, gen throughput (token/s): 274.72, #queue-req: 112
  13962. 2025-07-20 15:41:39,653 - __main__ - INFO - sglang running req: 11 queue req: 112
  13963. 2025-07-20 15:41:39,752 - sglang - INFO - [2025-07-20 15:41:39 TP0] Prefill batch. #new-seq: 2, #new-token: 4722, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.63, #running-req: 10, #queue-req: 110
  13964. 2025-07-20 15:41:39,752 - __main__ - INFO - sglang running req: 10 queue req: 110
  13965. 2025-07-20 15:41:41,211 - sglang - INFO - [2025-07-20 15:41:41 TP0] Prefill batch. #new-seq: 1, #new-token: 1773, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 11, #queue-req: 109
  13966. 2025-07-20 15:41:41,211 - __main__ - INFO - sglang running req: 11 queue req: 109
  13967. 2025-07-20 15:41:42,692 - sglang - INFO - [2025-07-20 15:41:42 TP0] Decode batch. #running-req: 12, #token: 30993, token usage: 0.82, gen throughput (token/s): 155.95, #queue-req: 109
  13968. 2025-07-20 15:41:42,692 - __main__ - INFO - sglang running req: 12 queue req: 109
  13969. 2025-07-20 15:41:43,688 - sglang - INFO - [2025-07-20 15:41:43 TP0] Decode batch. #running-req: 12, #token: 31473, token usage: 0.83, gen throughput (token/s): 481.97, #queue-req: 109
  13970. 2025-07-20 15:41:43,688 - __main__ - INFO - sglang running req: 12 queue req: 109
  13971. 2025-07-20 15:41:43,813 - __main__ - INFO - Queue remaining: 2
  13972. 2025-07-20 15:41:43,813 - __main__ - INFO -
  13973. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  13974. ----------------------------------------------------------------------------------
  13975. sglang_input_tokens 903.43 1053.85
  13976. sglang_output_tokens 257.34 290.15
  13977. 2025-07-20 15:41:43,813 - __main__ - INFO -
  13978. Worker ID | finished | started
  13979. ----------+----------+--------
  13980. 0 | 389 | 500
  13981. 1 | 0 | 10
  13982. 2025-07-20 15:41:44,684 - sglang - INFO - [2025-07-20 15:41:44 TP0] Decode batch. #running-req: 12, #token: 31953, token usage: 0.84, gen throughput (token/s): 481.55, #queue-req: 109
  13983. 2025-07-20 15:41:44,685 - __main__ - INFO - sglang running req: 12 queue req: 109
  13984. 2025-07-20 15:41:45,681 - sglang - INFO - [2025-07-20 15:41:45 TP0] Decode batch. #running-req: 12, #token: 32433, token usage: 0.85, gen throughput (token/s): 481.49, #queue-req: 109
  13985. 2025-07-20 15:41:45,681 - __main__ - INFO - sglang running req: 12 queue req: 109
  13986. 2025-07-20 15:41:46,676 - sglang - INFO - [2025-07-20 15:41:46 TP0] Decode batch. #running-req: 12, #token: 32913, token usage: 0.87, gen throughput (token/s): 482.60, #queue-req: 109
  13987. 2025-07-20 15:41:46,676 - __main__ - INFO - sglang running req: 12 queue req: 109
  13988. 2025-07-20 15:41:47,274 - sglang - INFO - [2025-07-20 15:41:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2738, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 11, #queue-req: 108
  13989. 2025-07-20 15:41:47,274 - __main__ - INFO - sglang running req: 11 queue req: 108
  13990. 2025-07-20 15:41:48,480 - sglang - INFO - [2025-07-20 15:41:48 TP0] Decode batch. #running-req: 12, #token: 32310, token usage: 0.85, gen throughput (token/s): 265.51, #queue-req: 108
  13991. 2025-07-20 15:41:48,480 - __main__ - INFO - sglang running req: 12 queue req: 108
  13992. 2025-07-20 15:41:49,488 - sglang - INFO - [2025-07-20 15:41:49 TP0] Decode batch. #running-req: 12, #token: 32790, token usage: 0.86, gen throughput (token/s): 476.42, #queue-req: 108
  13993. 2025-07-20 15:41:49,488 - __main__ - INFO - sglang running req: 12 queue req: 108
  13994. 2025-07-20 15:41:50,488 - sglang - INFO - [2025-07-20 15:41:50 TP0] Decode batch. #running-req: 11, #token: 30275, token usage: 0.80, gen throughput (token/s): 479.01, #queue-req: 108
  13995. 2025-07-20 15:41:50,488 - __main__ - INFO - sglang running req: 11 queue req: 108
  13996. 2025-07-20 15:41:50,488 - sglang - INFO - [2025-07-20 15:41:50 TP0] Prefill batch. #new-seq: 1, #new-token: 1941, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 11, #queue-req: 107
  13997. 2025-07-20 15:41:50,488 - __main__ - INFO - sglang running req: 11 queue req: 107
  13998. 2025-07-20 15:41:51,541 - sglang - INFO - [2025-07-20 15:41:51 TP0] Prefill batch. #new-seq: 1, #new-token: 1272, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 11, #queue-req: 106
  13999. 2025-07-20 15:41:51,541 - __main__ - INFO - sglang running req: 11 queue req: 106
  14000. 2025-07-20 15:41:52,296 - sglang - INFO - [2025-07-20 15:41:52 TP0] Prefill batch. #new-seq: 1, #new-token: 1269, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 11, #queue-req: 105
  14001. 2025-07-20 15:41:52,296 - __main__ - INFO - sglang running req: 11 queue req: 105
  14002. 2025-07-20 15:41:53,152 - sglang - INFO - [2025-07-20 15:41:53 TP0] Decode batch. #running-req: 12, #token: 29980, token usage: 0.79, gen throughput (token/s): 179.44, #queue-req: 105
  14003. 2025-07-20 15:41:53,152 - __main__ - INFO - sglang running req: 12 queue req: 105
  14004. 2025-07-20 15:41:53,815 - __main__ - INFO - Queue remaining: 2
  14005. 2025-07-20 15:41:53,815 - __main__ - INFO -
  14006. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14007. ----------------------------------------------------------------------------------
  14008. sglang_input_tokens 903.56 1035.16
  14009. sglang_output_tokens 257.66 287.25
  14010. 2025-07-20 15:41:53,815 - __main__ - INFO -
  14011. Worker ID | finished | started
  14012. ----------+----------+--------
  14013. 0 | 393 | 500
  14014. 1 | 0 | 10
  14015. 2025-07-20 15:41:54,142 - sglang - INFO - [2025-07-20 15:41:54 TP0] Decode batch. #running-req: 12, #token: 30460, token usage: 0.80, gen throughput (token/s): 484.44, #queue-req: 105
  14016. 2025-07-20 15:41:54,143 - __main__ - INFO - sglang running req: 12 queue req: 105
  14017. 2025-07-20 15:41:54,192 - sglang - INFO - [2025-07-20 15:41:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2107, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 11, #queue-req: 104
  14018. 2025-07-20 15:41:54,192 - __main__ - INFO - sglang running req: 11 queue req: 104
  14019. 2025-07-20 15:41:55,069 - sglang - INFO - [2025-07-20 15:41:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2562, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 11, #queue-req: 103
  14020. 2025-07-20 15:41:55,069 - __main__ - INFO - sglang running req: 11 queue req: 103
  14021. 2025-07-20 15:41:56,622 - sglang - INFO - [2025-07-20 15:41:56 TP0] Decode batch. #running-req: 12, #token: 29137, token usage: 0.77, gen throughput (token/s): 192.73, #queue-req: 103
  14022. 2025-07-20 15:41:56,623 - __main__ - INFO - sglang running req: 12 queue req: 103
  14023. 2025-07-20 15:41:57,614 - sglang - INFO - [2025-07-20 15:41:57 TP0] Decode batch. #running-req: 12, #token: 29617, token usage: 0.78, gen throughput (token/s): 483.98, #queue-req: 103
  14024. 2025-07-20 15:41:57,615 - __main__ - INFO - sglang running req: 12 queue req: 103
  14025. 2025-07-20 15:41:57,912 - sglang - INFO - [2025-07-20 15:41:57 TP0] Prefill batch. #new-seq: 1, #new-token: 1964, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 11, #queue-req: 102
  14026. 2025-07-20 15:41:57,912 - __main__ - INFO - sglang running req: 11 queue req: 102
  14027. 2025-07-20 15:41:58,619 - sglang - INFO - [2025-07-20 15:41:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2122, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 11, #queue-req: 101
  14028. 2025-07-20 15:41:58,619 - __main__ - INFO - sglang running req: 11 queue req: 101
  14029. 2025-07-20 15:41:59,441 - sglang - INFO - [2025-07-20 15:41:59 TP0] Prefill batch. #new-seq: 1, #new-token: 3261, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 11, #queue-req: 100
  14030. 2025-07-20 15:41:59,442 - __main__ - INFO - sglang running req: 11 queue req: 100
  14031. 2025-07-20 15:42:00,550 - sglang - INFO - [2025-07-20 15:42:00 TP0] Prefill batch. #new-seq: 1, #new-token: 2649, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 99
  14032. 2025-07-20 15:42:00,550 - __main__ - INFO - sglang running req: 10 queue req: 99
  14033. 2025-07-20 15:42:01,586 - sglang - INFO - [2025-07-20 15:42:01 TP0] Prefill batch. #new-seq: 1, #new-token: 2396, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 98
  14034. 2025-07-20 15:42:01,586 - __main__ - INFO - sglang running req: 10 queue req: 98
  14035. 2025-07-20 15:42:02,385 - sglang - INFO - [2025-07-20 15:42:02 TP0] Decode batch. #running-req: 11, #token: 28450, token usage: 0.75, gen throughput (token/s): 96.84, #queue-req: 98
  14036. 2025-07-20 15:42:02,385 - __main__ - INFO - sglang running req: 11 queue req: 98
  14037. 2025-07-20 15:42:03,247 - sglang - INFO - [2025-07-20 15:42:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2385, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 97
  14038. 2025-07-20 15:42:03,247 - __main__ - INFO - sglang running req: 10 queue req: 97
  14039. 2025-07-20 15:42:03,818 - __main__ - INFO - Queue remaining: 2
  14040. 2025-07-20 15:42:03,818 - __main__ - INFO -
  14041. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14042. ----------------------------------------------------------------------------------
  14043. sglang_input_tokens 912.80 1062.73
  14044. sglang_output_tokens 259.75 293.32
  14045. 2025-07-20 15:42:03,818 - __main__ - INFO -
  14046. Worker ID | finished | started
  14047. ----------+----------+--------
  14048. 0 | 402 | 500
  14049. 1 | 0 | 10
  14050. 2025-07-20 15:42:04,120 - sglang - INFO - [2025-07-20 15:42:04 TP0] Decode batch. #running-req: 11, #token: 28484, token usage: 0.75, gen throughput (token/s): 253.02, #queue-req: 97
  14051. 2025-07-20 15:42:04,120 - __main__ - INFO - sglang running req: 11 queue req: 97
  14052. 2025-07-20 15:42:05,105 - sglang - INFO - [2025-07-20 15:42:05 TP0] Decode batch. #running-req: 11, #token: 28924, token usage: 0.76, gen throughput (token/s): 446.50, #queue-req: 97
  14053. 2025-07-20 15:42:05,106 - __main__ - INFO - sglang running req: 11 queue req: 97
  14054. 2025-07-20 15:42:05,624 - sglang - INFO - [2025-07-20 15:42:05 TP0] Prefill batch. #new-seq: 1, #new-token: 2626, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 96
  14055. 2025-07-20 15:42:05,624 - __main__ - INFO - sglang running req: 10 queue req: 96
  14056. 2025-07-20 15:42:06,882 - sglang - INFO - [2025-07-20 15:42:06 TP0] Decode batch. #running-req: 11, #token: 29748, token usage: 0.78, gen throughput (token/s): 247.15, #queue-req: 96
  14057. 2025-07-20 15:42:06,882 - __main__ - INFO - sglang running req: 11 queue req: 96
  14058. 2025-07-20 15:42:07,870 - sglang - INFO - [2025-07-20 15:42:07 TP0] Decode batch. #running-req: 11, #token: 30188, token usage: 0.79, gen throughput (token/s): 445.15, #queue-req: 96
  14059. 2025-07-20 15:42:07,870 - __main__ - INFO - sglang running req: 11 queue req: 96
  14060. 2025-07-20 15:42:08,341 - sglang - INFO - [2025-07-20 15:42:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2874, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 95
  14061. 2025-07-20 15:42:08,341 - __main__ - INFO - sglang running req: 10 queue req: 95
  14062. 2025-07-20 15:42:09,688 - sglang - INFO - [2025-07-20 15:42:09 TP0] Decode batch. #running-req: 11, #token: 30182, token usage: 0.79, gen throughput (token/s): 241.50, #queue-req: 95
  14063. 2025-07-20 15:42:09,688 - __main__ - INFO - sglang running req: 11 queue req: 95
  14064. 2025-07-20 15:42:10,676 - sglang - INFO - [2025-07-20 15:42:10 TP0] Decode batch. #running-req: 11, #token: 30622, token usage: 0.81, gen throughput (token/s): 445.28, #queue-req: 95
  14065. 2025-07-20 15:42:10,676 - __main__ - INFO - sglang running req: 11 queue req: 95
  14066. 2025-07-20 15:42:11,472 - sglang - INFO - [2025-07-20 15:42:11 TP0] Prefill batch. #new-seq: 1, #new-token: 1918, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 94
  14067. 2025-07-20 15:42:11,472 - __main__ - INFO - sglang running req: 10 queue req: 94
  14068. 2025-07-20 15:42:12,333 - sglang - INFO - [2025-07-20 15:42:12 TP0] Decode batch. #running-req: 11, #token: 30483, token usage: 0.80, gen throughput (token/s): 264.92, #queue-req: 94
  14069. 2025-07-20 15:42:12,333 - __main__ - INFO - sglang running req: 11 queue req: 94
  14070. 2025-07-20 15:42:13,322 - sglang - INFO - [2025-07-20 15:42:13 TP0] Decode batch. #running-req: 11, #token: 30923, token usage: 0.81, gen throughput (token/s): 445.14, #queue-req: 94
  14071. 2025-07-20 15:42:13,322 - __main__ - INFO - sglang running req: 11 queue req: 94
  14072. 2025-07-20 15:42:13,819 - __main__ - INFO - Queue remaining: 2
  14073. 2025-07-20 15:42:13,819 - __main__ - INFO -
  14074. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14075. ----------------------------------------------------------------------------------
  14076. sglang_input_tokens 910.30 1051.30
  14077. sglang_output_tokens 258.73 290.07
  14078. 2025-07-20 15:42:13,819 - __main__ - INFO -
  14079. Worker ID | finished | started
  14080. ----------+----------+--------
  14081. 0 | 405 | 500
  14082. 1 | 0 | 10
  14083. 2025-07-20 15:42:14,312 - sglang - INFO - [2025-07-20 15:42:14 TP0] Decode batch. #running-req: 11, #token: 31363, token usage: 0.83, gen throughput (token/s): 444.35, #queue-req: 94
  14084. 2025-07-20 15:42:14,312 - __main__ - INFO - sglang running req: 11 queue req: 94
  14085. 2025-07-20 15:42:15,304 - sglang - INFO - [2025-07-20 15:42:15 TP0] Decode batch. #running-req: 11, #token: 31803, token usage: 0.84, gen throughput (token/s): 443.28, #queue-req: 94
  14086. 2025-07-20 15:42:15,305 - __main__ - INFO - sglang running req: 11 queue req: 94
  14087. 2025-07-20 15:42:16,297 - sglang - INFO - [2025-07-20 15:42:16 TP0] Decode batch. #running-req: 11, #token: 32243, token usage: 0.85, gen throughput (token/s): 443.42, #queue-req: 94
  14088. 2025-07-20 15:42:16,297 - __main__ - INFO - sglang running req: 11 queue req: 94
  14089. 2025-07-20 15:42:16,990 - sglang - INFO - [2025-07-20 15:42:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2169, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 93
  14090. 2025-07-20 15:42:16,991 - __main__ - INFO - sglang running req: 10 queue req: 93
  14091. 2025-07-20 15:42:17,964 - sglang - INFO - [2025-07-20 15:42:17 TP0] Decode batch. #running-req: 11, #token: 31711, token usage: 0.83, gen throughput (token/s): 263.35, #queue-req: 93
  14092. 2025-07-20 15:42:17,964 - __main__ - INFO - sglang running req: 11 queue req: 93
  14093. 2025-07-20 15:42:18,038 - sglang - INFO - [2025-07-20 15:42:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2421, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 92
  14094. 2025-07-20 15:42:18,038 - __main__ - INFO - sglang running req: 10 queue req: 92
  14095. 2025-07-20 15:42:19,858 - sglang - INFO - [2025-07-20 15:42:19 TP0] Decode batch. #running-req: 10, #token: 29179, token usage: 0.77, gen throughput (token/s): 231.21, #queue-req: 92
  14096. 2025-07-20 15:42:19,858 - __main__ - INFO - sglang running req: 10 queue req: 92
  14097. 2025-07-20 15:42:19,858 - sglang - INFO - [2025-07-20 15:42:19 TP0] Prefill batch. #new-seq: 1, #new-token: 3211, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 91
  14098. 2025-07-20 15:42:19,859 - __main__ - INFO - sglang running req: 10 queue req: 91
  14099. 2025-07-20 15:42:21,436 - sglang - INFO - [2025-07-20 15:42:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2187, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 90
  14100. 2025-07-20 15:42:21,436 - __main__ - INFO - sglang running req: 10 queue req: 90
  14101. 2025-07-20 15:42:22,484 - sglang - INFO - [2025-07-20 15:42:22 TP0] Decode batch. #running-req: 11, #token: 28526, token usage: 0.75, gen throughput (token/s): 167.21, #queue-req: 90
  14102. 2025-07-20 15:42:22,484 - __main__ - INFO - sglang running req: 11 queue req: 90
  14103. 2025-07-20 15:42:22,509 - sglang - INFO - [2025-07-20 15:42:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1759, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 89
  14104. 2025-07-20 15:42:22,509 - __main__ - INFO - sglang running req: 10 queue req: 89
  14105. 2025-07-20 15:42:23,820 - __main__ - INFO - Queue remaining: 2
  14106. 2025-07-20 15:42:23,821 - __main__ - INFO -
  14107. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14108. ----------------------------------------------------------------------------------
  14109. sglang_input_tokens 913.10 1052.30
  14110. sglang_output_tokens 259.11 289.47
  14111. 2025-07-20 15:42:23,821 - __main__ - INFO -
  14112. Worker ID | finished | started
  14113. ----------+----------+--------
  14114. 0 | 410 | 500
  14115. 1 | 0 | 10
  14116. 2025-07-20 15:42:24,079 - sglang - INFO - [2025-07-20 15:42:24 TP0] Decode batch. #running-req: 11, #token: 30724, token usage: 0.81, gen throughput (token/s): 275.17, #queue-req: 89
  14117. 2025-07-20 15:42:24,079 - __main__ - INFO - sglang running req: 11 queue req: 89
  14118. 2025-07-20 15:42:24,696 - sglang - INFO - [2025-07-20 15:42:24 TP0] Prefill batch. #new-seq: 1, #new-token: 1856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 88
  14119. 2025-07-20 15:42:24,696 - __main__ - INFO - sglang running req: 10 queue req: 88
  14120. 2025-07-20 15:42:25,590 - sglang - INFO - [2025-07-20 15:42:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2445, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 87
  14121. 2025-07-20 15:42:25,590 - __main__ - INFO - sglang running req: 10 queue req: 87
  14122. 2025-07-20 15:42:26,465 - sglang - INFO - [2025-07-20 15:42:26 TP0] Decode batch. #running-req: 11, #token: 29396, token usage: 0.77, gen throughput (token/s): 183.58, #queue-req: 87
  14123. 2025-07-20 15:42:26,465 - __main__ - INFO - sglang running req: 11 queue req: 87
  14124. 2025-07-20 15:42:26,515 - sglang - INFO - [2025-07-20 15:42:26 TP0] Prefill batch. #new-seq: 1, #new-token: 1511, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 10, #queue-req: 86
  14125. 2025-07-20 15:42:26,515 - __main__ - INFO - sglang running req: 10 queue req: 86
  14126. 2025-07-20 15:42:28,012 - sglang - INFO - [2025-07-20 15:42:28 TP0] Decode batch. #running-req: 11, #token: 29059, token usage: 0.76, gen throughput (token/s): 283.72, #queue-req: 86
  14127. 2025-07-20 15:42:28,012 - __main__ - INFO - sglang running req: 11 queue req: 86
  14128. 2025-07-20 15:42:28,824 - sglang - INFO - [2025-07-20 15:42:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2401, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 85
  14129. 2025-07-20 15:42:28,824 - __main__ - INFO - sglang running req: 10 queue req: 85
  14130. 2025-07-20 15:42:29,728 - sglang - INFO - [2025-07-20 15:42:29 TP0] Decode batch. #running-req: 11, #token: 28662, token usage: 0.75, gen throughput (token/s): 255.84, #queue-req: 85
  14131. 2025-07-20 15:42:29,728 - __main__ - INFO - sglang running req: 11 queue req: 85
  14132. 2025-07-20 15:42:30,712 - sglang - INFO - [2025-07-20 15:42:30 TP0] Decode batch. #running-req: 11, #token: 29102, token usage: 0.77, gen throughput (token/s): 447.01, #queue-req: 85
  14133. 2025-07-20 15:42:30,713 - __main__ - INFO - sglang running req: 11 queue req: 85
  14134. 2025-07-20 15:42:31,697 - sglang - INFO - [2025-07-20 15:42:31 TP0] Decode batch. #running-req: 11, #token: 29542, token usage: 0.78, gen throughput (token/s): 447.08, #queue-req: 85
  14135. 2025-07-20 15:42:31,697 - __main__ - INFO - sglang running req: 11 queue req: 85
  14136. 2025-07-20 15:42:32,657 - sglang - INFO - [2025-07-20 15:42:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2574, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 84
  14137. 2025-07-20 15:42:32,658 - __main__ - INFO - sglang running req: 10 queue req: 84
  14138. 2025-07-20 15:42:33,464 - sglang - INFO - [2025-07-20 15:42:33 TP0] Decode batch. #running-req: 11, #token: 30848, token usage: 0.81, gen throughput (token/s): 248.36, #queue-req: 84
  14139. 2025-07-20 15:42:33,464 - __main__ - INFO - sglang running req: 11 queue req: 84
  14140. 2025-07-20 15:42:33,822 - __main__ - INFO - Queue remaining: 2
  14141. 2025-07-20 15:42:33,822 - __main__ - INFO -
  14142. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14143. ----------------------------------------------------------------------------------
  14144. sglang_input_tokens 914.70 1056.36
  14145. sglang_output_tokens 259.00 288.49
  14146. 2025-07-20 15:42:33,823 - __main__ - INFO -
  14147. Worker ID | finished | started
  14148. ----------+----------+--------
  14149. 0 | 415 | 500
  14150. 1 | 0 | 10
  14151. 2025-07-20 15:42:34,208 - sglang - INFO - [2025-07-20 15:42:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2965, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 83
  14152. 2025-07-20 15:42:34,209 - __main__ - INFO - sglang running req: 10 queue req: 83
  14153. 2025-07-20 15:42:35,322 - sglang - INFO - [2025-07-20 15:42:35 TP0] Decode batch. #running-req: 11, #token: 31682, token usage: 0.83, gen throughput (token/s): 236.34, #queue-req: 83
  14154. 2025-07-20 15:42:35,322 - __main__ - INFO - sglang running req: 11 queue req: 83
  14155. 2025-07-20 15:42:35,644 - sglang - INFO - [2025-07-20 15:42:35 TP0] Prefill batch. #new-seq: 1, #new-token: 1903, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 82
  14156. 2025-07-20 15:42:35,644 - __main__ - INFO - sglang running req: 10 queue req: 82
  14157. 2025-07-20 15:42:36,960 - sglang - INFO - [2025-07-20 15:42:36 TP0] Decode batch. #running-req: 11, #token: 30437, token usage: 0.80, gen throughput (token/s): 267.98, #queue-req: 82
  14158. 2025-07-20 15:42:36,960 - __main__ - INFO - sglang running req: 11 queue req: 82
  14159. 2025-07-20 15:42:37,949 - sglang - INFO - [2025-07-20 15:42:37 TP0] Decode batch. #running-req: 11, #token: 30877, token usage: 0.81, gen throughput (token/s): 444.92, #queue-req: 82
  14160. 2025-07-20 15:42:37,949 - __main__ - INFO - sglang running req: 11 queue req: 82
  14161. 2025-07-20 15:42:38,543 - sglang - INFO - [2025-07-20 15:42:38 TP0] Prefill batch. #new-seq: 1, #new-token: 2292, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 10, #queue-req: 81
  14162. 2025-07-20 15:42:38,543 - __main__ - INFO - sglang running req: 10 queue req: 81
  14163. 2025-07-20 15:42:39,663 - sglang - INFO - [2025-07-20 15:42:39 TP0] Decode batch. #running-req: 11, #token: 31427, token usage: 0.83, gen throughput (token/s): 256.13, #queue-req: 81
  14164. 2025-07-20 15:42:39,663 - __main__ - INFO - sglang running req: 11 queue req: 81
  14165. 2025-07-20 15:42:40,653 - sglang - INFO - [2025-07-20 15:42:40 TP0] Decode batch. #running-req: 11, #token: 31867, token usage: 0.84, gen throughput (token/s): 444.34, #queue-req: 81
  14166. 2025-07-20 15:42:40,653 - __main__ - INFO - sglang running req: 11 queue req: 81
  14167. 2025-07-20 15:42:41,647 - sglang - INFO - [2025-07-20 15:42:41 TP0] Decode batch. #running-req: 11, #token: 32307, token usage: 0.85, gen throughput (token/s): 442.53, #queue-req: 81
  14168. 2025-07-20 15:42:41,648 - __main__ - INFO - sglang running req: 11 queue req: 81
  14169. 2025-07-20 15:42:41,971 - sglang - INFO - [2025-07-20 15:42:41 TP0] Prefill batch. #new-seq: 1, #new-token: 2096, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 80
  14170. 2025-07-20 15:42:41,971 - __main__ - INFO - sglang running req: 10 queue req: 80
  14171. 2025-07-20 15:42:43,328 - sglang - INFO - [2025-07-20 15:42:43 TP0] Decode batch. #running-req: 11, #token: 32519, token usage: 0.86, gen throughput (token/s): 261.21, #queue-req: 80
  14172. 2025-07-20 15:42:43,328 - __main__ - INFO - sglang running req: 11 queue req: 80
  14173. 2025-07-20 15:42:43,824 - __main__ - INFO - Queue remaining: 2
  14174. 2025-07-20 15:42:43,824 - __main__ - INFO -
  14175. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14176. ----------------------------------------------------------------------------------
  14177. sglang_input_tokens 914.24 1044.39
  14178. sglang_output_tokens 258.45 283.01
  14179. 2025-07-20 15:42:43,824 - __main__ - INFO -
  14180. Worker ID | finished | started
  14181. ----------+----------+--------
  14182. 0 | 419 | 500
  14183. 1 | 0 | 10
  14184. 2025-07-20 15:42:44,323 - sglang - INFO - [2025-07-20 15:42:44 TP0] Decode batch. #running-req: 11, #token: 32959, token usage: 0.87, gen throughput (token/s): 442.26, #queue-req: 80
  14185. 2025-07-20 15:42:44,323 - __main__ - INFO - sglang running req: 11 queue req: 80
  14186. 2025-07-20 15:42:45,392 - sglang - INFO - [2025-07-20 15:42:45 TP0] Decode batch. #running-req: 11, #token: 33399, token usage: 0.88, gen throughput (token/s): 411.52, #queue-req: 80
  14187. 2025-07-20 15:42:45,392 - __main__ - INFO - sglang running req: 11 queue req: 80
  14188. 2025-07-20 15:42:46,390 - sglang - INFO - [2025-07-20 15:42:46 TP0] Decode batch. #running-req: 11, #token: 33839, token usage: 0.89, gen throughput (token/s): 440.69, #queue-req: 80
  14189. 2025-07-20 15:42:46,391 - __main__ - INFO - sglang running req: 11 queue req: 80
  14190. 2025-07-20 15:42:47,385 - sglang - INFO - [2025-07-20 15:42:47 TP0] Decode batch. #running-req: 11, #token: 34279, token usage: 0.90, gen throughput (token/s): 442.36, #queue-req: 80
  14191. 2025-07-20 15:42:47,385 - __main__ - INFO - sglang running req: 11 queue req: 80
  14192. 2025-07-20 15:42:48,380 - sglang - INFO - [2025-07-20 15:42:48 TP0] Decode batch. #running-req: 10, #token: 31589, token usage: 0.83, gen throughput (token/s): 423.10, #queue-req: 80
  14193. 2025-07-20 15:42:48,381 - __main__ - INFO - sglang running req: 10 queue req: 80
  14194. 2025-07-20 15:42:48,775 - sglang - INFO - [2025-07-20 15:42:48 TP0] Prefill batch. #new-seq: 1, #new-token: 1387, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 79
  14195. 2025-07-20 15:42:48,775 - __main__ - INFO - sglang running req: 9 queue req: 79
  14196. 2025-07-20 15:42:49,418 - sglang - INFO - [2025-07-20 15:42:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2621, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 78
  14197. 2025-07-20 15:42:49,419 - __main__ - INFO - sglang running req: 9 queue req: 78
  14198. 2025-07-20 15:42:50,476 - sglang - INFO - [2025-07-20 15:42:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2390, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 77
  14199. 2025-07-20 15:42:50,476 - __main__ - INFO - sglang running req: 9 queue req: 77
  14200. 2025-07-20 15:42:51,418 - sglang - INFO - [2025-07-20 15:42:51 TP0] Decode batch. #running-req: 10, #token: 29770, token usage: 0.78, gen throughput (token/s): 130.67, #queue-req: 77
  14201. 2025-07-20 15:42:51,419 - __main__ - INFO - sglang running req: 10 queue req: 77
  14202. 2025-07-20 15:42:52,402 - sglang - INFO - [2025-07-20 15:42:52 TP0] Decode batch. #running-req: 10, #token: 30170, token usage: 0.79, gen throughput (token/s): 406.58, #queue-req: 77
  14203. 2025-07-20 15:42:52,402 - __main__ - INFO - sglang running req: 10 queue req: 77
  14204. 2025-07-20 15:42:52,599 - sglang - INFO - [2025-07-20 15:42:52 TP0] Prefill batch. #new-seq: 1, #new-token: 2078, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 76
  14205. 2025-07-20 15:42:52,599 - __main__ - INFO - sglang running req: 9 queue req: 76
  14206. 2025-07-20 15:42:53,825 - __main__ - INFO - Queue remaining: 2
  14207. 2025-07-20 15:42:53,825 - __main__ - INFO -
  14208. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14209. ----------------------------------------------------------------------------------
  14210. sglang_input_tokens 916.19 1056.89
  14211. sglang_output_tokens 259.02 287.26
  14212. 2025-07-20 15:42:53,826 - __main__ - INFO -
  14213. Worker ID | finished | started
  14214. ----------+----------+--------
  14215. 0 | 424 | 500
  14216. 1 | 0 | 10
  14217. 2025-07-20 15:42:54,067 - sglang - INFO - [2025-07-20 15:42:54 TP0] Decode batch. #running-req: 10, #token: 29932, token usage: 0.79, gen throughput (token/s): 239.67, #queue-req: 76
  14218. 2025-07-20 15:42:54,067 - __main__ - INFO - sglang running req: 10 queue req: 76
  14219. 2025-07-20 15:42:54,633 - sglang - INFO - [2025-07-20 15:42:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2271, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 75
  14220. 2025-07-20 15:42:54,633 - __main__ - INFO - sglang running req: 9 queue req: 75
  14221. 2025-07-20 15:42:55,431 - sglang - INFO - [2025-07-20 15:42:55 TP0] Prefill batch. #new-seq: 1, #new-token: 2238, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 9, #queue-req: 74
  14222. 2025-07-20 15:42:55,431 - __main__ - INFO - sglang running req: 9 queue req: 74
  14223. 2025-07-20 15:42:56,350 - sglang - INFO - [2025-07-20 15:42:56 TP0] Prefill batch. #new-seq: 2, #new-token: 4141, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.64, #running-req: 9, #queue-req: 72
  14224. 2025-07-20 15:42:56,350 - __main__ - INFO - sglang running req: 9 queue req: 72
  14225. 2025-07-20 15:42:57,819 - sglang - INFO - [2025-07-20 15:42:57 TP0] Decode batch. #running-req: 11, #token: 28566, token usage: 0.75, gen throughput (token/s): 107.41, #queue-req: 72
  14226. 2025-07-20 15:42:57,819 - __main__ - INFO - sglang running req: 11 queue req: 72
  14227. 2025-07-20 15:42:58,804 - sglang - INFO - [2025-07-20 15:42:58 TP0] Decode batch. #running-req: 11, #token: 29006, token usage: 0.76, gen throughput (token/s): 446.64, #queue-req: 72
  14228. 2025-07-20 15:42:58,804 - __main__ - INFO - sglang running req: 11 queue req: 72
  14229. 2025-07-20 15:42:59,125 - sglang - INFO - [2025-07-20 15:42:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2579, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 71
  14230. 2025-07-20 15:42:59,125 - __main__ - INFO - sglang running req: 10 queue req: 71
  14231. 2025-07-20 15:43:00,576 - sglang - INFO - [2025-07-20 15:43:00 TP0] Decode batch. #running-req: 11, #token: 30441, token usage: 0.80, gen throughput (token/s): 247.75, #queue-req: 71
  14232. 2025-07-20 15:43:00,576 - __main__ - INFO - sglang running req: 11 queue req: 71
  14233. 2025-07-20 15:43:01,565 - sglang - INFO - [2025-07-20 15:43:01 TP0] Decode batch. #running-req: 11, #token: 30881, token usage: 0.81, gen throughput (token/s): 444.91, #queue-req: 71
  14234. 2025-07-20 15:43:01,565 - __main__ - INFO - sglang running req: 11 queue req: 71
  14235. 2025-07-20 15:43:01,664 - sglang - INFO - [2025-07-20 15:43:01 TP0] Prefill batch. #new-seq: 1, #new-token: 3211, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 70
  14236. 2025-07-20 15:43:01,664 - __main__ - INFO - sglang running req: 10 queue req: 70
  14237. 2025-07-20 15:43:03,453 - sglang - INFO - [2025-07-20 15:43:03 TP0] Decode batch. #running-req: 11, #token: 31240, token usage: 0.82, gen throughput (token/s): 232.50, #queue-req: 70
  14238. 2025-07-20 15:43:03,453 - __main__ - INFO - sglang running req: 11 queue req: 70
  14239. 2025-07-20 15:43:03,802 - sglang - INFO - [2025-07-20 15:43:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2376, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 69
  14240. 2025-07-20 15:43:03,802 - __main__ - INFO - sglang running req: 10 queue req: 69
  14241. 2025-07-20 15:43:03,826 - __main__ - INFO - Queue remaining: 2
  14242. 2025-07-20 15:43:03,827 - __main__ - INFO -
  14243. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14244. ----------------------------------------------------------------------------------
  14245. sglang_input_tokens 920.23 1058.23
  14246. sglang_output_tokens 260.09 287.64
  14247. 2025-07-20 15:43:03,827 - __main__ - INFO -
  14248. Worker ID | finished | started
  14249. ----------+----------+--------
  14250. 0 | 430 | 500
  14251. 1 | 0 | 10
  14252. 2025-07-20 15:43:04,722 - sglang - INFO - [2025-07-20 15:43:04 TP0] Prefill batch. #new-seq: 1, #new-token: 1547, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 10, #queue-req: 68
  14253. 2025-07-20 15:43:04,722 - __main__ - INFO - sglang running req: 10 queue req: 68
  14254. 2025-07-20 15:43:05,769 - sglang - INFO - [2025-07-20 15:43:05 TP0] Decode batch. #running-req: 11, #token: 29815, token usage: 0.78, gen throughput (token/s): 189.09, #queue-req: 68
  14255. 2025-07-20 15:43:05,770 - __main__ - INFO - sglang running req: 11 queue req: 68
  14256. 2025-07-20 15:43:06,755 - sglang - INFO - [2025-07-20 15:43:06 TP0] Decode batch. #running-req: 11, #token: 30255, token usage: 0.80, gen throughput (token/s): 446.23, #queue-req: 68
  14257. 2025-07-20 15:43:06,756 - __main__ - INFO - sglang running req: 11 queue req: 68
  14258. 2025-07-20 15:43:07,745 - sglang - INFO - [2025-07-20 15:43:07 TP0] Decode batch. #running-req: 11, #token: 30695, token usage: 0.81, gen throughput (token/s): 444.69, #queue-req: 68
  14259. 2025-07-20 15:43:07,745 - __main__ - INFO - sglang running req: 11 queue req: 68
  14260. 2025-07-20 15:43:08,735 - sglang - INFO - [2025-07-20 15:43:08 TP0] Decode batch. #running-req: 11, #token: 31135, token usage: 0.82, gen throughput (token/s): 444.63, #queue-req: 68
  14261. 2025-07-20 15:43:08,735 - __main__ - INFO - sglang running req: 11 queue req: 68
  14262. 2025-07-20 15:43:09,724 - sglang - INFO - [2025-07-20 15:43:09 TP0] Decode batch. #running-req: 11, #token: 31575, token usage: 0.83, gen throughput (token/s): 444.93, #queue-req: 68
  14263. 2025-07-20 15:43:09,724 - __main__ - INFO - sglang running req: 11 queue req: 68
  14264. 2025-07-20 15:43:10,712 - sglang - INFO - [2025-07-20 15:43:10 TP0] Decode batch. #running-req: 11, #token: 32015, token usage: 0.84, gen throughput (token/s): 445.08, #queue-req: 68
  14265. 2025-07-20 15:43:10,712 - __main__ - INFO - sglang running req: 11 queue req: 68
  14266. 2025-07-20 15:43:11,455 - sglang - INFO - [2025-07-20 15:43:11 TP0] Prefill batch. #new-seq: 1, #new-token: 2480, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 67
  14267. 2025-07-20 15:43:11,456 - __main__ - INFO - sglang running req: 10 queue req: 67
  14268. 2025-07-20 15:43:12,456 - sglang - INFO - [2025-07-20 15:43:12 TP0] Decode batch. #running-req: 9, #token: 25570, token usage: 0.67, gen throughput (token/s): 250.49, #queue-req: 67
  14269. 2025-07-20 15:43:12,457 - __main__ - INFO - sglang running req: 9 queue req: 67
  14270. 2025-07-20 15:43:12,457 - sglang - INFO - [2025-07-20 15:43:12 TP0] Prefill batch. #new-seq: 1, #new-token: 3361, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 66
  14271. 2025-07-20 15:43:12,457 - __main__ - INFO - sglang running req: 9 queue req: 66
  14272. 2025-07-20 15:43:13,828 - __main__ - INFO - Queue remaining: 2
  14273. 2025-07-20 15:43:13,828 - __main__ - INFO -
  14274. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14275. ----------------------------------------------------------------------------------
  14276. sglang_input_tokens 921.06 1060.37
  14277. sglang_output_tokens 260.46 288.68
  14278. 2025-07-20 15:43:13,828 - __main__ - INFO -
  14279. Worker ID | finished | started
  14280. ----------+----------+--------
  14281. 0 | 434 | 500
  14282. 1 | 0 | 10
  14283. 2025-07-20 15:43:14,183 - sglang - INFO - [2025-07-20 15:43:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2132, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 65
  14284. 2025-07-20 15:43:14,183 - __main__ - INFO - sglang running req: 9 queue req: 65
  14285. 2025-07-20 15:43:15,000 - sglang - INFO - [2025-07-20 15:43:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1946, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 64
  14286. 2025-07-20 15:43:15,000 - __main__ - INFO - sglang running req: 9 queue req: 64
  14287. 2025-07-20 15:43:15,702 - sglang - INFO - [2025-07-20 15:43:15 TP0] Decode batch. #running-req: 10, #token: 27912, token usage: 0.73, gen throughput (token/s): 122.63, #queue-req: 64
  14288. 2025-07-20 15:43:15,703 - __main__ - INFO - sglang running req: 10 queue req: 64
  14289. 2025-07-20 15:43:16,774 - sglang - INFO - [2025-07-20 15:43:16 TP0] Decode batch. #running-req: 10, #token: 28312, token usage: 0.75, gen throughput (token/s): 373.29, #queue-req: 64
  14290. 2025-07-20 15:43:16,774 - __main__ - INFO - sglang running req: 10 queue req: 64
  14291. 2025-07-20 15:43:17,763 - sglang - INFO - [2025-07-20 15:43:17 TP0] Decode batch. #running-req: 10, #token: 28712, token usage: 0.76, gen throughput (token/s): 404.48, #queue-req: 64
  14292. 2025-07-20 15:43:17,763 - __main__ - INFO - sglang running req: 10 queue req: 64
  14293. 2025-07-20 15:43:18,741 - sglang - INFO - [2025-07-20 15:43:18 TP0] Decode batch. #running-req: 10, #token: 29112, token usage: 0.77, gen throughput (token/s): 408.71, #queue-req: 64
  14294. 2025-07-20 15:43:18,742 - __main__ - INFO - sglang running req: 10 queue req: 64
  14295. 2025-07-20 15:43:19,451 - sglang - INFO - [2025-07-20 15:43:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2343, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 63
  14296. 2025-07-20 15:43:19,451 - __main__ - INFO - sglang running req: 9 queue req: 63
  14297. 2025-07-20 15:43:20,492 - sglang - INFO - [2025-07-20 15:43:20 TP0] Decode batch. #running-req: 10, #token: 28939, token usage: 0.76, gen throughput (token/s): 227.85, #queue-req: 63
  14298. 2025-07-20 15:43:20,493 - __main__ - INFO - sglang running req: 10 queue req: 63
  14299. 2025-07-20 15:43:21,277 - sglang - INFO - [2025-07-20 15:43:21 TP0] Prefill batch. #new-seq: 2, #new-token: 3593, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 61
  14300. 2025-07-20 15:43:21,277 - __main__ - INFO - sglang running req: 9 queue req: 61
  14301. 2025-07-20 15:43:22,683 - sglang - INFO - [2025-07-20 15:43:22 TP0] Decode batch. #running-req: 11, #token: 29508, token usage: 0.78, gen throughput (token/s): 185.83, #queue-req: 61
  14302. 2025-07-20 15:43:22,683 - __main__ - INFO - sglang running req: 11 queue req: 61
  14303. 2025-07-20 15:43:22,929 - sglang - INFO - [2025-07-20 15:43:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1476, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 60
  14304. 2025-07-20 15:43:22,929 - __main__ - INFO - sglang running req: 10 queue req: 60
  14305. 2025-07-20 15:43:23,830 - __main__ - INFO - Queue remaining: 2
  14306. 2025-07-20 15:43:23,831 - __main__ - INFO -
  14307. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14308. ----------------------------------------------------------------------------------
  14309. sglang_input_tokens 923.57 1077.76
  14310. sglang_output_tokens 260.87 291.63
  14311. 2025-07-20 15:43:23,831 - __main__ - INFO -
  14312. Worker ID | finished | started
  14313. ----------+----------+--------
  14314. 0 | 439 | 500
  14315. 1 | 0 | 10
  14316. 2025-07-20 15:43:23,859 - sglang - INFO - [2025-07-20 15:43:23 TP0] Prefill batch. #new-seq: 2, #new-token: 4060, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 10, #queue-req: 58
  14317. 2025-07-20 15:43:23,860 - __main__ - INFO - sglang running req: 10 queue req: 58
  14318. 2025-07-20 15:43:25,518 - sglang - INFO - [2025-07-20 15:43:25 TP0] Decode batch. #running-req: 12, #token: 29428, token usage: 0.77, gen throughput (token/s): 159.80, #queue-req: 58
  14319. 2025-07-20 15:43:25,518 - __main__ - INFO - sglang running req: 12 queue req: 58
  14320. 2025-07-20 15:43:26,505 - sglang - INFO - [2025-07-20 15:43:26 TP0] Decode batch. #running-req: 12, #token: 29908, token usage: 0.79, gen throughput (token/s): 486.15, #queue-req: 58
  14321. 2025-07-20 15:43:26,505 - __main__ - INFO - sglang running req: 12 queue req: 58
  14322. 2025-07-20 15:43:27,496 - sglang - INFO - [2025-07-20 15:43:27 TP0] Decode batch. #running-req: 12, #token: 30388, token usage: 0.80, gen throughput (token/s): 484.37, #queue-req: 58
  14323. 2025-07-20 15:43:27,496 - __main__ - INFO - sglang running req: 12 queue req: 58
  14324. 2025-07-20 15:43:28,491 - sglang - INFO - [2025-07-20 15:43:28 TP0] Decode batch. #running-req: 12, #token: 30868, token usage: 0.81, gen throughput (token/s): 482.47, #queue-req: 58
  14325. 2025-07-20 15:43:28,491 - __main__ - INFO - sglang running req: 12 queue req: 58
  14326. 2025-07-20 15:43:28,566 - sglang - INFO - [2025-07-20 15:43:28 TP0] Prefill batch. #new-seq: 1, #new-token: 1257, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 11, #queue-req: 57
  14327. 2025-07-20 15:43:28,566 - __main__ - INFO - sglang running req: 11 queue req: 57
  14328. 2025-07-20 15:43:29,321 - sglang - INFO - [2025-07-20 15:43:29 TP0] Prefill batch. #new-seq: 1, #new-token: 2571, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 11, #queue-req: 56
  14329. 2025-07-20 15:43:29,321 - __main__ - INFO - sglang running req: 11 queue req: 56
  14330. 2025-07-20 15:43:30,780 - sglang - INFO - [2025-07-20 15:43:30 TP0] Decode batch. #running-req: 12, #token: 30644, token usage: 0.81, gen throughput (token/s): 208.80, #queue-req: 56
  14331. 2025-07-20 15:43:30,780 - __main__ - INFO - sglang running req: 12 queue req: 56
  14332. 2025-07-20 15:43:31,129 - sglang - INFO - [2025-07-20 15:43:31 TP0] Prefill batch. #new-seq: 1, #new-token: 3400, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 11, #queue-req: 55
  14333. 2025-07-20 15:43:31,129 - __main__ - INFO - sglang running req: 11 queue req: 55
  14334. 2025-07-20 15:43:32,724 - sglang - INFO - [2025-07-20 15:43:32 TP0] Decode batch. #running-req: 12, #token: 32162, token usage: 0.85, gen throughput (token/s): 246.43, #queue-req: 55
  14335. 2025-07-20 15:43:32,724 - __main__ - INFO - sglang running req: 12 queue req: 55
  14336. 2025-07-20 15:43:33,690 - sglang - INFO - [2025-07-20 15:43:33 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 10, #queue-req: 54
  14337. 2025-07-20 15:43:33,691 - __main__ - INFO - sglang running req: 10 queue req: 54
  14338. 2025-07-20 15:43:33,832 - __main__ - INFO - Queue remaining: 2
  14339. 2025-07-20 15:43:33,832 - __main__ - INFO -
  14340. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14341. ----------------------------------------------------------------------------------
  14342. sglang_input_tokens 925.94 1084.97
  14343. sglang_output_tokens 261.06 291.57
  14344. 2025-07-20 15:43:33,832 - __main__ - INFO -
  14345. Worker ID | finished | started
  14346. ----------+----------+--------
  14347. 0 | 445 | 500
  14348. 1 | 0 | 10
  14349. 2025-07-20 15:43:34,364 - sglang - INFO - [2025-07-20 15:43:34 TP0] Decode batch. #running-req: 11, #token: 29473, token usage: 0.78, gen throughput (token/s): 275.55, #queue-req: 54
  14350. 2025-07-20 15:43:34,365 - __main__ - INFO - sglang running req: 11 queue req: 54
  14351. 2025-07-20 15:43:34,959 - sglang - INFO - [2025-07-20 15:43:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2553, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 53
  14352. 2025-07-20 15:43:34,960 - __main__ - INFO - sglang running req: 10 queue req: 53
  14353. 2025-07-20 15:43:36,115 - sglang - INFO - [2025-07-20 15:43:36 TP0] Decode batch. #running-req: 11, #token: 31068, token usage: 0.82, gen throughput (token/s): 250.71, #queue-req: 53
  14354. 2025-07-20 15:43:36,116 - __main__ - INFO - sglang running req: 11 queue req: 53
  14355. 2025-07-20 15:43:37,108 - sglang - INFO - [2025-07-20 15:43:37 TP0] Decode batch. #running-req: 11, #token: 31508, token usage: 0.83, gen throughput (token/s): 443.24, #queue-req: 53
  14356. 2025-07-20 15:43:37,108 - __main__ - INFO - sglang running req: 11 queue req: 53
  14357. 2025-07-20 15:43:37,357 - sglang - INFO - [2025-07-20 15:43:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2380, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 52
  14358. 2025-07-20 15:43:37,357 - __main__ - INFO - sglang running req: 10 queue req: 52
  14359. 2025-07-20 15:43:38,852 - sglang - INFO - [2025-07-20 15:43:38 TP0] Decode batch. #running-req: 11, #token: 32317, token usage: 0.85, gen throughput (token/s): 251.70, #queue-req: 52
  14360. 2025-07-20 15:43:38,852 - __main__ - INFO - sglang running req: 11 queue req: 52
  14361. 2025-07-20 15:43:39,698 - sglang - INFO - [2025-07-20 15:43:39 TP0] Prefill batch. #new-seq: 1, #new-token: 1982, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 51
  14362. 2025-07-20 15:43:39,699 - __main__ - INFO - sglang running req: 10 queue req: 51
  14363. 2025-07-20 15:43:40,504 - sglang - INFO - [2025-07-20 15:43:40 TP0] Decode batch. #running-req: 11, #token: 31965, token usage: 0.84, gen throughput (token/s): 265.78, #queue-req: 51
  14364. 2025-07-20 15:43:40,504 - __main__ - INFO - sglang running req: 11 queue req: 51
  14365. 2025-07-20 15:43:41,497 - sglang - INFO - [2025-07-20 15:43:41 TP0] Decode batch. #running-req: 11, #token: 32405, token usage: 0.85, gen throughput (token/s): 443.20, #queue-req: 51
  14366. 2025-07-20 15:43:41,497 - __main__ - INFO - sglang running req: 11 queue req: 51
  14367. 2025-07-20 15:43:42,494 - sglang - INFO - [2025-07-20 15:43:42 TP0] Decode batch. #running-req: 11, #token: 32845, token usage: 0.86, gen throughput (token/s): 441.06, #queue-req: 51
  14368. 2025-07-20 15:43:42,494 - __main__ - INFO - sglang running req: 11 queue req: 51
  14369. 2025-07-20 15:43:43,119 - sglang - INFO - [2025-07-20 15:43:43 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.80, #running-req: 10, #queue-req: 50
  14370. 2025-07-20 15:43:43,119 - __main__ - INFO - sglang running req: 10 queue req: 50
  14371. 2025-07-20 15:43:43,834 - __main__ - INFO - Queue remaining: 2
  14372. 2025-07-20 15:43:43,834 - __main__ - INFO -
  14373. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14374. ----------------------------------------------------------------------------------
  14375. sglang_input_tokens 924.34 1063.76
  14376. sglang_output_tokens 260.21 284.13
  14377. 2025-07-20 15:43:43,834 - __main__ - INFO -
  14378. Worker ID | finished | started
  14379. ----------+----------+--------
  14380. 0 | 449 | 500
  14381. 1 | 0 | 10
  14382. 2025-07-20 15:43:44,091 - sglang - INFO - [2025-07-20 15:43:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1908, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 10, #queue-req: 49
  14383. 2025-07-20 15:43:44,091 - __main__ - INFO - sglang running req: 10 queue req: 49
  14384. 2025-07-20 15:43:44,793 - sglang - INFO - [2025-07-20 15:43:44 TP0] Decode batch. #running-req: 11, #token: 31988, token usage: 0.84, gen throughput (token/s): 190.52, #queue-req: 49
  14385. 2025-07-20 15:43:44,793 - __main__ - INFO - sglang running req: 11 queue req: 49
  14386. 2025-07-20 15:43:44,869 - sglang - INFO - [2025-07-20 15:43:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1487, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 48
  14387. 2025-07-20 15:43:44,869 - __main__ - INFO - sglang running req: 10 queue req: 48
  14388. 2025-07-20 15:43:46,404 - sglang - INFO - [2025-07-20 15:43:46 TP0] Decode batch. #running-req: 11, #token: 31292, token usage: 0.82, gen throughput (token/s): 272.57, #queue-req: 48
  14389. 2025-07-20 15:43:46,404 - __main__ - INFO - sglang running req: 11 queue req: 48
  14390. 2025-07-20 15:43:46,653 - sglang - INFO - [2025-07-20 15:43:46 TP0] Prefill batch. #new-seq: 1, #new-token: 2564, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 47
  14391. 2025-07-20 15:43:46,653 - __main__ - INFO - sglang running req: 10 queue req: 47
  14392. 2025-07-20 15:43:48,181 - sglang - INFO - [2025-07-20 15:43:48 TP0] Decode batch. #running-req: 11, #token: 31291, token usage: 0.82, gen throughput (token/s): 247.13, #queue-req: 47
  14393. 2025-07-20 15:43:48,182 - __main__ - INFO - sglang running req: 11 queue req: 47
  14394. 2025-07-20 15:43:48,972 - sglang - INFO - [2025-07-20 15:43:48 TP0] Prefill batch. #new-seq: 1, #new-token: 2411, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 10, #queue-req: 46
  14395. 2025-07-20 15:43:48,973 - __main__ - INFO - sglang running req: 10 queue req: 46
  14396. 2025-07-20 15:43:49,909 - sglang - INFO - [2025-07-20 15:43:49 TP0] Decode batch. #running-req: 11, #token: 31922, token usage: 0.84, gen throughput (token/s): 253.88, #queue-req: 46
  14397. 2025-07-20 15:43:49,909 - __main__ - INFO - sglang running req: 11 queue req: 46
  14398. 2025-07-20 15:43:50,904 - sglang - INFO - [2025-07-20 15:43:50 TP0] Decode batch. #running-req: 11, #token: 32362, token usage: 0.85, gen throughput (token/s): 442.26, #queue-req: 46
  14399. 2025-07-20 15:43:50,904 - __main__ - INFO - sglang running req: 11 queue req: 46
  14400. 2025-07-20 15:43:51,894 - sglang - INFO - [2025-07-20 15:43:51 TP0] Decode batch. #running-req: 10, #token: 31118, token usage: 0.82, gen throughput (token/s): 409.93, #queue-req: 46
  14401. 2025-07-20 15:43:51,895 - __main__ - INFO - sglang running req: 10 queue req: 46
  14402. 2025-07-20 15:43:52,884 - sglang - INFO - [2025-07-20 15:43:52 TP0] Decode batch. #running-req: 10, #token: 31518, token usage: 0.83, gen throughput (token/s): 404.42, #queue-req: 46
  14403. 2025-07-20 15:43:52,884 - __main__ - INFO - sglang running req: 10 queue req: 46
  14404. 2025-07-20 15:43:53,082 - sglang - INFO - [2025-07-20 15:43:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1820, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 9, #queue-req: 45
  14405. 2025-07-20 15:43:53,082 - __main__ - INFO - sglang running req: 9 queue req: 45
  14406. 2025-07-20 15:43:53,835 - __main__ - INFO - Queue remaining: 2
  14407. 2025-07-20 15:43:53,835 - __main__ - INFO -
  14408. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14409. ----------------------------------------------------------------------------------
  14410. sglang_input_tokens 926.31 1084.65
  14411. sglang_output_tokens 259.83 286.17
  14412. 2025-07-20 15:43:53,836 - __main__ - INFO -
  14413. Worker ID | finished | started
  14414. ----------+----------+--------
  14415. 0 | 455 | 500
  14416. 1 | 0 | 10
  14417. 2025-07-20 15:43:54,229 - sglang - INFO - [2025-07-20 15:43:54 TP0] Prefill batch. #new-seq: 1, #new-token: 1362, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 44
  14418. 2025-07-20 15:43:54,229 - __main__ - INFO - sglang running req: 9 queue req: 44
  14419. 2025-07-20 15:43:55,045 - sglang - INFO - [2025-07-20 15:43:55 TP0] Decode batch. #running-req: 10, #token: 29802, token usage: 0.78, gen throughput (token/s): 184.16, #queue-req: 44
  14420. 2025-07-20 15:43:55,045 - __main__ - INFO - sglang running req: 10 queue req: 44
  14421. 2025-07-20 15:43:55,636 - sglang - INFO - [2025-07-20 15:43:55 TP0] Prefill batch. #new-seq: 1, #new-token: 3184, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 43
  14422. 2025-07-20 15:43:55,636 - __main__ - INFO - sglang running req: 9 queue req: 43
  14423. 2025-07-20 15:43:56,923 - sglang - INFO - [2025-07-20 15:43:56 TP0] Decode batch. #running-req: 10, #token: 28755, token usage: 0.76, gen throughput (token/s): 212.44, #queue-req: 43
  14424. 2025-07-20 15:43:56,923 - __main__ - INFO - sglang running req: 10 queue req: 43
  14425. 2025-07-20 15:43:56,997 - sglang - INFO - [2025-07-20 15:43:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2644, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 42
  14426. 2025-07-20 15:43:56,997 - __main__ - INFO - sglang running req: 9 queue req: 42
  14427. 2025-07-20 15:43:58,699 - sglang - INFO - [2025-07-20 15:43:58 TP0] Decode batch. #running-req: 10, #token: 28906, token usage: 0.76, gen throughput (token/s): 224.67, #queue-req: 42
  14428. 2025-07-20 15:43:58,699 - __main__ - INFO - sglang running req: 10 queue req: 42
  14429. 2025-07-20 15:43:58,994 - sglang - INFO - [2025-07-20 15:43:58 TP0] Prefill batch. #new-seq: 1, #new-token: 1245, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 41
  14430. 2025-07-20 15:43:58,994 - __main__ - INFO - sglang running req: 9 queue req: 41
  14431. 2025-07-20 15:44:00,186 - sglang - INFO - [2025-07-20 15:44:00 TP0] Decode batch. #running-req: 10, #token: 27370, token usage: 0.72, gen throughput (token/s): 268.25, #queue-req: 41
  14432. 2025-07-20 15:44:00,186 - __main__ - INFO - sglang running req: 10 queue req: 41
  14433. 2025-07-20 15:44:01,167 - sglang - INFO - [2025-07-20 15:44:01 TP0] Decode batch. #running-req: 10, #token: 27770, token usage: 0.73, gen throughput (token/s): 407.73, #queue-req: 41
  14434. 2025-07-20 15:44:01,168 - __main__ - INFO - sglang running req: 10 queue req: 41
  14435. 2025-07-20 15:44:02,151 - sglang - INFO - [2025-07-20 15:44:02 TP0] Decode batch. #running-req: 10, #token: 28170, token usage: 0.74, gen throughput (token/s): 406.49, #queue-req: 41
  14436. 2025-07-20 15:44:02,152 - __main__ - INFO - sglang running req: 10 queue req: 41
  14437. 2025-07-20 15:44:02,545 - sglang - INFO - [2025-07-20 15:44:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2967, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 9, #queue-req: 40
  14438. 2025-07-20 15:44:02,545 - __main__ - INFO - sglang running req: 9 queue req: 40
  14439. 2025-07-20 15:44:03,562 - sglang - INFO - [2025-07-20 15:44:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2119, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 39
  14440. 2025-07-20 15:44:03,562 - __main__ - INFO - sglang running req: 9 queue req: 39
  14441. 2025-07-20 15:44:03,837 - __main__ - INFO - Queue remaining: 2
  14442. 2025-07-20 15:44:03,837 - __main__ - INFO -
  14443. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14444. ----------------------------------------------------------------------------------
  14445. sglang_input_tokens 929.84 1087.41
  14446. sglang_output_tokens 260.68 286.18
  14447. 2025-07-20 15:44:03,837 - __main__ - INFO -
  14448. Worker ID | finished | started
  14449. ----------+----------+--------
  14450. 0 | 461 | 500
  14451. 1 | 0 | 10
  14452. 2025-07-20 15:44:04,406 - sglang - INFO - [2025-07-20 15:44:04 TP0] Prefill batch. #new-seq: 1, #new-token: 2470, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 38
  14453. 2025-07-20 15:44:04,406 - __main__ - INFO - sglang running req: 9 queue req: 38
  14454. 2025-07-20 15:44:05,435 - sglang - INFO - [2025-07-20 15:44:05 TP0] Decode batch. #running-req: 10, #token: 30742, token usage: 0.81, gen throughput (token/s): 120.89, #queue-req: 38
  14455. 2025-07-20 15:44:05,435 - __main__ - INFO - sglang running req: 10 queue req: 38
  14456. 2025-07-20 15:44:06,425 - sglang - INFO - [2025-07-20 15:44:06 TP0] Decode batch. #running-req: 10, #token: 31142, token usage: 0.82, gen throughput (token/s): 404.31, #queue-req: 38
  14457. 2025-07-20 15:44:06,425 - __main__ - INFO - sglang running req: 10 queue req: 38
  14458. 2025-07-20 15:44:07,413 - sglang - INFO - [2025-07-20 15:44:07 TP0] Decode batch. #running-req: 10, #token: 31542, token usage: 0.83, gen throughput (token/s): 404.78, #queue-req: 38
  14459. 2025-07-20 15:44:07,413 - __main__ - INFO - sglang running req: 10 queue req: 38
  14460. 2025-07-20 15:44:08,404 - sglang - INFO - [2025-07-20 15:44:08 TP0] Decode batch. #running-req: 10, #token: 31942, token usage: 0.84, gen throughput (token/s): 403.57, #queue-req: 38
  14461. 2025-07-20 15:44:08,404 - __main__ - INFO - sglang running req: 10 queue req: 38
  14462. 2025-07-20 15:44:09,397 - sglang - INFO - [2025-07-20 15:44:09 TP0] Decode batch. #running-req: 10, #token: 32342, token usage: 0.85, gen throughput (token/s): 402.77, #queue-req: 38
  14463. 2025-07-20 15:44:09,397 - __main__ - INFO - sglang running req: 10 queue req: 38
  14464. 2025-07-20 15:44:09,745 - sglang - INFO - [2025-07-20 15:44:09 TP0] Prefill batch. #new-seq: 1, #new-token: 2327, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 37
  14465. 2025-07-20 15:44:09,746 - __main__ - INFO - sglang running req: 9 queue req: 37
  14466. 2025-07-20 15:44:11,140 - sglang - INFO - [2025-07-20 15:44:11 TP0] Decode batch. #running-req: 10, #token: 31861, token usage: 0.84, gen throughput (token/s): 228.90, #queue-req: 37
  14467. 2025-07-20 15:44:11,140 - __main__ - INFO - sglang running req: 10 queue req: 37
  14468. 2025-07-20 15:44:12,132 - sglang - INFO - [2025-07-20 15:44:12 TP0] Decode batch. #running-req: 10, #token: 32261, token usage: 0.85, gen throughput (token/s): 403.31, #queue-req: 37
  14469. 2025-07-20 15:44:12,132 - __main__ - INFO - sglang running req: 10 queue req: 37
  14470. 2025-07-20 15:44:12,182 - sglang - INFO - [2025-07-20 15:44:12 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 9, #queue-req: 36
  14471. 2025-07-20 15:44:12,182 - __main__ - INFO - sglang running req: 9 queue req: 36
  14472. 2025-07-20 15:44:13,774 - sglang - INFO - [2025-07-20 15:44:13 TP0] Decode batch. #running-req: 10, #token: 32136, token usage: 0.85, gen throughput (token/s): 242.97, #queue-req: 36
  14473. 2025-07-20 15:44:13,774 - __main__ - INFO - sglang running req: 10 queue req: 36
  14474. 2025-07-20 15:44:13,828 - __main__ - WARNING - JSON decode error on attempt 0 for scripts/data/11445224007035644H44421110A0001.pdf-3: Expecting ',' delimiter: line 1 column 2694 (char 2693)
  14475. 2025-07-20 15:44:13,839 - __main__ - INFO - Queue remaining: 2
  14476. 2025-07-20 15:44:13,839 - __main__ - INFO -
  14477. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14478. ----------------------------------------------------------------------------------
  14479. sglang_input_tokens 930.14 1082.33
  14480. sglang_output_tokens 261.25 286.52
  14481. 2025-07-20 15:44:13,839 - __main__ - INFO -
  14482. Worker ID | finished | started
  14483. ----------+----------+--------
  14484. 0 | 464 | 500
  14485. 1 | 0 | 10
  14486. 2025-07-20 15:44:13,849 - sglang - INFO - [2025-07-20 15:44:13 TP0] Prefill batch. #new-seq: 1, #new-token: 2390, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 35
  14487. 2025-07-20 15:44:13,849 - __main__ - INFO - sglang running req: 9 queue req: 35
  14488. 2025-07-20 15:44:14,255 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-3
  14489. 2025-07-20 15:44:15,222 - sglang - INFO - [2025-07-20 15:44:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1962, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 35
  14490. 2025-07-20 15:44:15,222 - __main__ - INFO - sglang running req: 9 queue req: 35
  14491. 2025-07-20 15:44:16,173 - sglang - INFO - [2025-07-20 15:44:16 TP0] Decode batch. #running-req: 10, #token: 28797, token usage: 0.76, gen throughput (token/s): 165.90, #queue-req: 35
  14492. 2025-07-20 15:44:16,174 - __main__ - INFO - sglang running req: 10 queue req: 35
  14493. 2025-07-20 15:44:17,160 - sglang - INFO - [2025-07-20 15:44:17 TP0] Decode batch. #running-req: 10, #token: 29197, token usage: 0.77, gen throughput (token/s): 405.37, #queue-req: 35
  14494. 2025-07-20 15:44:17,160 - __main__ - INFO - sglang running req: 10 queue req: 35
  14495. 2025-07-20 15:44:18,143 - sglang - INFO - [2025-07-20 15:44:18 TP0] Decode batch. #running-req: 10, #token: 29597, token usage: 0.78, gen throughput (token/s): 406.95, #queue-req: 35
  14496. 2025-07-20 15:44:18,143 - __main__ - INFO - sglang running req: 10 queue req: 35
  14497. 2025-07-20 15:44:19,126 - sglang - INFO - [2025-07-20 15:44:19 TP0] Decode batch. #running-req: 10, #token: 29997, token usage: 0.79, gen throughput (token/s): 406.67, #queue-req: 35
  14498. 2025-07-20 15:44:19,127 - __main__ - INFO - sglang running req: 10 queue req: 35
  14499. 2025-07-20 15:44:20,113 - sglang - INFO - [2025-07-20 15:44:20 TP0] Decode batch. #running-req: 10, #token: 30397, token usage: 0.80, gen throughput (token/s): 405.35, #queue-req: 35
  14500. 2025-07-20 15:44:20,114 - __main__ - INFO - sglang running req: 10 queue req: 35
  14501. 2025-07-20 15:44:20,881 - sglang - INFO - [2025-07-20 15:44:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2725, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 34
  14502. 2025-07-20 15:44:20,881 - __main__ - INFO - sglang running req: 9 queue req: 34
  14503. 2025-07-20 15:44:21,913 - sglang - INFO - [2025-07-20 15:44:21 TP0] Decode batch. #running-req: 10, #token: 31444, token usage: 0.83, gen throughput (token/s): 221.66, #queue-req: 34
  14504. 2025-07-20 15:44:21,914 - __main__ - INFO - sglang running req: 10 queue req: 34
  14505. 2025-07-20 15:44:22,905 - sglang - INFO - [2025-07-20 15:44:22 TP0] Decode batch. #running-req: 10, #token: 31844, token usage: 0.84, gen throughput (token/s): 403.53, #queue-req: 34
  14506. 2025-07-20 15:44:22,905 - __main__ - INFO - sglang running req: 10 queue req: 34
  14507. 2025-07-20 15:44:23,103 - sglang - INFO - [2025-07-20 15:44:23 TP0] Prefill batch. #new-seq: 1, #new-token: 1337, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 33
  14508. 2025-07-20 15:44:23,104 - __main__ - INFO - sglang running req: 9 queue req: 33
  14509. 2025-07-20 15:44:23,841 - __main__ - INFO - Queue remaining: 2
  14510. 2025-07-20 15:44:23,841 - __main__ - INFO -
  14511. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14512. ----------------------------------------------------------------------------------
  14513. sglang_input_tokens 927.61 1075.17
  14514. sglang_output_tokens 260.36 281.92
  14515. 2025-07-20 15:44:23,841 - __main__ - INFO -
  14516. Worker ID | finished | started
  14517. ----------+----------+--------
  14518. 0 | 467 | 500
  14519. 1 | 0 | 10
  14520. 2025-07-20 15:44:23,920 - sglang - INFO - [2025-07-20 15:44:23 TP0] Prefill batch. #new-seq: 1, #new-token: 2122, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 32
  14521. 2025-07-20 15:44:23,920 - __main__ - INFO - sglang running req: 9 queue req: 32
  14522. 2025-07-20 15:44:25,086 - sglang - INFO - [2025-07-20 15:44:25 TP0] Decode batch. #running-req: 10, #token: 30764, token usage: 0.81, gen throughput (token/s): 182.45, #queue-req: 32
  14523. 2025-07-20 15:44:25,086 - __main__ - INFO - sglang running req: 10 queue req: 32
  14524. 2025-07-20 15:44:26,072 - sglang - INFO - [2025-07-20 15:44:26 TP0] Decode batch. #running-req: 10, #token: 31164, token usage: 0.82, gen throughput (token/s): 405.79, #queue-req: 32
  14525. 2025-07-20 15:44:26,072 - __main__ - INFO - sglang running req: 10 queue req: 32
  14526. 2025-07-20 15:44:27,058 - sglang - INFO - [2025-07-20 15:44:27 TP0] Decode batch. #running-req: 10, #token: 31564, token usage: 0.83, gen throughput (token/s): 405.79, #queue-req: 32
  14527. 2025-07-20 15:44:27,058 - __main__ - INFO - sglang running req: 10 queue req: 32
  14528. 2025-07-20 15:44:28,049 - sglang - INFO - [2025-07-20 15:44:28 TP0] Decode batch. #running-req: 10, #token: 31964, token usage: 0.84, gen throughput (token/s): 403.57, #queue-req: 32
  14529. 2025-07-20 15:44:28,049 - __main__ - INFO - sglang running req: 10 queue req: 32
  14530. 2025-07-20 15:44:28,891 - sglang - INFO - [2025-07-20 15:44:28 TP0] Prefill batch. #new-seq: 1, #new-token: 2371, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 8, #queue-req: 31
  14531. 2025-07-20 15:44:28,891 - __main__ - INFO - sglang running req: 8 queue req: 31
  14532. 2025-07-20 15:44:29,787 - sglang - INFO - [2025-07-20 15:44:29 TP0] Decode batch. #running-req: 9, #token: 29052, token usage: 0.76, gen throughput (token/s): 218.53, #queue-req: 31
  14533. 2025-07-20 15:44:29,788 - __main__ - INFO - sglang running req: 9 queue req: 31
  14534. 2025-07-20 15:44:30,205 - sglang - INFO - [2025-07-20 15:44:30 TP0] Prefill batch. #new-seq: 3, #new-token: 5878, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 8, #queue-req: 28
  14535. 2025-07-20 15:44:30,205 - __main__ - INFO - sglang running req: 8 queue req: 28
  14536. 2025-07-20 15:44:32,115 - sglang - INFO - [2025-07-20 15:44:32 TP0] Prefill batch. #new-seq: 1, #new-token: 2370, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 10, #queue-req: 27
  14537. 2025-07-20 15:44:32,115 - __main__ - INFO - sglang running req: 10 queue req: 27
  14538. 2025-07-20 15:44:33,408 - sglang - INFO - [2025-07-20 15:44:33 TP0] Decode batch. #running-req: 11, #token: 29044, token usage: 0.76, gen throughput (token/s): 111.58, #queue-req: 27
  14539. 2025-07-20 15:44:33,409 - __main__ - INFO - sglang running req: 11 queue req: 27
  14540. 2025-07-20 15:44:33,458 - sglang - INFO - [2025-07-20 15:44:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2701, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 26
  14541. 2025-07-20 15:44:33,458 - __main__ - INFO - sglang running req: 10 queue req: 26
  14542. 2025-07-20 15:44:33,843 - __main__ - INFO - Queue remaining: 2
  14543. 2025-07-20 15:44:33,843 - __main__ - INFO -
  14544. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14545. ----------------------------------------------------------------------------------
  14546. sglang_input_tokens 932.39 1086.75
  14547. sglang_output_tokens 262.01 287.15
  14548. 2025-07-20 15:44:33,844 - __main__ - INFO -
  14549. Worker ID | finished | started
  14550. ----------+----------+--------
  14551. 0 | 473 | 500
  14552. 1 | 0 | 10
  14553. 2025-07-20 15:44:34,759 - sglang - INFO - [2025-07-20 15:44:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2555, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 25
  14554. 2025-07-20 15:44:34,759 - __main__ - INFO - sglang running req: 10 queue req: 25
  14555. 2025-07-20 15:44:35,964 - sglang - INFO - [2025-07-20 15:44:35 TP0] Decode batch. #running-req: 11, #token: 28856, token usage: 0.76, gen throughput (token/s): 171.40, #queue-req: 25
  14556. 2025-07-20 15:44:35,964 - __main__ - INFO - sglang running req: 11 queue req: 25
  14557. 2025-07-20 15:44:36,952 - sglang - INFO - [2025-07-20 15:44:36 TP0] Decode batch. #running-req: 11, #token: 29296, token usage: 0.77, gen throughput (token/s): 445.44, #queue-req: 25
  14558. 2025-07-20 15:44:36,952 - __main__ - INFO - sglang running req: 11 queue req: 25
  14559. 2025-07-20 15:44:37,594 - sglang - INFO - [2025-07-20 15:44:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2703, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 10, #queue-req: 24
  14560. 2025-07-20 15:44:37,594 - __main__ - INFO - sglang running req: 10 queue req: 24
  14561. 2025-07-20 15:44:38,747 - sglang - INFO - [2025-07-20 15:44:38 TP0] Decode batch. #running-req: 11, #token: 29052, token usage: 0.76, gen throughput (token/s): 244.56, #queue-req: 24
  14562. 2025-07-20 15:44:38,747 - __main__ - INFO - sglang running req: 11 queue req: 24
  14563. 2025-07-20 15:44:39,733 - sglang - INFO - [2025-07-20 15:44:39 TP0] Decode batch. #running-req: 11, #token: 29492, token usage: 0.78, gen throughput (token/s): 445.84, #queue-req: 24
  14564. 2025-07-20 15:44:39,734 - __main__ - INFO - sglang running req: 11 queue req: 24
  14565. 2025-07-20 15:44:40,722 - sglang - INFO - [2025-07-20 15:44:40 TP0] Decode batch. #running-req: 11, #token: 29932, token usage: 0.79, gen throughput (token/s): 445.06, #queue-req: 24
  14566. 2025-07-20 15:44:40,722 - __main__ - INFO - sglang running req: 11 queue req: 24
  14567. 2025-07-20 15:44:41,710 - sglang - INFO - [2025-07-20 15:44:41 TP0] Decode batch. #running-req: 11, #token: 30372, token usage: 0.80, gen throughput (token/s): 445.19, #queue-req: 24
  14568. 2025-07-20 15:44:41,711 - __main__ - INFO - sglang running req: 11 queue req: 24
  14569. 2025-07-20 15:44:42,205 - sglang - INFO - [2025-07-20 15:44:42 TP0] Prefill batch. #new-seq: 1, #new-token: 2901, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 10, #queue-req: 23
  14570. 2025-07-20 15:44:42,205 - __main__ - INFO - sglang running req: 10 queue req: 23
  14571. 2025-07-20 15:44:43,539 - sglang - INFO - [2025-07-20 15:44:43 TP0] Decode batch. #running-req: 11, #token: 31609, token usage: 0.83, gen throughput (token/s): 240.05, #queue-req: 23
  14572. 2025-07-20 15:44:43,539 - __main__ - INFO - sglang running req: 11 queue req: 23
  14573. 2025-07-20 15:44:43,845 - __main__ - INFO - Queue remaining: 2
  14574. 2025-07-20 15:44:43,845 - __main__ - INFO -
  14575. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14576. ----------------------------------------------------------------------------------
  14577. sglang_input_tokens 930.11 1070.68
  14578. sglang_output_tokens 261.37 284.34
  14579. 2025-07-20 15:44:43,846 - __main__ - INFO -
  14580. Worker ID | finished | started
  14581. ----------+----------+--------
  14582. 0 | 476 | 500
  14583. 1 | 0 | 10
  14584. 2025-07-20 15:44:44,536 - sglang - INFO - [2025-07-20 15:44:44 TP0] Decode batch. #running-req: 11, #token: 32049, token usage: 0.84, gen throughput (token/s): 441.56, #queue-req: 23
  14585. 2025-07-20 15:44:44,536 - __main__ - INFO - sglang running req: 11 queue req: 23
  14586. 2025-07-20 15:44:45,407 - sglang - INFO - [2025-07-20 15:44:45 TP0] Prefill batch. #new-seq: 1, #new-token: 2369, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 10, #queue-req: 22
  14587. 2025-07-20 15:44:45,407 - __main__ - INFO - sglang running req: 10 queue req: 22
  14588. 2025-07-20 15:44:46,281 - sglang - INFO - [2025-07-20 15:44:46 TP0] Decode batch. #running-req: 11, #token: 32161, token usage: 0.85, gen throughput (token/s): 251.52, #queue-req: 22
  14589. 2025-07-20 15:44:46,281 - __main__ - INFO - sglang running req: 11 queue req: 22
  14590. 2025-07-20 15:44:47,277 - sglang - INFO - [2025-07-20 15:44:47 TP0] Decode batch. #running-req: 11, #token: 32601, token usage: 0.86, gen throughput (token/s): 441.90, #queue-req: 22
  14591. 2025-07-20 15:44:47,277 - __main__ - INFO - sglang running req: 11 queue req: 22
  14592. 2025-07-20 15:44:48,271 - sglang - INFO - [2025-07-20 15:44:48 TP0] Decode batch. #running-req: 11, #token: 33041, token usage: 0.87, gen throughput (token/s): 442.43, #queue-req: 22
  14593. 2025-07-20 15:44:48,271 - __main__ - INFO - sglang running req: 11 queue req: 22
  14594. 2025-07-20 15:44:49,265 - sglang - INFO - [2025-07-20 15:44:49 TP0] Decode batch. #running-req: 10, #token: 26313, token usage: 0.69, gen throughput (token/s): 440.61, #queue-req: 22
  14595. 2025-07-20 15:44:49,265 - __main__ - INFO - sglang running req: 10 queue req: 22
  14596. 2025-07-20 15:44:49,290 - sglang - INFO - [2025-07-20 15:44:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2383, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 21
  14597. 2025-07-20 15:44:49,290 - __main__ - INFO - sglang running req: 9 queue req: 21
  14598. 2025-07-20 15:44:50,555 - sglang - INFO - [2025-07-20 15:44:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2804, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 20
  14599. 2025-07-20 15:44:50,555 - __main__ - INFO - sglang running req: 9 queue req: 20
  14600. 2025-07-20 15:44:51,565 - sglang - INFO - [2025-07-20 15:44:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2967, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 19
  14601. 2025-07-20 15:44:51,565 - __main__ - INFO - sglang running req: 9 queue req: 19
  14602. 2025-07-20 15:44:52,682 - sglang - INFO - [2025-07-20 15:44:52 TP0] Decode batch. #running-req: 10, #token: 29451, token usage: 0.78, gen throughput (token/s): 116.21, #queue-req: 19
  14603. 2025-07-20 15:44:52,682 - __main__ - INFO - sglang running req: 10 queue req: 19
  14604. 2025-07-20 15:44:53,666 - sglang - INFO - [2025-07-20 15:44:53 TP0] Decode batch. #running-req: 10, #token: 29851, token usage: 0.79, gen throughput (token/s): 406.19, #queue-req: 19
  14605. 2025-07-20 15:44:53,667 - __main__ - INFO - sglang running req: 10 queue req: 19
  14606. 2025-07-20 15:44:53,839 - sglang - INFO - [2025-07-20 15:44:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2572, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 18
  14607. 2025-07-20 15:44:53,839 - __main__ - INFO - sglang running req: 9 queue req: 18
  14608. 2025-07-20 15:44:53,847 - __main__ - INFO - Queue remaining: 2
  14609. 2025-07-20 15:44:53,847 - __main__ - INFO -
  14610. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14611. ----------------------------------------------------------------------------------
  14612. sglang_input_tokens 933.93 1086.68
  14613. sglang_output_tokens 262.56 290.05
  14614. 2025-07-20 15:44:53,847 - __main__ - INFO -
  14615. Worker ID | finished | started
  14616. ----------+----------+--------
  14617. 0 | 482 | 500
  14618. 1 | 0 | 10
  14619. 2025-07-20 15:44:55,459 - sglang - INFO - [2025-07-20 15:44:55 TP0] Decode batch. #running-req: 10, #token: 30164, token usage: 0.79, gen throughput (token/s): 222.52, #queue-req: 18
  14620. 2025-07-20 15:44:55,460 - __main__ - INFO - sglang running req: 10 queue req: 18
  14621. 2025-07-20 15:44:56,223 - sglang - INFO - [2025-07-20 15:44:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2125, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 17
  14622. 2025-07-20 15:44:56,223 - __main__ - INFO - sglang running req: 9 queue req: 17
  14623. 2025-07-20 15:44:57,117 - sglang - INFO - [2025-07-20 15:44:57 TP0] Decode batch. #running-req: 10, #token: 29525, token usage: 0.78, gen throughput (token/s): 240.78, #queue-req: 17
  14624. 2025-07-20 15:44:57,117 - __main__ - INFO - sglang running req: 10 queue req: 17
  14625. 2025-07-20 15:44:57,511 - sglang - INFO - [2025-07-20 15:44:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2485, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 16
  14626. 2025-07-20 15:44:57,511 - __main__ - INFO - sglang running req: 9 queue req: 16
  14627. 2025-07-20 15:44:58,859 - sglang - INFO - [2025-07-20 15:44:58 TP0] Decode batch. #running-req: 10, #token: 29055, token usage: 0.76, gen throughput (token/s): 229.02, #queue-req: 16
  14628. 2025-07-20 15:44:58,859 - __main__ - INFO - sglang running req: 10 queue req: 16
  14629. 2025-07-20 15:44:59,843 - sglang - INFO - [2025-07-20 15:44:59 TP0] Decode batch. #running-req: 10, #token: 29455, token usage: 0.78, gen throughput (token/s): 406.55, #queue-req: 16
  14630. 2025-07-20 15:44:59,843 - __main__ - INFO - sglang running req: 10 queue req: 16
  14631. 2025-07-20 15:45:00,829 - sglang - INFO - [2025-07-20 15:45:00 TP0] Decode batch. #running-req: 10, #token: 29855, token usage: 0.79, gen throughput (token/s): 405.49, #queue-req: 16
  14632. 2025-07-20 15:45:00,829 - __main__ - INFO - sglang running req: 10 queue req: 16
  14633. 2025-07-20 15:45:01,569 - sglang - INFO - [2025-07-20 15:45:01 TP0] Prefill batch. #new-seq: 1, #new-token: 1915, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 15
  14634. 2025-07-20 15:45:01,569 - __main__ - INFO - sglang running req: 9 queue req: 15
  14635. 2025-07-20 15:45:02,467 - sglang - INFO - [2025-07-20 15:45:02 TP0] Decode batch. #running-req: 10, #token: 28366, token usage: 0.75, gen throughput (token/s): 243.63, #queue-req: 15
  14636. 2025-07-20 15:45:02,467 - __main__ - INFO - sglang running req: 10 queue req: 15
  14637. 2025-07-20 15:45:02,614 - sglang - INFO - [2025-07-20 15:45:02 TP0] Prefill batch. #new-seq: 1, #new-token: 2971, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 14
  14638. 2025-07-20 15:45:02,615 - __main__ - INFO - sglang running req: 9 queue req: 14
  14639. 2025-07-20 15:45:03,848 - __main__ - INFO - Queue remaining: 2
  14640. 2025-07-20 15:45:03,848 - __main__ - INFO -
  14641. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14642. ----------------------------------------------------------------------------------
  14643. sglang_input_tokens 934.57 1091.81
  14644. sglang_output_tokens 262.47 290.17
  14645. 2025-07-20 15:45:03,848 - __main__ - INFO -
  14646. Worker ID | finished | started
  14647. ----------+----------+--------
  14648. 0 | 486 | 500
  14649. 1 | 0 | 10
  14650. 2025-07-20 15:45:04,320 - sglang - INFO - [2025-07-20 15:45:04 TP0] Decode batch. #running-req: 10, #token: 29437, token usage: 0.77, gen throughput (token/s): 215.26, #queue-req: 14
  14651. 2025-07-20 15:45:04,321 - __main__ - INFO - sglang running req: 10 queue req: 14
  14652. 2025-07-20 15:45:05,306 - sglang - INFO - [2025-07-20 15:45:05 TP0] Decode batch. #running-req: 10, #token: 29837, token usage: 0.79, gen throughput (token/s): 405.69, #queue-req: 14
  14653. 2025-07-20 15:45:05,307 - __main__ - INFO - sglang running req: 10 queue req: 14
  14654. 2025-07-20 15:45:06,296 - sglang - INFO - [2025-07-20 15:45:06 TP0] Decode batch. #running-req: 10, #token: 30237, token usage: 0.80, gen throughput (token/s): 404.29, #queue-req: 14
  14655. 2025-07-20 15:45:06,296 - __main__ - INFO - sglang running req: 10 queue req: 14
  14656. 2025-07-20 15:45:07,284 - sglang - INFO - [2025-07-20 15:45:07 TP0] Decode batch. #running-req: 10, #token: 30637, token usage: 0.81, gen throughput (token/s): 404.55, #queue-req: 14
  14657. 2025-07-20 15:45:07,285 - __main__ - INFO - sglang running req: 10 queue req: 14
  14658. 2025-07-20 15:45:08,272 - sglang - INFO - [2025-07-20 15:45:08 TP0] Decode batch. #running-req: 10, #token: 31037, token usage: 0.82, gen throughput (token/s): 404.97, #queue-req: 14
  14659. 2025-07-20 15:45:08,273 - __main__ - INFO - sglang running req: 10 queue req: 14
  14660. 2025-07-20 15:45:08,347 - sglang - INFO - [2025-07-20 15:45:08 TP0] Prefill batch. #new-seq: 1, #new-token: 1623, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 13
  14661. 2025-07-20 15:45:08,347 - __main__ - INFO - sglang running req: 9 queue req: 13
  14662. 2025-07-20 15:45:10,010 - sglang - INFO - [2025-07-20 15:45:10 TP0] Decode batch. #running-req: 10, #token: 30155, token usage: 0.79, gen throughput (token/s): 229.55, #queue-req: 13
  14663. 2025-07-20 15:45:10,011 - __main__ - INFO - sglang running req: 10 queue req: 13
  14664. 2025-07-20 15:45:10,997 - sglang - INFO - [2025-07-20 15:45:10 TP0] Decode batch. #running-req: 10, #token: 30555, token usage: 0.80, gen throughput (token/s): 405.29, #queue-req: 13
  14665. 2025-07-20 15:45:10,997 - __main__ - INFO - sglang running req: 10 queue req: 13
  14666. 2025-07-20 15:45:11,984 - sglang - INFO - [2025-07-20 15:45:11 TP0] Decode batch. #running-req: 10, #token: 30955, token usage: 0.81, gen throughput (token/s): 405.27, #queue-req: 13
  14667. 2025-07-20 15:45:11,985 - __main__ - INFO - sglang running req: 10 queue req: 13
  14668. 2025-07-20 15:45:12,976 - sglang - INFO - [2025-07-20 15:45:12 TP0] Decode batch. #running-req: 10, #token: 31355, token usage: 0.83, gen throughput (token/s): 403.51, #queue-req: 13
  14669. 2025-07-20 15:45:12,976 - __main__ - INFO - sglang running req: 10 queue req: 13
  14670. 2025-07-20 15:45:13,597 - sglang - INFO - [2025-07-20 15:45:13 TP0] Prefill batch. #new-seq: 1, #new-token: 3123, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 12
  14671. 2025-07-20 15:45:13,597 - __main__ - INFO - sglang running req: 9 queue req: 12
  14672. 2025-07-20 15:45:13,850 - __main__ - INFO - Queue remaining: 2
  14673. 2025-07-20 15:45:13,850 - __main__ - INFO -
  14674. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14675. ----------------------------------------------------------------------------------
  14676. sglang_input_tokens 931.07 1058.65
  14677. sglang_output_tokens 261.63 280.72
  14678. 2025-07-20 15:45:13,850 - __main__ - INFO -
  14679. Worker ID | finished | started
  14680. ----------+----------+--------
  14681. 0 | 488 | 500
  14682. 1 | 0 | 10
  14683. 2025-07-20 15:45:14,894 - sglang - INFO - [2025-07-20 15:45:14 TP0] Decode batch. #running-req: 10, #token: 31096, token usage: 0.82, gen throughput (token/s): 207.97, #queue-req: 12
  14684. 2025-07-20 15:45:14,894 - __main__ - INFO - sglang running req: 10 queue req: 12
  14685. 2025-07-20 15:45:15,044 - sglang - INFO - [2025-07-20 15:45:15 TP0] Prefill batch. #new-seq: 1, #new-token: 3337, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 11
  14686. 2025-07-20 15:45:15,044 - __main__ - INFO - sglang running req: 9 queue req: 11
  14687. 2025-07-20 15:45:16,401 - sglang - INFO - [2025-07-20 15:45:16 TP0] Prefill batch. #new-seq: 1, #new-token: 3404, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 10
  14688. 2025-07-20 15:45:16,402 - __main__ - INFO - sglang running req: 9 queue req: 10
  14689. 2025-07-20 15:45:17,782 - sglang - INFO - [2025-07-20 15:45:17 TP0] Prefill batch. #new-seq: 1, #new-token: 4466, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 8, #queue-req: 9
  14690. 2025-07-20 15:45:17,782 - __main__ - INFO - sglang running req: 8 queue req: 9
  14691. 2025-07-20 15:45:19,072 - sglang - INFO - [2025-07-20 15:45:19 TP0] Decode batch. #running-req: 9, #token: 30826, token usage: 0.81, gen throughput (token/s): 93.60, #queue-req: 9
  14692. 2025-07-20 15:45:19,072 - __main__ - INFO - sglang running req: 9 queue req: 9
  14693. 2025-07-20 15:45:20,057 - sglang - INFO - [2025-07-20 15:45:20 TP0] Decode batch. #running-req: 9, #token: 31186, token usage: 0.82, gen throughput (token/s): 365.24, #queue-req: 9
  14694. 2025-07-20 15:45:20,058 - __main__ - INFO - sglang running req: 9 queue req: 9
  14695. 2025-07-20 15:45:20,329 - sglang - INFO - [2025-07-20 15:45:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2676, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 8, #queue-req: 8
  14696. 2025-07-20 15:45:20,329 - __main__ - INFO - sglang running req: 8 queue req: 8
  14697. 2025-07-20 15:45:21,875 - sglang - INFO - [2025-07-20 15:45:21 TP0] Decode batch. #running-req: 9, #token: 32311, token usage: 0.85, gen throughput (token/s): 197.45, #queue-req: 8
  14698. 2025-07-20 15:45:21,876 - __main__ - INFO - sglang running req: 9 queue req: 8
  14699. 2025-07-20 15:45:22,868 - sglang - INFO - [2025-07-20 15:45:22 TP0] Decode batch. #running-req: 9, #token: 32671, token usage: 0.86, gen throughput (token/s): 362.54, #queue-req: 8
  14700. 2025-07-20 15:45:22,868 - __main__ - INFO - sglang running req: 9 queue req: 8
  14701. 2025-07-20 15:45:23,809 - sglang - INFO - [2025-07-20 15:45:23 TP0] Prefill batch. #new-seq: 1, #new-token: 3126, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 8, #queue-req: 7
  14702. 2025-07-20 15:45:23,809 - __main__ - INFO - sglang running req: 8 queue req: 7
  14703. 2025-07-20 15:45:23,851 - __main__ - INFO - Queue remaining: 2
  14704. 2025-07-20 15:45:23,851 - __main__ - INFO -
  14705. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14706. ----------------------------------------------------------------------------------
  14707. sglang_input_tokens 934.77 1063.74
  14708. sglang_output_tokens 262.72 283.70
  14709. 2025-07-20 15:45:23,851 - __main__ - INFO -
  14710. Worker ID | finished | started
  14711. ----------+----------+--------
  14712. 0 | 493 | 500
  14713. 1 | 1 | 10
  14714. 2025-07-20 15:45:24,786 - sglang - INFO - [2025-07-20 15:45:24 TP0] Decode batch. #running-req: 9, #token: 32118, token usage: 0.85, gen throughput (token/s): 187.20, #queue-req: 7
  14715. 2025-07-20 15:45:24,786 - __main__ - INFO - sglang running req: 9 queue req: 7
  14716. 2025-07-20 15:45:25,775 - sglang - INFO - [2025-07-20 15:45:25 TP0] Decode batch. #running-req: 9, #token: 32478, token usage: 0.85, gen throughput (token/s): 364.16, #queue-req: 7
  14717. 2025-07-20 15:45:25,775 - __main__ - INFO - sglang running req: 9 queue req: 7
  14718. 2025-07-20 15:45:26,763 - sglang - INFO - [2025-07-20 15:45:26 TP0] Decode batch. #running-req: 9, #token: 32838, token usage: 0.86, gen throughput (token/s): 364.20, #queue-req: 7
  14719. 2025-07-20 15:45:26,763 - __main__ - INFO - sglang running req: 9 queue req: 7
  14720. 2025-07-20 15:45:27,862 - sglang - INFO - [2025-07-20 15:45:27 TP0] Decode batch. #running-req: 9, #token: 33198, token usage: 0.87, gen throughput (token/s): 327.70, #queue-req: 7
  14721. 2025-07-20 15:45:27,862 - __main__ - INFO - sglang running req: 9 queue req: 7
  14722. 2025-07-20 15:45:28,856 - sglang - INFO - [2025-07-20 15:45:28 TP0] Decode batch. #running-req: 9, #token: 33558, token usage: 0.88, gen throughput (token/s): 361.96, #queue-req: 7
  14723. 2025-07-20 15:45:28,857 - __main__ - INFO - sglang running req: 9 queue req: 7
  14724. 2025-07-20 15:45:29,849 - sglang - INFO - [2025-07-20 15:45:29 TP0] Decode batch. #running-req: 9, #token: 33918, token usage: 0.89, gen throughput (token/s): 362.56, #queue-req: 7
  14725. 2025-07-20 15:45:29,850 - __main__ - INFO - sglang running req: 9 queue req: 7
  14726. 2025-07-20 15:45:30,844 - sglang - INFO - [2025-07-20 15:45:30 TP0] Decode batch. #running-req: 9, #token: 34278, token usage: 0.90, gen throughput (token/s): 362.01, #queue-req: 7
  14727. 2025-07-20 15:45:30,844 - __main__ - INFO - sglang running req: 9 queue req: 7
  14728. 2025-07-20 15:45:31,839 - sglang - INFO - [2025-07-20 15:45:31 TP0] Decode batch. #running-req: 9, #token: 34638, token usage: 0.91, gen throughput (token/s): 361.72, #queue-req: 7
  14729. 2025-07-20 15:45:31,839 - __main__ - INFO - sglang running req: 9 queue req: 7
  14730. 2025-07-20 15:45:32,838 - sglang - INFO - [2025-07-20 15:45:32 TP0] Decode batch. #running-req: 9, #token: 34998, token usage: 0.92, gen throughput (token/s): 360.48, #queue-req: 7
  14731. 2025-07-20 15:45:32,838 - __main__ - INFO - sglang running req: 9 queue req: 7
  14732. 2025-07-20 15:45:33,835 - sglang - INFO - [2025-07-20 15:45:33 TP0] Decode batch. #running-req: 9, #token: 35358, token usage: 0.93, gen throughput (token/s): 360.83, #queue-req: 7
  14733. 2025-07-20 15:45:33,836 - __main__ - INFO - sglang running req: 9 queue req: 7
  14734. 2025-07-20 15:45:33,852 - __main__ - INFO - Queue remaining: 2
  14735. 2025-07-20 15:45:33,852 - __main__ - INFO -
  14736. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14737. ----------------------------------------------------------------------------------
  14738. sglang_input_tokens 927.24 1063.74
  14739. sglang_output_tokens 260.60 283.70
  14740. 2025-07-20 15:45:33,852 - __main__ - INFO -
  14741. Worker ID | finished | started
  14742. ----------+----------+--------
  14743. 0 | 493 | 500
  14744. 1 | 1 | 10
  14745. 2025-07-20 15:45:34,847 - sglang - INFO - [2025-07-20 15:45:34 TP0] Decode batch. #running-req: 8, #token: 31461, token usage: 0.83, gen throughput (token/s): 346.98, #queue-req: 7
  14746. 2025-07-20 15:45:34,847 - __main__ - INFO - sglang running req: 8 queue req: 7
  14747. 2025-07-20 15:45:35,786 - sglang - INFO - [2025-07-20 15:45:35 TP0] Decode batch. #running-req: 8, #token: 31781, token usage: 0.84, gen throughput (token/s): 340.79, #queue-req: 7
  14748. 2025-07-20 15:45:35,786 - __main__ - INFO - sglang running req: 8 queue req: 7
  14749. 2025-07-20 15:45:36,728 - sglang - INFO - [2025-07-20 15:45:36 TP0] Decode batch. #running-req: 8, #token: 32101, token usage: 0.85, gen throughput (token/s): 339.73, #queue-req: 7
  14750. 2025-07-20 15:45:36,728 - __main__ - INFO - sglang running req: 8 queue req: 7
  14751. 2025-07-20 15:45:37,669 - sglang - INFO - [2025-07-20 15:45:37 TP0] Decode batch. #running-req: 8, #token: 32421, token usage: 0.85, gen throughput (token/s): 339.89, #queue-req: 7
  14752. 2025-07-20 15:45:37,670 - __main__ - INFO - sglang running req: 8 queue req: 7
  14753. 2025-07-20 15:45:38,610 - sglang - INFO - [2025-07-20 15:45:38 TP0] Decode batch. #running-req: 8, #token: 32741, token usage: 0.86, gen throughput (token/s): 340.39, #queue-req: 7
  14754. 2025-07-20 15:45:38,610 - __main__ - INFO - sglang running req: 8 queue req: 7
  14755. 2025-07-20 15:45:38,728 - sglang - INFO - [2025-07-20 15:45:38 TP0] Prefill batch. #new-seq: 1, #new-token: 3140, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 7, #queue-req: 6
  14756. 2025-07-20 15:45:38,728 - __main__ - INFO - sglang running req: 7 queue req: 6
  14757. 2025-07-20 15:45:40,475 - sglang - INFO - [2025-07-20 15:45:40 TP0] Decode batch. #running-req: 8, #token: 31934, token usage: 0.84, gen throughput (token/s): 170.97, #queue-req: 6
  14758. 2025-07-20 15:45:40,476 - __main__ - INFO - sglang running req: 8 queue req: 6
  14759. 2025-07-20 15:45:41,041 - sglang - INFO - [2025-07-20 15:45:41 TP0] Prefill batch. #new-seq: 1, #new-token: 3079, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 7, #queue-req: 5
  14760. 2025-07-20 15:45:41,041 - __main__ - INFO - sglang running req: 7 queue req: 5
  14761. 2025-07-20 15:45:42,341 - sglang - INFO - [2025-07-20 15:45:42 TP0] Decode batch. #running-req: 8, #token: 27719, token usage: 0.73, gen throughput (token/s): 171.00, #queue-req: 5
  14762. 2025-07-20 15:45:42,341 - __main__ - INFO - sglang running req: 8 queue req: 5
  14763. 2025-07-20 15:45:42,364 - sglang - INFO - [2025-07-20 15:45:42 TP0] Prefill batch. #new-seq: 1, #new-token: 3605, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 4
  14764. 2025-07-20 15:45:42,365 - __main__ - INFO - sglang running req: 7 queue req: 4
  14765. 2025-07-20 15:45:43,854 - __main__ - INFO - Queue remaining: 2
  14766. 2025-07-20 15:45:43,854 - __main__ - INFO -
  14767. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14768. ----------------------------------------------------------------------------------
  14769. sglang_input_tokens 928.95 1040.45
  14770. sglang_output_tokens 262.29 284.22
  14771. 2025-07-20 15:45:43,854 - __main__ - INFO -
  14772. Worker ID | finished | started
  14773. ----------+----------+--------
  14774. 0 | 496 | 500
  14775. 1 | 2 | 10
  14776. 2025-07-20 15:45:44,307 - sglang - INFO - [2025-07-20 15:45:44 TP0] Decode batch. #running-req: 8, #token: 31643, token usage: 0.83, gen throughput (token/s): 162.29, #queue-req: 4
  14777. 2025-07-20 15:45:44,307 - __main__ - INFO - sglang running req: 8 queue req: 4
  14778. 2025-07-20 15:45:45,245 - sglang - INFO - [2025-07-20 15:45:45 TP0] Decode batch. #running-req: 8, #token: 31963, token usage: 0.84, gen throughput (token/s): 340.91, #queue-req: 4
  14779. 2025-07-20 15:45:45,245 - __main__ - INFO - sglang running req: 8 queue req: 4
  14780. 2025-07-20 15:45:46,226 - sglang - INFO - [2025-07-20 15:45:46 TP0] Decode batch. #running-req: 8, #token: 32283, token usage: 0.85, gen throughput (token/s): 326.38, #queue-req: 4
  14781. 2025-07-20 15:45:46,226 - __main__ - INFO - sglang running req: 8 queue req: 4
  14782. 2025-07-20 15:45:47,169 - sglang - INFO - [2025-07-20 15:45:47 TP0] Decode batch. #running-req: 8, #token: 32603, token usage: 0.86, gen throughput (token/s): 339.08, #queue-req: 4
  14783. 2025-07-20 15:45:47,170 - __main__ - INFO - sglang running req: 8 queue req: 4
  14784. 2025-07-20 15:45:47,713 - sglang - INFO - [2025-07-20 15:45:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 7, #queue-req: 3
  14785. 2025-07-20 15:45:47,713 - __main__ - INFO - sglang running req: 7 queue req: 3
  14786. 2025-07-20 15:45:48,950 - sglang - INFO - [2025-07-20 15:45:48 TP0] Decode batch. #running-req: 8, #token: 31453, token usage: 0.83, gen throughput (token/s): 179.17, #queue-req: 3
  14787. 2025-07-20 15:45:48,950 - __main__ - INFO - sglang running req: 8 queue req: 3
  14788. 2025-07-20 15:45:49,895 - sglang - INFO - [2025-07-20 15:45:49 TP0] Decode batch. #running-req: 8, #token: 31773, token usage: 0.84, gen throughput (token/s): 338.63, #queue-req: 3
  14789. 2025-07-20 15:45:49,895 - __main__ - INFO - sglang running req: 8 queue req: 3
  14790. 2025-07-20 15:45:50,297 - sglang - INFO - [2025-07-20 15:45:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 7, #queue-req: 2
  14791. 2025-07-20 15:45:50,298 - __main__ - INFO - sglang running req: 7 queue req: 2
  14792. 2025-07-20 15:45:51,671 - sglang - INFO - [2025-07-20 15:45:51 TP0] Decode batch. #running-req: 8, #token: 30497, token usage: 0.80, gen throughput (token/s): 179.57, #queue-req: 2
  14793. 2025-07-20 15:45:51,671 - __main__ - INFO - sglang running req: 8 queue req: 2
  14794. 2025-07-20 15:45:52,610 - sglang - INFO - [2025-07-20 15:45:52 TP0] Decode batch. #running-req: 8, #token: 30817, token usage: 0.81, gen throughput (token/s): 340.94, #queue-req: 2
  14795. 2025-07-20 15:45:52,610 - __main__ - INFO - sglang running req: 8 queue req: 2
  14796. 2025-07-20 15:45:53,548 - sglang - INFO - [2025-07-20 15:45:53 TP0] Decode batch. #running-req: 8, #token: 31137, token usage: 0.82, gen throughput (token/s): 340.88, #queue-req: 2
  14797. 2025-07-20 15:45:53,548 - __main__ - INFO - sglang running req: 8 queue req: 2
  14798. 2025-07-20 15:45:53,855 - __main__ - INFO - Queue remaining: 2
  14799. 2025-07-20 15:45:53,855 - __main__ - INFO -
  14800. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14801. ----------------------------------------------------------------------------------
  14802. sglang_input_tokens 926.93 1030.15
  14803. sglang_output_tokens 261.87 282.35
  14804. 2025-07-20 15:45:53,855 - __main__ - INFO -
  14805. Worker ID | finished | started
  14806. ----------+----------+--------
  14807. 0 | 496 | 500
  14808. 1 | 4 | 10
  14809. 2025-07-20 15:45:54,490 - sglang - INFO - [2025-07-20 15:45:54 TP0] Decode batch. #running-req: 8, #token: 31457, token usage: 0.83, gen throughput (token/s): 339.95, #queue-req: 2
  14810. 2025-07-20 15:45:54,490 - __main__ - INFO - sglang running req: 8 queue req: 2
  14811. 2025-07-20 15:45:55,430 - sglang - INFO - [2025-07-20 15:45:55 TP0] Decode batch. #running-req: 8, #token: 31777, token usage: 0.84, gen throughput (token/s): 340.19, #queue-req: 2
  14812. 2025-07-20 15:45:55,430 - __main__ - INFO - sglang running req: 8 queue req: 2
  14813. 2025-07-20 15:45:56,089 - sglang - INFO - [2025-07-20 15:45:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2787, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 1
  14814. 2025-07-20 15:45:56,089 - __main__ - INFO - sglang running req: 7 queue req: 1
  14815. 2025-07-20 15:45:57,182 - sglang - INFO - [2025-07-20 15:45:57 TP0] Decode batch. #running-req: 8, #token: 30438, token usage: 0.80, gen throughput (token/s): 182.15, #queue-req: 1
  14816. 2025-07-20 15:45:57,182 - __main__ - INFO - sglang running req: 8 queue req: 1
  14817. 2025-07-20 15:45:58,131 - sglang - INFO - [2025-07-20 15:45:58 TP0] Decode batch. #running-req: 8, #token: 30758, token usage: 0.81, gen throughput (token/s): 337.29, #queue-req: 1
  14818. 2025-07-20 15:45:58,131 - __main__ - INFO - sglang running req: 8 queue req: 1
  14819. 2025-07-20 15:45:59,070 - sglang - INFO - [2025-07-20 15:45:59 TP0] Decode batch. #running-req: 8, #token: 31078, token usage: 0.82, gen throughput (token/s): 340.60, #queue-req: 1
  14820. 2025-07-20 15:45:59,070 - __main__ - INFO - sglang running req: 8 queue req: 1
  14821. 2025-07-20 15:46:00,011 - sglang - INFO - [2025-07-20 15:46:00 TP0] Decode batch. #running-req: 8, #token: 31398, token usage: 0.83, gen throughput (token/s): 340.20, #queue-req: 1
  14822. 2025-07-20 15:46:00,011 - __main__ - INFO - sglang running req: 8 queue req: 1
  14823. 2025-07-20 15:46:00,951 - sglang - INFO - [2025-07-20 15:46:00 TP0] Decode batch. #running-req: 8, #token: 31718, token usage: 0.83, gen throughput (token/s): 340.28, #queue-req: 1
  14824. 2025-07-20 15:46:00,951 - __main__ - INFO - sglang running req: 8 queue req: 1
  14825. 2025-07-20 15:46:01,893 - sglang - INFO - [2025-07-20 15:46:01 TP0] Decode batch. #running-req: 8, #token: 32038, token usage: 0.84, gen throughput (token/s): 339.79, #queue-req: 1
  14826. 2025-07-20 15:46:01,893 - __main__ - INFO - sglang running req: 8 queue req: 1
  14827. 2025-07-20 15:46:02,011 - sglang - INFO - [2025-07-20 15:46:02 TP0] Prefill batch. #new-seq: 1, #new-token: 3361, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 0
  14828. 2025-07-20 15:46:02,011 - __main__ - INFO - sglang running req: 7 queue req: 0
  14829. 2025-07-20 15:46:02,369 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  14830. 2025-07-20 15:46:02,370 - __main__ - INFO - Worker 2 processing work item 16158dc6fac58e5a41d3888b9554c3d75b2a5744
  14831. 2025-07-20 15:46:02,370 - __main__ - INFO - Created all tasks for 16158dc6fac58e5a41d3888b9554c3d75b2a5744
  14832. 2025-07-20 15:46:02,374 - __main__ - INFO - Got 5 pages to do for scripts/data/12445200726503846U3442014055009.pdf in worker 2
  14833. 2025-07-20 15:46:02,462 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-5
  14834. 2025-07-20 15:46:02,494 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-2
  14835. 2025-07-20 15:46:02,542 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-3
  14836. 2025-07-20 15:46:02,606 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-1
  14837. 2025-07-20 15:46:02,735 - __main__ - INFO - Built page query for scripts/data/12445200726503846U3442014055009.pdf-4
  14838. 2025-07-20 15:46:02,979 - sglang - INFO - [2025-07-20 15:46:02 TP0] Prefill batch. #new-seq: 1, #new-token: 1234, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.82, #running-req: 8, #queue-req: 4
  14839. 2025-07-20 15:46:02,980 - __main__ - INFO - sglang running req: 8 queue req: 4
  14840. 2025-07-20 15:46:03,857 - __main__ - INFO - Queue remaining: 1
  14841. 2025-07-20 15:46:03,857 - __main__ - INFO -
  14842. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14843. ----------------------------------------------------------------------------------
  14844. sglang_input_tokens 924.55 1011.15
  14845. sglang_output_tokens 261.89 281.43
  14846. 2025-07-20 15:46:03,857 - __main__ - INFO -
  14847. Worker ID | finished | started
  14848. ----------+----------+--------
  14849. 0 | 496 | 500
  14850. 1 | 6 | 10
  14851. 2 | 0 | 5
  14852. 2025-07-20 15:46:04,354 - sglang - INFO - [2025-07-20 15:46:04 TP0] Decode batch. #running-req: 9, #token: 32535, token usage: 0.86, gen throughput (token/s): 143.85, #queue-req: 4
  14853. 2025-07-20 15:46:04,354 - __main__ - INFO - sglang running req: 9 queue req: 4
  14854. 2025-07-20 15:46:05,346 - sglang - INFO - [2025-07-20 15:46:05 TP0] Decode batch. #running-req: 9, #token: 32895, token usage: 0.87, gen throughput (token/s): 362.98, #queue-req: 4
  14855. 2025-07-20 15:46:05,346 - __main__ - INFO - sglang running req: 9 queue req: 4
  14856. 2025-07-20 15:46:06,340 - sglang - INFO - [2025-07-20 15:46:06 TP0] Decode batch. #running-req: 9, #token: 33255, token usage: 0.88, gen throughput (token/s): 362.08, #queue-req: 4
  14857. 2025-07-20 15:46:06,340 - __main__ - INFO - sglang running req: 9 queue req: 4
  14858. 2025-07-20 15:46:07,307 - sglang - INFO - [2025-07-20 15:46:07 TP0] Decode batch. #running-req: 8, #token: 32226, token usage: 0.85, gen throughput (token/s): 340.11, #queue-req: 4
  14859. 2025-07-20 15:46:07,307 - __main__ - INFO - sglang running req: 8 queue req: 4
  14860. 2025-07-20 15:46:08,250 - sglang - INFO - [2025-07-20 15:46:08 TP0] Decode batch. #running-req: 8, #token: 32546, token usage: 0.86, gen throughput (token/s): 339.27, #queue-req: 4
  14861. 2025-07-20 15:46:08,251 - __main__ - INFO - sglang running req: 8 queue req: 4
  14862. 2025-07-20 15:46:09,194 - sglang - INFO - [2025-07-20 15:46:09 TP0] Decode batch. #running-req: 8, #token: 32866, token usage: 0.87, gen throughput (token/s): 339.15, #queue-req: 4
  14863. 2025-07-20 15:46:09,194 - __main__ - INFO - sglang running req: 8 queue req: 4
  14864. 2025-07-20 15:46:10,139 - sglang - INFO - [2025-07-20 15:46:10 TP0] Decode batch. #running-req: 8, #token: 33186, token usage: 0.87, gen throughput (token/s): 338.42, #queue-req: 4
  14865. 2025-07-20 15:46:10,140 - __main__ - INFO - sglang running req: 8 queue req: 4
  14866. 2025-07-20 15:46:11,089 - sglang - INFO - [2025-07-20 15:46:11 TP0] Decode batch. #running-req: 8, #token: 33506, token usage: 0.88, gen throughput (token/s): 337.03, #queue-req: 4
  14867. 2025-07-20 15:46:11,089 - __main__ - INFO - sglang running req: 8 queue req: 4
  14868. 2025-07-20 15:46:12,036 - sglang - INFO - [2025-07-20 15:46:12 TP0] Decode batch. #running-req: 8, #token: 33826, token usage: 0.89, gen throughput (token/s): 337.86, #queue-req: 4
  14869. 2025-07-20 15:46:12,036 - __main__ - INFO - sglang running req: 8 queue req: 4
  14870. 2025-07-20 15:46:12,982 - sglang - INFO - [2025-07-20 15:46:12 TP0] Decode batch. #running-req: 8, #token: 34146, token usage: 0.90, gen throughput (token/s): 338.22, #queue-req: 4
  14871. 2025-07-20 15:46:12,982 - __main__ - INFO - sglang running req: 8 queue req: 4
  14872. 2025-07-20 15:46:13,859 - __main__ - INFO - Queue remaining: 1
  14873. 2025-07-20 15:46:13,859 - __main__ - INFO -
  14874. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14875. ----------------------------------------------------------------------------------
  14876. sglang_input_tokens 918.29 1010.52
  14877. sglang_output_tokens 259.94 281.11
  14878. 2025-07-20 15:46:13,859 - __main__ - INFO -
  14879. Worker ID | finished | started
  14880. ----------+----------+--------
  14881. 0 | 496 | 500
  14882. 1 | 6 | 10
  14883. 2 | 1 | 5
  14884. 2025-07-20 15:46:13,933 - sglang - INFO - [2025-07-20 15:46:13 TP0] Decode batch. #running-req: 8, #token: 34466, token usage: 0.91, gen throughput (token/s): 336.68, #queue-req: 4
  14885. 2025-07-20 15:46:13,933 - __main__ - INFO - sglang running req: 8 queue req: 4
  14886. 2025-07-20 15:46:14,881 - sglang - INFO - [2025-07-20 15:46:14 TP0] Decode batch. #running-req: 8, #token: 34786, token usage: 0.92, gen throughput (token/s): 337.50, #queue-req: 4
  14887. 2025-07-20 15:46:14,881 - __main__ - INFO - sglang running req: 8 queue req: 4
  14888. 2025-07-20 15:46:15,829 - sglang - INFO - [2025-07-20 15:46:15 TP0] Decode batch. #running-req: 8, #token: 35106, token usage: 0.92, gen throughput (token/s): 337.59, #queue-req: 4
  14889. 2025-07-20 15:46:15,829 - __main__ - INFO - sglang running req: 8 queue req: 4
  14890. 2025-07-20 15:46:16,612 - sglang - INFO - [2025-07-20 15:46:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2072, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.83, #running-req: 7, #queue-req: 3
  14891. 2025-07-20 15:46:16,612 - __main__ - INFO - sglang running req: 7 queue req: 3
  14892. 2025-07-20 15:46:17,462 - sglang - INFO - [2025-07-20 15:46:17 TP0] Decode batch. #running-req: 8, #token: 33638, token usage: 0.89, gen throughput (token/s): 195.29, #queue-req: 3
  14893. 2025-07-20 15:46:17,462 - __main__ - INFO - sglang running req: 8 queue req: 3
  14894. 2025-07-20 15:46:18,422 - sglang - INFO - [2025-07-20 15:46:18 TP0] Decode batch. #running-req: 8, #token: 33958, token usage: 0.89, gen throughput (token/s): 333.39, #queue-req: 3
  14895. 2025-07-20 15:46:18,422 - __main__ - INFO - sglang running req: 8 queue req: 3
  14896. 2025-07-20 15:46:18,637 - sglang - INFO - [2025-07-20 15:46:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1378, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.78, #running-req: 7, #queue-req: 2
  14897. 2025-07-20 15:46:18,637 - __main__ - INFO - sglang running req: 7 queue req: 2
  14898. 2025-07-20 15:46:19,886 - sglang - INFO - [2025-07-20 15:46:19 TP0] Decode batch. #running-req: 8, #token: 31152, token usage: 0.82, gen throughput (token/s): 217.88, #queue-req: 2
  14899. 2025-07-20 15:46:19,886 - __main__ - INFO - sglang running req: 8 queue req: 2
  14900. 2025-07-20 15:46:20,825 - sglang - INFO - [2025-07-20 15:46:20 TP0] Decode batch. #running-req: 8, #token: 31472, token usage: 0.83, gen throughput (token/s): 340.63, #queue-req: 2
  14901. 2025-07-20 15:46:20,826 - __main__ - INFO - sglang running req: 8 queue req: 2
  14902. 2025-07-20 15:46:21,769 - sglang - INFO - [2025-07-20 15:46:21 TP0] Decode batch. #running-req: 8, #token: 31792, token usage: 0.84, gen throughput (token/s): 339.31, #queue-req: 2
  14903. 2025-07-20 15:46:21,769 - __main__ - INFO - sglang running req: 8 queue req: 2
  14904. 2025-07-20 15:46:22,711 - sglang - INFO - [2025-07-20 15:46:22 TP0] Decode batch. #running-req: 8, #token: 32112, token usage: 0.85, gen throughput (token/s): 339.62, #queue-req: 2
  14905. 2025-07-20 15:46:22,711 - __main__ - INFO - sglang running req: 8 queue req: 2
  14906. 2025-07-20 15:46:22,853 - sglang - INFO - [2025-07-20 15:46:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2461, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 1
  14907. 2025-07-20 15:46:22,853 - __main__ - INFO - sglang running req: 7 queue req: 1
  14908. 2025-07-20 15:46:23,861 - __main__ - INFO - Queue remaining: 1
  14909. 2025-07-20 15:46:23,861 - __main__ - INFO -
  14910. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14911. ----------------------------------------------------------------------------------
  14912. sglang_input_tokens 918.22 976.63
  14913. sglang_output_tokens 260.88 275.95
  14914. 2025-07-20 15:46:23,861 - __main__ - INFO -
  14915. Worker ID | finished | started
  14916. ----------+----------+--------
  14917. 0 | 497 | 500
  14918. 1 | 8 | 10
  14919. 2 | 1 | 5
  14920. 2025-07-20 15:46:24,406 - sglang - INFO - [2025-07-20 15:46:24 TP0] Decode batch. #running-req: 8, #token: 30352, token usage: 0.80, gen throughput (token/s): 188.19, #queue-req: 1
  14921. 2025-07-20 15:46:24,406 - __main__ - INFO - sglang running req: 8 queue req: 1
  14922. 2025-07-20 15:46:24,735 - sglang - INFO - [2025-07-20 15:46:24 TP0] Prefill batch. #new-seq: 1, #new-token: 2640, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 7, #queue-req: 0
  14923. 2025-07-20 15:46:24,735 - __main__ - INFO - sglang running req: 7 queue req: 0
  14924. 2025-07-20 15:46:26,138 - sglang - INFO - [2025-07-20 15:46:26 TP0] Decode batch. #running-req: 8, #token: 31729, token usage: 0.84, gen throughput (token/s): 184.20, #queue-req: 0
  14925. 2025-07-20 15:46:26,138 - __main__ - INFO - sglang running req: 8 queue req: 0
  14926. 2025-07-20 15:46:26,688 - __main__ - WARNING - JSON decode error on attempt 1 for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12: Unterminated string starting at: line 1 column 125 (char 124)
  14927. 2025-07-20 15:46:26,977 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12
  14928. 2025-07-20 15:46:27,074 - sglang - INFO - [2025-07-20 15:46:27 TP0] Decode batch. #running-req: 7, #token: 27788, token usage: 0.73, gen throughput (token/s): 323.76, #queue-req: 0
  14929. 2025-07-20 15:46:27,074 - __main__ - INFO - sglang running req: 7 queue req: 0
  14930. 2025-07-20 15:46:27,196 - sglang - INFO - [2025-07-20 15:46:27 TP0] Prefill batch. #new-seq: 1, #new-token: 2884, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 0
  14931. 2025-07-20 15:46:27,196 - __main__ - INFO - sglang running req: 7 queue req: 0
  14932. 2025-07-20 15:46:28,865 - sglang - INFO - [2025-07-20 15:46:28 TP0] Decode batch. #running-req: 8, #token: 30987, token usage: 0.82, gen throughput (token/s): 175.82, #queue-req: 0
  14933. 2025-07-20 15:46:28,865 - __main__ - INFO - sglang running req: 8 queue req: 0
  14934. 2025-07-20 15:46:29,807 - sglang - INFO - [2025-07-20 15:46:29 TP0] Decode batch. #running-req: 8, #token: 31307, token usage: 0.82, gen throughput (token/s): 339.72, #queue-req: 0
  14935. 2025-07-20 15:46:29,807 - __main__ - INFO - sglang running req: 8 queue req: 0
  14936. 2025-07-20 15:46:30,335 - __main__ - WARNING - JSON decode error on attempt 1 for scripts/data/11445200MB2D6222364440125017008.pdf-13: Unterminated string starting at: line 1 column 125 (char 124)
  14937. 2025-07-20 15:46:30,539 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-13
  14938. 2025-07-20 15:46:30,703 - sglang - INFO - [2025-07-20 15:46:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2787, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 0
  14939. 2025-07-20 15:46:30,703 - __main__ - INFO - sglang running req: 7 queue req: 0
  14940. 2025-07-20 15:46:31,560 - sglang - INFO - [2025-07-20 15:46:31 TP0] Decode batch. #running-req: 8, #token: 30417, token usage: 0.80, gen throughput (token/s): 173.44, #queue-req: 0
  14941. 2025-07-20 15:46:31,560 - __main__ - INFO - sglang running req: 8 queue req: 0
  14942. 2025-07-20 15:46:32,405 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  14943. 2025-07-20 15:46:32,406 - __main__ - INFO - Worker 3 processing work item b903c79fc04852a9f203dfa04143731928e937aa
  14944. 2025-07-20 15:46:32,406 - __main__ - INFO - Created all tasks for b903c79fc04852a9f203dfa04143731928e937aa
  14945. 2025-07-20 15:46:32,416 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/ambiguous.pdf in worker 3
  14946. 2025-07-20 15:46:32,421 - __main__ - INFO - Got 8 pages to do for tests/gnarly_pdfs/failing_anchor_pg4.pdf in worker 3
  14947. 2025-07-20 15:46:32,422 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/dolma-page-1.pdf in worker 3
  14948. 2025-07-20 15:46:32,427 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/edgar.pdf in worker 3
  14949. 2025-07-20 15:46:32,438 - __main__ - INFO - Got 3 pages to do for tests/gnarly_pdfs/guidebook_failed_pages.pdf in worker 3
  14950. 2025-07-20 15:46:32,447 - __main__ - INFO - Got 48 pages to do for tests/gnarly_pdfs/bws_book_ch2.pdf in worker 3
  14951. 2025-07-20 15:46:32,451 - __main__ - INFO - Got 2 pages to do for tests/gnarly_pdfs/handwriting_bad_ocr.pdf in worker 3
  14952. 2025-07-20 15:46:32,455 - __main__ - INFO - Got 6 pages to do for tests/gnarly_pdfs/large_prompt_hint2.pdf in worker 3
  14953. 2025-07-20 15:46:32,487 - __main__ - INFO - Got 4 pages to do for tests/gnarly_pdfs/large_prompt_hint3.pdf in worker 3
  14954. 2025-07-20 15:46:32,561 - __main__ - INFO - Got 54 pages to do for tests/gnarly_pdfs/overrun_on_pg8.pdf in worker 3
  14955. 2025-07-20 15:46:32,567 - __main__ - INFO - Got 26 pages to do for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf in worker 3
  14956. 2025-07-20 15:46:32,650 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/not_parsing2.pdf in worker 3
  14957. 2025-07-20 15:46:32,656 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf in worker 3
  14958. 2025-07-20 15:46:32,733 - __main__ - INFO - Got 14 pages to do for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf in worker 3
  14959. 2025-07-20 15:46:32,740 - __main__ - INFO - Got 2 pages to do for tests/gnarly_pdfs/skinnypage.pdf in worker 3
  14960. 2025-07-20 15:46:32,743 - __main__ - INFO - Got 6 pages to do for tests/gnarly_pdfs/lots_of_sci_tables.pdf in worker 3
  14961. 2025-07-20 15:46:32,745 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/olmo-page-1.pdf in worker 3
  14962. 2025-07-20 15:46:32,756 - __main__ - INFO - Got 10 pages to do for tests/gnarly_pdfs/form_on_later_pages.pdf in worker 3
  14963. 2025-07-20 15:46:32,763 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/lots_of_chem_tables.pdf in worker 3
  14964. 2025-07-20 15:46:32,837 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/newspaper.pdf in worker 3
  14965. 2025-07-20 15:46:32,842 - __main__ - INFO - Got 7 pages to do for tests/gnarly_pdfs/most_content_in_image_form.pdf in worker 3
  14966. 2025-07-20 15:46:32,857 - __main__ - INFO - Got 9 pages to do for tests/gnarly_pdfs/failing_pdf_pg9.pdf in worker 3
  14967. 2025-07-20 15:46:32,867 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/small_page_size.pdf in worker 3
  14968. 2025-07-20 15:46:33,552 - __main__ - INFO - Got 106 pages to do for tests/gnarly_pdfs/instructions_and_schematics.pdf in worker 3
  14969. 2025-07-20 15:46:33,648 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/map1.pdf in worker 3
  14970. 2025-07-20 15:46:33,660 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/some_ocr1.pdf in worker 3
  14971. 2025-07-20 15:46:34,583 - __main__ - INFO - Got 27 pages to do for tests/gnarly_pdfs/large_prompt_hint1.pdf in worker 3
  14972. 2025-07-20 15:46:34,600 - __main__ - INFO - Got 40 pages to do for tests/gnarly_pdfs/ti89_guidebook_programming.pdf in worker 3
  14973. 2025-07-20 15:46:34,634 - __main__ - INFO - Got 29 pages to do for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf in worker 3
  14974. 2025-07-20 15:46:34,661 - __main__ - INFO - Got 68 pages to do for tests/gnarly_pdfs/slideshow_mostly_images.pdf in worker 3
  14975. 2025-07-20 15:46:34,667 - __main__ - INFO - Got 8 pages to do for tests/gnarly_pdfs/not_parsing.pdf in worker 3
  14976. 2025-07-20 15:46:34,680 - __main__ - INFO - Got 16 pages to do for tests/gnarly_pdfs/load_v_error.pdf in worker 3
  14977. 2025-07-20 15:46:34,707 - sglang - INFO - [2025-07-20 15:46:32 TP0] Decode batch. #running-req: 7, #token: 23748, token usage: 0.63, gen throughput (token/s): 327.70, #queue-req: 0
  14978. 2025-07-20 15:46:34,708 - __main__ - INFO - sglang running req: 7 queue req: 0
  14979. 2025-07-20 15:46:34,708 - sglang - INFO - [2025-07-20 15:46:34 TP0] Decode batch. #running-req: 7, #token: 24028, token usage: 0.63, gen throughput (token/s): 160.43, #queue-req: 0
  14980. 2025-07-20 15:46:34,735 - __main__ - INFO - sglang running req: 7 queue req: 0
  14981. 2025-07-20 15:46:34,737 - __main__ - INFO - Queue remaining: 0
  14982. 2025-07-20 15:46:34,739 - __main__ - INFO -
  14983. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  14984. ----------------------------------------------------------------------------------
  14985. sglang_input_tokens 919.40 976.75
  14986. sglang_output_tokens 262.76 281.58
  14987. 2025-07-20 15:46:34,741 - __main__ - INFO -
  14988. Worker ID | finished | started
  14989. ----------+----------+--------
  14990. 0 | 497 | 500
  14991. 1 | 9 | 10
  14992. 2 | 2 | 5
  14993. 3 | 0 | 529
  14994. 2025-07-20 15:46:34,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ambiguous.pdf-1
  14995. 2025-07-20 15:46:34,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-2
  14996. 2025-07-20 15:46:34,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-1
  14997. 2025-07-20 15:46:34,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-6
  14998. 2025-07-20 15:46:34,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-1
  14999. 2025-07-20 15:46:34,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-8
  15000. 2025-07-20 15:46:34,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-5
  15001. 2025-07-20 15:46:34,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-7
  15002. 2025-07-20 15:46:34,748 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-2
  15003. 2025-07-20 15:46:34,748 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-1
  15004. 2025-07-20 15:46:34,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-13
  15005. 2025-07-20 15:46:34,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-16
  15006. 2025-07-20 15:46:34,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-9
  15007. 2025-07-20 15:46:34,752 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-10
  15008. 2025-07-20 15:46:34,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-11
  15009. 2025-07-20 15:46:34,755 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-6
  15010. 2025-07-20 15:46:34,756 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-19
  15011. 2025-07-20 15:46:34,756 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-15
  15012. 2025-07-20 15:46:34,757 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-18
  15013. 2025-07-20 15:46:34,759 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-7
  15014. 2025-07-20 15:46:34,759 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-14
  15015. 2025-07-20 15:46:34,762 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-3
  15016. 2025-07-20 15:46:34,763 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-4
  15017. 2025-07-20 15:46:34,764 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-2
  15018. 2025-07-20 15:46:34,766 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-4
  15019. 2025-07-20 15:46:34,767 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-17
  15020. 2025-07-20 15:46:34,768 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-3
  15021. 2025-07-20 15:46:34,770 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-12
  15022. 2025-07-20 15:46:34,771 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-5
  15023. 2025-07-20 15:46:34,772 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-41
  15024. 2025-07-20 15:46:34,772 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_anchor_pg4.pdf-8
  15025. 2025-07-20 15:46:34,772 - __main__ - INFO - Built page query for tests/gnarly_pdfs/dolma-page-1.pdf-1
  15026. 2025-07-20 15:46:34,773 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-30
  15027. 2025-07-20 15:46:34,773 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-23
  15028. 2025-07-20 15:46:34,774 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-21
  15029. 2025-07-20 15:46:34,774 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-28
  15030. 2025-07-20 15:46:34,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-22
  15031. 2025-07-20 15:46:34,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-32
  15032. 2025-07-20 15:46:34,776 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-39
  15033. 2025-07-20 15:46:34,776 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-31
  15034. 2025-07-20 15:46:34,779 - __main__ - INFO - Built page query for tests/gnarly_pdfs/guidebook_failed_pages.pdf-3
  15035. 2025-07-20 15:46:34,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-24
  15036. 2025-07-20 15:46:34,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-29
  15037. 2025-07-20 15:46:34,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-34
  15038. 2025-07-20 15:46:34,843 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-37
  15039. 2025-07-20 15:46:34,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-40
  15040. 2025-07-20 15:46:34,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-20
  15041. 2025-07-20 15:46:34,847 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-25
  15042. 2025-07-20 15:46:34,848 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-36
  15043. 2025-07-20 15:46:34,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-35
  15044. 2025-07-20 15:46:34,850 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-27
  15045. 2025-07-20 15:46:34,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/edgar.pdf-1
  15046. 2025-07-20 15:46:34,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-26
  15047. 2025-07-20 15:46:34,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-33
  15048. 2025-07-20 15:46:34,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-38
  15049. 2025-07-20 15:46:34,855 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-42
  15050. 2025-07-20 15:46:34,858 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-46
  15051. 2025-07-20 15:46:34,861 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-44
  15052. 2025-07-20 15:46:34,863 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-45
  15053. 2025-07-20 15:46:34,865 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-48
  15054. 2025-07-20 15:46:34,866 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-47
  15055. 2025-07-20 15:46:34,867 - __main__ - INFO - Built page query for tests/gnarly_pdfs/bws_book_ch2.pdf-43
  15056. 2025-07-20 15:46:35,247 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-6
  15057. 2025-07-20 15:46:35,346 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-1
  15058. 2025-07-20 15:46:35,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-12
  15059. 2025-07-20 15:46:35,640 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-17
  15060. 2025-07-20 15:46:35,645 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-18
  15061. 2025-07-20 15:46:35,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-3
  15062. 2025-07-20 15:46:35,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-2
  15063. 2025-07-20 15:46:35,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-9
  15064. 2025-07-20 15:46:35,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-4
  15065. 2025-07-20 15:46:35,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-19
  15066. 2025-07-20 15:46:35,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-14
  15067. 2025-07-20 15:46:35,836 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-16
  15068. 2025-07-20 15:46:35,844 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-5
  15069. 2025-07-20 15:46:35,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-13
  15070. 2025-07-20 15:46:35,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-2
  15071. 2025-07-20 15:46:35,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-4
  15072. 2025-07-20 15:46:35,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-11
  15073. 2025-07-20 15:46:35,946 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-7
  15074. 2025-07-20 15:46:36,034 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-3
  15075. 2025-07-20 15:46:36,036 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-10
  15076. 2025-07-20 15:46:36,135 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-6
  15077. 2025-07-20 15:46:36,147 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-21
  15078. 2025-07-20 15:46:36,150 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-1
  15079. 2025-07-20 15:46:36,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-8
  15080. 2025-07-20 15:46:36,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-22
  15081. 2025-07-20 15:46:36,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-15
  15082. 2025-07-20 15:46:36,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-30
  15083. 2025-07-20 15:46:36,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-25
  15084. 2025-07-20 15:46:36,634 - sglang - INFO - [2025-07-20 15:46:36 TP0] Decode batch. #running-req: 7, #token: 24308, token usage: 0.64, gen throughput (token/s): 118.91, #queue-req: 0
  15085. 2025-07-20 15:46:36,635 - __main__ - INFO - sglang running req: 7 queue req: 0
  15086. 2025-07-20 15:46:36,642 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-37
  15087. 2025-07-20 15:46:36,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-31
  15088. 2025-07-20 15:46:36,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-20
  15089. 2025-07-20 15:46:36,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-24
  15090. 2025-07-20 15:46:36,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-35
  15091. 2025-07-20 15:46:36,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-38
  15092. 2025-07-20 15:46:36,842 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-23
  15093. 2025-07-20 15:46:36,847 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-36
  15094. 2025-07-20 15:46:36,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-29
  15095. 2025-07-20 15:46:36,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-39
  15096. 2025-07-20 15:46:37,035 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-26
  15097. 2025-07-20 15:46:37,037 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint2.pdf-5
  15098. 2025-07-20 15:46:37,042 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-34
  15099. 2025-07-20 15:46:37,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-32
  15100. 2025-07-20 15:46:37,239 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-44
  15101. 2025-07-20 15:46:37,336 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-27
  15102. 2025-07-20 15:46:37,339 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-28
  15103. 2025-07-20 15:46:37,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-40
  15104. 2025-07-20 15:46:37,438 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-45
  15105. 2025-07-20 15:46:37,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-46
  15106. 2025-07-20 15:46:37,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-4
  15107. 2025-07-20 15:46:37,535 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-33
  15108. 2025-07-20 15:46:37,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-47
  15109. 2025-07-20 15:46:37,552 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-43
  15110. 2025-07-20 15:46:37,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-2
  15111. 2025-07-20 15:46:37,737 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-48
  15112. 2025-07-20 15:46:37,747 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-52
  15113. 2025-07-20 15:46:37,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-49
  15114. 2025-07-20 15:46:37,843 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-1
  15115. 2025-07-20 15:46:37,844 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-50
  15116. 2025-07-20 15:46:37,849 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-41
  15117. 2025-07-20 15:46:37,933 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-3
  15118. 2025-07-20 15:46:37,942 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-8
  15119. 2025-07-20 15:46:38,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-54
  15120. 2025-07-20 15:46:38,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-14
  15121. 2025-07-20 15:46:38,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-51
  15122. 2025-07-20 15:46:38,347 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-6
  15123. 2025-07-20 15:46:38,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-42
  15124. 2025-07-20 15:46:38,451 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-10
  15125. 2025-07-20 15:46:38,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-7
  15126. 2025-07-20 15:46:38,553 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-11
  15127. 2025-07-20 15:46:38,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-5
  15128. 2025-07-20 15:46:38,642 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-22
  15129. 2025-07-20 15:46:38,648 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-21
  15130. 2025-07-20 15:46:38,736 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-15
  15131. 2025-07-20 15:46:38,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-24
  15132. 2025-07-20 15:46:38,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-26
  15133. 2025-07-20 15:46:38,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-12
  15134. 2025-07-20 15:46:38,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/overrun_on_pg8.pdf-53
  15135. 2025-07-20 15:46:38,942 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-13
  15136. 2025-07-20 15:46:38,952 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-17
  15137. 2025-07-20 15:46:39,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-2
  15138. 2025-07-20 15:46:39,140 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-20
  15139. 2025-07-20 15:46:39,144 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-23
  15140. 2025-07-20 15:46:39,233 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-18
  15141. 2025-07-20 15:46:39,242 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-16
  15142. 2025-07-20 15:46:39,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-19
  15143. 2025-07-20 15:46:39,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-9
  15144. 2025-07-20 15:46:39,553 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-25
  15145. 2025-07-20 15:46:39,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-1
  15146. 2025-07-20 15:46:39,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-3
  15147. 2025-07-20 15:46:39,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-5
  15148. 2025-07-20 15:46:39,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-1
  15149. 2025-07-20 15:46:39,933 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-2
  15150. 2025-07-20 15:46:40,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-9
  15151. 2025-07-20 15:46:40,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-3
  15152. 2025-07-20 15:46:40,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-6
  15153. 2025-07-20 15:46:40,452 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-4
  15154. 2025-07-20 15:46:40,538 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-4
  15155. 2025-07-20 15:46:40,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-1
  15156. 2025-07-20 15:46:40,633 - sglang - INFO - [2025-07-20 15:46:40 TP0] Decode batch. #running-req: 7, #token: 24588, token usage: 0.65, gen throughput (token/s): 70.00, #queue-req: 0
  15157. 2025-07-20 15:46:40,633 - __main__ - INFO - sglang running req: 7 queue req: 0
  15158. 2025-07-20 15:46:40,651 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-5
  15159. 2025-07-20 15:46:40,741 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-4
  15160. 2025-07-20 15:46:40,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-2
  15161. 2025-07-20 15:46:40,836 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-7
  15162. 2025-07-20 15:46:40,837 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-4
  15163. 2025-07-20 15:46:40,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-7
  15164. 2025-07-20 15:46:40,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-7
  15165. 2025-07-20 15:46:40,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-10
  15166. 2025-07-20 15:46:41,041 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
  15167. 2025-07-20 15:46:41,050 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-1
  15168. 2025-07-20 15:46:41,137 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-9
  15169. 2025-07-20 15:46:41,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing2.pdf-8
  15170. 2025-07-20 15:46:41,147 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-6
  15171. 2025-07-20 15:46:41,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-6
  15172. 2025-07-20 15:46:41,158 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-3
  15173. 2025-07-20 15:46:41,240 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-1
  15174. 2025-07-20 15:46:41,441 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-5
  15175. 2025-07-20 15:46:41,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-9
  15176. 2025-07-20 15:46:41,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-11
  15177. 2025-07-20 15:46:41,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-13
  15178. 2025-07-20 15:46:41,648 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-10
  15179. 2025-07-20 15:46:41,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-12
  15180. 2025-07-20 15:46:41,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-5
  15181. 2025-07-20 15:46:41,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-7
  15182. 2025-07-20 15:46:41,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-6
  15183. 2025-07-20 15:46:41,947 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-4
  15184. 2025-07-20 15:46:42,043 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-8
  15185. 2025-07-20 15:46:42,044 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-8
  15186. 2025-07-20 15:46:42,046 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-1
  15187. 2025-07-20 15:46:42,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-5
  15188. 2025-07-20 15:46:42,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-1
  15189. 2025-07-20 15:46:42,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-2
  15190. 2025-07-20 15:46:42,142 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-4
  15191. 2025-07-20 15:46:42,144 - __main__ - INFO - Built page query for tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-8
  15192. 2025-07-20 15:46:42,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-2
  15193. 2025-07-20 15:46:42,150 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-10
  15194. 2025-07-20 15:46:42,243 - __main__ - INFO - Built page query for tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-14
  15195. 2025-07-20 15:46:42,244 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-3
  15196. 2025-07-20 15:46:42,247 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-6
  15197. 2025-07-20 15:46:42,248 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_sci_tables.pdf-3
  15198. 2025-07-20 15:46:42,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint3.pdf-3
  15199. 2025-07-20 15:46:42,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-9
  15200. 2025-07-20 15:46:42,539 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-2
  15201. 2025-07-20 15:46:42,546 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-9
  15202. 2025-07-20 15:46:42,635 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-3
  15203. 2025-07-20 15:46:42,638 - __main__ - INFO - Built page query for tests/gnarly_pdfs/form_on_later_pages.pdf-2
  15204. 2025-07-20 15:46:42,639 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-4
  15205. 2025-07-20 15:46:42,641 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-7
  15206. 2025-07-20 15:46:42,654 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-1
  15207. 2025-07-20 15:46:42,753 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-8
  15208. 2025-07-20 15:46:42,760 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-6
  15209. 2025-07-20 15:46:42,835 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-9
  15210. 2025-07-20 15:46:43,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-1
  15211. 2025-07-20 15:46:43,047 - __main__ - INFO - Built page query for tests/gnarly_pdfs/lots_of_chem_tables.pdf-5
  15212. 2025-07-20 15:46:43,064 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-9
  15213. 2025-07-20 15:46:43,146 - __main__ - INFO - Built page query for tests/gnarly_pdfs/olmo-page-1.pdf-1
  15214. 2025-07-20 15:46:43,335 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-2
  15215. 2025-07-20 15:46:43,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-5
  15216. 2025-07-20 15:46:43,344 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-11
  15217. 2025-07-20 15:46:43,366 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-5
  15218. 2025-07-20 15:46:43,439 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-7
  15219. 2025-07-20 15:46:43,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-10
  15220. 2025-07-20 15:46:43,457 - __main__ - INFO - Built page query for tests/gnarly_pdfs/handwriting_bad_ocr.pdf-2
  15221. 2025-07-20 15:46:43,458 - sglang - INFO - [2025-07-20 15:46:43 TP0] Decode batch. #running-req: 7, #token: 24868, token usage: 0.65, gen throughput (token/s): 99.20, #queue-req: 0
  15222. 2025-07-20 15:46:43,458 - __main__ - INFO - sglang running req: 7 queue req: 0
  15223. 2025-07-20 15:46:43,534 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-8
  15224. 2025-07-20 15:46:43,539 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-4
  15225. 2025-07-20 15:46:43,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-4
  15226. 2025-07-20 15:46:43,543 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-5
  15227. 2025-07-20 15:46:43,555 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-1
  15228. 2025-07-20 15:46:43,562 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-3
  15229. 2025-07-20 15:46:43,644 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-2
  15230. 2025-07-20 15:46:43,660 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-3
  15231. 2025-07-20 15:46:43,740 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-16
  15232. 2025-07-20 15:46:43,757 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-8
  15233. 2025-07-20 15:46:43,762 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-15
  15234. 2025-07-20 15:46:43,838 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-6
  15235. 2025-07-20 15:46:43,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-22
  15236. 2025-07-20 15:46:43,841 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-14
  15237. 2025-07-20 15:46:43,850 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-12
  15238. 2025-07-20 15:46:43,851 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-7
  15239. 2025-07-20 15:46:43,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/failing_pdf_pg9.pdf-2
  15240. 2025-07-20 15:46:43,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-20
  15241. 2025-07-20 15:46:43,871 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-3
  15242. 2025-07-20 15:46:43,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-4
  15243. 2025-07-20 15:46:43,942 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-23
  15244. 2025-07-20 15:46:43,943 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-24
  15245. 2025-07-20 15:46:43,947 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-26
  15246. 2025-07-20 15:46:43,953 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-19
  15247. 2025-07-20 15:46:43,954 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-21
  15248. 2025-07-20 15:46:44,039 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-13
  15249. 2025-07-20 15:46:44,052 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-28
  15250. 2025-07-20 15:46:44,142 - __main__ - INFO - Built page query for tests/gnarly_pdfs/handwriting_bad_ocr.pdf-1
  15251. 2025-07-20 15:46:44,147 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-25
  15252. 2025-07-20 15:46:44,157 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-18
  15253. 2025-07-20 15:46:44,252 - __main__ - INFO - Built page query for tests/gnarly_pdfs/newspaper.pdf-1
  15254. 2025-07-20 15:46:44,254 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-6
  15255. 2025-07-20 15:46:44,255 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-44
  15256. 2025-07-20 15:46:44,260 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-30
  15257. 2025-07-20 15:46:44,348 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-29
  15258. 2025-07-20 15:46:44,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-35
  15259. 2025-07-20 15:46:44,361 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-17
  15260. 2025-07-20 15:46:44,435 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-34
  15261. 2025-07-20 15:46:44,533 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-56
  15262. 2025-07-20 15:46:44,540 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-37
  15263. 2025-07-20 15:46:44,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-41
  15264. 2025-07-20 15:46:44,543 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-43
  15265. 2025-07-20 15:46:44,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-46
  15266. 2025-07-20 15:46:44,548 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-36
  15267. 2025-07-20 15:46:44,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-51
  15268. 2025-07-20 15:46:44,559 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-47
  15269. 2025-07-20 15:46:44,563 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-38
  15270. 2025-07-20 15:46:44,638 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-27
  15271. 2025-07-20 15:46:44,642 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-32
  15272. 2025-07-20 15:46:44,650 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-1
  15273. 2025-07-20 15:46:44,653 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-59
  15274. 2025-07-20 15:46:44,739 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-33
  15275. 2025-07-20 15:46:44,742 - __main__ - INFO - Queue remaining: 0
  15276. 2025-07-20 15:46:44,742 - __main__ - INFO -
  15277. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  15278. ----------------------------------------------------------------------------------
  15279. sglang_input_tokens 912.38 976.75
  15280. sglang_output_tokens 260.75 281.58
  15281. 2025-07-20 15:46:44,742 - __main__ - INFO -
  15282. Worker ID | finished | started
  15283. ----------+----------+--------
  15284. 0 | 497 | 500
  15285. 1 | 9 | 10
  15286. 2 | 2 | 5
  15287. 3 | 0 | 529
  15288. 2025-07-20 15:46:44,754 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-49
  15289. 2025-07-20 15:46:44,756 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-40
  15290. 2025-07-20 15:46:44,844 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-50
  15291. 2025-07-20 15:46:44,857 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-65
  15292. 2025-07-20 15:46:44,860 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-62
  15293. 2025-07-20 15:46:44,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-64
  15294. 2025-07-20 15:46:44,936 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-70
  15295. 2025-07-20 15:46:44,939 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-52
  15296. 2025-07-20 15:46:44,950 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-61
  15297. 2025-07-20 15:46:44,953 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-69
  15298. 2025-07-20 15:46:44,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-71
  15299. 2025-07-20 15:46:44,962 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-55
  15300. 2025-07-20 15:46:45,035 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-73
  15301. 2025-07-20 15:46:45,037 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-45
  15302. 2025-07-20 15:46:45,042 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-39
  15303. 2025-07-20 15:46:45,044 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-72
  15304. 2025-07-20 15:46:45,045 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-63
  15305. 2025-07-20 15:46:45,051 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-48
  15306. 2025-07-20 15:46:45,134 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-42
  15307. 2025-07-20 15:46:45,156 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-76
  15308. 2025-07-20 15:46:45,163 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-79
  15309. 2025-07-20 15:46:45,235 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-81
  15310. 2025-07-20 15:46:45,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-54
  15311. 2025-07-20 15:46:45,241 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-75
  15312. 2025-07-20 15:46:45,243 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-77
  15313. 2025-07-20 15:46:45,247 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-68
  15314. 2025-07-20 15:46:45,248 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-57
  15315. 2025-07-20 15:46:45,249 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-60
  15316. 2025-07-20 15:46:45,253 - __main__ - INFO - Built page query for tests/gnarly_pdfs/small_page_size.pdf-1
  15317. 2025-07-20 15:46:45,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-80
  15318. 2025-07-20 15:46:45,345 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-83
  15319. 2025-07-20 15:46:45,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-66
  15320. 2025-07-20 15:46:45,364 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-74
  15321. 2025-07-20 15:46:45,367 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-86
  15322. 2025-07-20 15:46:45,442 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-7
  15323. 2025-07-20 15:46:45,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-78
  15324. 2025-07-20 15:46:45,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-67
  15325. 2025-07-20 15:46:45,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-94
  15326. 2025-07-20 15:46:45,456 - sglang - INFO - [2025-07-20 15:46:45 TP0] Decode batch. #running-req: 7, #token: 25148, token usage: 0.66, gen throughput (token/s): 139.99, #queue-req: 0
  15327. 2025-07-20 15:46:45,456 - __main__ - INFO - sglang running req: 7 queue req: 0
  15328. 2025-07-20 15:46:45,462 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-89
  15329. 2025-07-20 15:46:45,536 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-85
  15330. 2025-07-20 15:46:45,538 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-58
  15331. 2025-07-20 15:46:45,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-92
  15332. 2025-07-20 15:46:45,558 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-53
  15333. 2025-07-20 15:46:45,563 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-99
  15334. 2025-07-20 15:46:45,635 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-96
  15335. 2025-07-20 15:46:45,662 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-103
  15336. 2025-07-20 15:46:45,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-90
  15337. 2025-07-20 15:46:45,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-84
  15338. 2025-07-20 15:46:45,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-98
  15339. 2025-07-20 15:46:45,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-93
  15340. 2025-07-20 15:46:45,754 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-101
  15341. 2025-07-20 15:46:45,757 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-91
  15342. 2025-07-20 15:46:45,762 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-104
  15343. 2025-07-20 15:46:45,839 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-82
  15344. 2025-07-20 15:46:45,850 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-95
  15345. 2025-07-20 15:46:45,853 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-31
  15346. 2025-07-20 15:46:45,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-97
  15347. 2025-07-20 15:46:45,944 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-100
  15348. 2025-07-20 15:46:45,948 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-106
  15349. 2025-07-20 15:46:46,035 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-88
  15350. 2025-07-20 15:46:46,048 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-105
  15351. 2025-07-20 15:46:46,067 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-102
  15352. 2025-07-20 15:46:46,136 - __main__ - INFO - Built page query for tests/gnarly_pdfs/instructions_and_schematics.pdf-87
  15353. 2025-07-20 15:46:46,238 - __main__ - INFO - Built page query for tests/gnarly_pdfs/most_content_in_image_form.pdf-6
  15354. 2025-07-20 15:46:46,260 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-6
  15355. 2025-07-20 15:46:46,341 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-7
  15356. 2025-07-20 15:46:46,351 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-4
  15357. 2025-07-20 15:46:46,359 - __main__ - INFO - Built page query for tests/gnarly_pdfs/some_ocr1.pdf-1
  15358. 2025-07-20 15:46:46,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-9
  15359. 2025-07-20 15:46:46,556 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-14
  15360. 2025-07-20 15:46:46,559 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-3
  15361. 2025-07-20 15:46:46,561 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-11
  15362. 2025-07-20 15:46:46,637 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-10
  15363. 2025-07-20 15:46:46,655 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-16
  15364. 2025-07-20 15:46:46,743 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-5
  15365. 2025-07-20 15:46:46,751 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-4
  15366. 2025-07-20 15:46:46,752 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-2
  15367. 2025-07-20 15:46:46,755 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-21
  15368. 2025-07-20 15:46:46,762 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-6
  15369. 2025-07-20 15:46:46,767 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-22
  15370. 2025-07-20 15:46:46,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-25
  15371. 2025-07-20 15:46:46,859 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-3
  15372. 2025-07-20 15:46:46,934 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-13
  15373. 2025-07-20 15:46:46,937 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-17
  15374. 2025-07-20 15:46:46,941 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-1
  15375. 2025-07-20 15:46:46,945 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-5
  15376. 2025-07-20 15:46:46,949 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-19
  15377. 2025-07-20 15:46:46,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-15
  15378. 2025-07-20 15:46:47,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-2
  15379. 2025-07-20 15:46:47,051 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-27
  15380. 2025-07-20 15:46:47,060 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-23
  15381. 2025-07-20 15:46:47,135 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-8
  15382. 2025-07-20 15:46:47,139 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-9
  15383. 2025-07-20 15:46:47,143 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-12
  15384. 2025-07-20 15:46:47,144 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-8
  15385. 2025-07-20 15:46:47,148 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-13
  15386. 2025-07-20 15:46:47,149 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-17
  15387. 2025-07-20 15:46:47,157 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-18
  15388. 2025-07-20 15:46:47,161 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-11
  15389. 2025-07-20 15:46:47,164 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-19
  15390. 2025-07-20 15:46:47,243 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-7
  15391. 2025-07-20 15:46:47,248 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-14
  15392. 2025-07-20 15:46:47,251 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-12
  15393. 2025-07-20 15:46:47,252 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-18
  15394. 2025-07-20 15:46:47,253 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-15
  15395. 2025-07-20 15:46:47,254 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-16
  15396. 2025-07-20 15:46:47,257 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-26
  15397. 2025-07-20 15:46:47,259 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-20
  15398. 2025-07-20 15:46:47,340 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-20
  15399. 2025-07-20 15:46:47,352 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-22
  15400. 2025-07-20 15:46:47,354 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-25
  15401. 2025-07-20 15:46:47,365 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-24
  15402. 2025-07-20 15:46:47,442 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-27
  15403. 2025-07-20 15:46:47,455 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-26
  15404. 2025-07-20 15:46:47,456 - sglang - INFO - [2025-07-20 15:46:47 TP0] Decode batch. #running-req: 7, #token: 25428, token usage: 0.67, gen throughput (token/s): 140.01, #queue-req: 0
  15405. 2025-07-20 15:46:47,456 - __main__ - INFO - sglang running req: 7 queue req: 0
  15406. 2025-07-20 15:46:47,465 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-23
  15407. 2025-07-20 15:46:47,540 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-34
  15408. 2025-07-20 15:46:47,541 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-29
  15409. 2025-07-20 15:46:47,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-24
  15410. 2025-07-20 15:46:47,556 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-33
  15411. 2025-07-20 15:46:47,643 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-10
  15412. 2025-07-20 15:46:47,650 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-31
  15413. 2025-07-20 15:46:47,659 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-28
  15414. 2025-07-20 15:46:47,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-40
  15415. 2025-07-20 15:46:47,749 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-38
  15416. 2025-07-20 15:46:47,854 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-30
  15417. 2025-07-20 15:46:47,935 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-35
  15418. 2025-07-20 15:46:47,938 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-12
  15419. 2025-07-20 15:46:47,940 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-39
  15420. 2025-07-20 15:46:47,947 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-36
  15421. 2025-07-20 15:46:48,053 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-32
  15422. 2025-07-20 15:46:48,157 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-37
  15423. 2025-07-20 15:46:48,236 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-8
  15424. 2025-07-20 15:46:48,237 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-6
  15425. 2025-07-20 15:46:48,244 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-24
  15426. 2025-07-20 15:46:48,262 - __main__ - INFO - Built page query for tests/gnarly_pdfs/ti89_guidebook_programming.pdf-21
  15427. 2025-07-20 15:46:48,336 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-7
  15428. 2025-07-20 15:46:48,356 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-19
  15429. 2025-07-20 15:46:48,444 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-14
  15430. 2025-07-20 15:46:48,450 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-4
  15431. 2025-07-20 15:46:48,460 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-27
  15432. 2025-07-20 15:46:48,545 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-23
  15433. 2025-07-20 15:46:48,549 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-25
  15434. 2025-07-20 15:46:48,560 - __main__ - INFO - Built page query for tests/gnarly_pdfs/large_prompt_hint1.pdf-1
  15435. 2025-07-20 15:46:48,636 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-20
  15436. 2025-07-20 15:46:48,639 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-15
  15437. 2025-07-20 15:46:48,646 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-22
  15438. 2025-07-20 15:46:48,651 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-5
  15439. 2025-07-20 15:46:48,659 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-11
  15440. 2025-07-20 15:46:48,734 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-9
  15441. 2025-07-20 15:46:48,754 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-17
  15442. 2025-07-20 15:46:48,833 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-13
  15443. 2025-07-20 15:46:48,837 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-2
  15444. 2025-07-20 15:46:48,840 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-16
  15445. 2025-07-20 15:46:48,844 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-1
  15446. 2025-07-20 15:46:48,858 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-26
  15447. 2025-07-20 15:46:48,860 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-6
  15448. 2025-07-20 15:46:48,864 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-28
  15449. 2025-07-20 15:46:48,955 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-1
  15450. 2025-07-20 15:46:48,966 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-29
  15451. 2025-07-20 15:46:49,038 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-3
  15452. 2025-07-20 15:46:49,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-21
  15453. 2025-07-20 15:46:49,153 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-5
  15454. 2025-07-20 15:46:49,358 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-4
  15455. 2025-07-20 15:46:49,364 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-9
  15456. 2025-07-20 15:46:49,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-8
  15457. 2025-07-20 15:46:49,449 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-3
  15458. 2025-07-20 15:46:49,456 - sglang - INFO - [2025-07-20 15:46:49 TP0] Decode batch. #running-req: 7, #token: 25708, token usage: 0.68, gen throughput (token/s): 139.99, #queue-req: 0
  15459. 2025-07-20 15:46:49,456 - __main__ - INFO - sglang running req: 7 queue req: 0
  15460. 2025-07-20 15:46:49,468 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-10
  15461. 2025-07-20 15:46:49,469 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-22
  15462. 2025-07-20 15:46:49,547 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-20
  15463. 2025-07-20 15:46:49,552 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-24
  15464. 2025-07-20 15:46:49,569 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-10
  15465. 2025-07-20 15:46:49,575 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-17
  15466. 2025-07-20 15:46:49,667 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-31
  15467. 2025-07-20 15:46:49,744 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-35
  15468. 2025-07-20 15:46:49,746 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-18
  15469. 2025-07-20 15:46:49,756 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-27
  15470. 2025-07-20 15:46:49,760 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-21
  15471. 2025-07-20 15:46:49,769 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-12
  15472. 2025-07-20 15:46:49,771 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-16
  15473. 2025-07-20 15:46:49,838 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-15
  15474. 2025-07-20 15:46:49,843 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-14
  15475. 2025-07-20 15:46:49,845 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-30
  15476. 2025-07-20 15:46:49,852 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-11
  15477. 2025-07-20 15:46:49,857 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-26
  15478. 2025-07-20 15:46:49,863 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-42
  15479. 2025-07-20 15:46:49,872 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-25
  15480. 2025-07-20 15:46:49,949 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-7
  15481. 2025-07-20 15:46:49,951 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-36
  15482. 2025-07-20 15:46:49,959 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-19
  15483. 2025-07-20 15:46:49,969 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-40
  15484. 2025-07-20 15:46:50,036 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-23
  15485. 2025-07-20 15:46:50,040 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-39
  15486. 2025-07-20 15:46:50,042 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-48
  15487. 2025-07-20 15:46:50,044 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-18
  15488. 2025-07-20 15:46:50,051 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-29
  15489. 2025-07-20 15:46:50,069 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-41
  15490. 2025-07-20 15:46:50,071 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-55
  15491. 2025-07-20 15:46:50,133 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-38
  15492. 2025-07-20 15:46:50,141 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-28
  15493. 2025-07-20 15:46:50,144 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-34
  15494. 2025-07-20 15:46:50,151 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-13
  15495. 2025-07-20 15:46:50,153 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-44
  15496. 2025-07-20 15:46:50,155 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-37
  15497. 2025-07-20 15:46:50,158 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-49
  15498. 2025-07-20 15:46:50,252 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-46
  15499. 2025-07-20 15:46:50,262 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-32
  15500. 2025-07-20 15:46:50,277 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-54
  15501. 2025-07-20 15:46:50,338 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-53
  15502. 2025-07-20 15:46:50,349 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-43
  15503. 2025-07-20 15:46:50,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-47
  15504. 2025-07-20 15:46:50,350 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-45
  15505. 2025-07-20 15:46:50,361 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-33
  15506. 2025-07-20 15:46:50,366 - __main__ - INFO - Built page query for tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-2
  15507. 2025-07-20 15:46:50,367 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-68
  15508. 2025-07-20 15:46:50,374 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-50
  15509. 2025-07-20 15:46:50,374 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-61
  15510. 2025-07-20 15:46:50,440 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-56
  15511. 2025-07-20 15:46:50,443 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-57
  15512. 2025-07-20 15:46:50,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-52
  15513. 2025-07-20 15:46:50,455 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-58
  15514. 2025-07-20 15:46:50,465 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-67
  15515. 2025-07-20 15:46:50,542 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-64
  15516. 2025-07-20 15:46:50,551 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-59
  15517. 2025-07-20 15:46:51,061 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-51
  15518. 2025-07-20 15:46:51,163 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-60
  15519. 2025-07-20 15:46:51,164 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-4
  15520. 2025-07-20 15:46:51,164 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-5
  15521. 2025-07-20 15:46:51,165 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-62
  15522. 2025-07-20 15:46:51,360 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-8
  15523. 2025-07-20 15:46:51,360 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-5
  15524. 2025-07-20 15:46:51,361 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-1
  15525. 2025-07-20 15:46:51,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-66
  15526. 2025-07-20 15:46:51,445 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-3
  15527. 2025-07-20 15:46:51,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-2
  15528. 2025-07-20 15:46:51,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-1
  15529. 2025-07-20 15:46:51,447 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-6
  15530. 2025-07-20 15:46:51,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-63
  15531. 2025-07-20 15:46:51,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-14
  15532. 2025-07-20 15:46:51,448 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-10
  15533. 2025-07-20 15:46:51,569 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-16
  15534. 2025-07-20 15:46:51,569 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-7
  15535. 2025-07-20 15:46:51,770 - sglang - INFO - [2025-07-20 15:46:51 TP0] Decode batch. #running-req: 5, #token: 20054, token usage: 0.53, gen throughput (token/s): 133.90, #queue-req: 0
  15536. 2025-07-20 15:46:51,770 - __main__ - INFO - sglang running req: 5 queue req: 0
  15537. 2025-07-20 15:46:51,770 - sglang - INFO - [2025-07-20 15:46:51 TP0] Prefill batch. #new-seq: 1, #new-token: 1156, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.53, #running-req: 5, #queue-req: 0
  15538. 2025-07-20 15:46:51,770 - __main__ - INFO - sglang running req: 5 queue req: 0
  15539. 2025-07-20 15:46:51,774 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-7
  15540. 2025-07-20 15:46:51,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-8
  15541. 2025-07-20 15:46:51,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-9
  15542. 2025-07-20 15:46:51,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/slideshow_mostly_images.pdf-65
  15543. 2025-07-20 15:46:51,775 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-13
  15544. 2025-07-20 15:46:51,776 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-3
  15545. 2025-07-20 15:46:51,833 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-4
  15546. 2025-07-20 15:46:51,833 - __main__ - INFO - Built page query for tests/gnarly_pdfs/not_parsing.pdf-6
  15547. 2025-07-20 15:46:51,834 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-12
  15548. 2025-07-20 15:46:51,834 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-15
  15549. 2025-07-20 15:46:52,660 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-11
  15550. 2025-07-20 15:46:52,661 - __main__ - INFO - Built page query for tests/gnarly_pdfs/load_v_error.pdf-2
  15551. 2025-07-20 15:46:52,665 - sglang - INFO - [2025-07-20 15:46:52 TP0] Prefill batch. #new-seq: 2, #new-token: 7179, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.56, #running-req: 6, #queue-req: 8
  15552. 2025-07-20 15:46:52,665 - __main__ - INFO - sglang running req: 6 queue req: 8
  15553. 2025-07-20 15:46:53,475 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
  15554. 2025-07-20 15:46:54,743 - __main__ - INFO - Queue remaining: 0
  15555. 2025-07-20 15:46:54,743 - __main__ - INFO -
  15556. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  15557. ----------------------------------------------------------------------------------
  15558. sglang_input_tokens 908.91 950.67
  15559. sglang_output_tokens 259.81 274.65
  15560. 2025-07-20 15:46:54,743 - __main__ - INFO -
  15561. Worker ID | finished | started
  15562. ----------+----------+--------
  15563. 0 | 497 | 500
  15564. 1 | 9 | 10
  15565. 2 | 4 | 5
  15566. 3 | 0 | 529
  15567. 2025-07-20 15:46:55,637 - sglang - INFO - [2025-07-20 15:46:55] Exception in TokenizerManager:
  15568. 2025-07-20 15:46:55,637 - sglang - INFO - Traceback (most recent call last):
  15569. 2025-07-20 15:46:55,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
  15570. 2025-07-20 15:46:55,637 - sglang - INFO - process_result = image_processor(image)
  15571. 2025-07-20 15:46:55,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  15572. 2025-07-20 15:46:55,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
  15573. 2025-07-20 15:46:55,638 - sglang - INFO - return self.preprocess(images, **kwargs)
  15574. 2025-07-20 15:46:55,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15575. 2025-07-20 15:46:55,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
  15576. 2025-07-20 15:46:55,638 - sglang - INFO - patches, image_grid_thw = self._preprocess(
  15577. 2025-07-20 15:46:55,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^
  15578. 2025-07-20 15:46:55,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
  15579. 2025-07-20 15:46:55,638 - sglang - INFO - resized_height, resized_width = smart_resize(
  15580. 2025-07-20 15:46:55,638 - sglang - INFO - ^^^^^^^^^^^^^
  15581. 2025-07-20 15:46:55,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
  15582. 2025-07-20 15:46:55,638 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
  15583. 2025-07-20 15:46:55,638 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
  15584. 2025-07-20 15:46:55,638 - sglang - INFO -
  15585. 2025-07-20 15:46:57,188 - sglang - INFO - [2025-07-20 15:46:57 TP0] Decode batch. #running-req: 8, #token: 28709, token usage: 0.76, gen throughput (token/s): 53.95, #queue-req: 83
  15586. 2025-07-20 15:46:57,188 - __main__ - INFO - sglang running req: 8 queue req: 83
  15587. 2025-07-20 15:46:57,636 - sglang - INFO - [2025-07-20 15:46:57 TP0] Prefill batch. #new-seq: 1, #new-token: 1861, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 93
  15588. 2025-07-20 15:46:57,636 - __main__ - INFO - sglang running req: 7 queue req: 93
  15589. 2025-07-20 15:46:59,556 - sglang - INFO - [2025-07-20 15:46:59 TP0] Decode batch. #running-req: 8, #token: 29683, token usage: 0.78, gen throughput (token/s): 134.67, #queue-req: 143
  15590. 2025-07-20 15:46:59,557 - __main__ - INFO - sglang running req: 8 queue req: 143
  15591. 2025-07-20 15:46:59,582 - sglang - INFO - [2025-07-20 15:46:59] ERROR: Exception in ASGI application
  15592. 2025-07-20 15:46:59,582 - sglang - INFO - Traceback (most recent call last):
  15593. 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
  15594. 2025-07-20 15:46:59,583 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
  15595. 2025-07-20 15:46:59,583 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15596. 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
  15597. 2025-07-20 15:46:59,583 - sglang - INFO - return await self.app(scope, receive, send)
  15598. 2025-07-20 15:46:59,583 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15599. 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
  15600. 2025-07-20 15:46:59,583 - sglang - INFO - await super().__call__(scope, receive, send)
  15601. 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
  15602. 2025-07-20 15:46:59,583 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15603. 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
  15604. 2025-07-20 15:46:59,583 - sglang - INFO - raise exc
  15605. 2025-07-20 15:46:59,583 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
  15606. 2025-07-20 15:46:59,584 - sglang - INFO - await self.app(scope, receive, _send)
  15607. 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
  15608. 2025-07-20 15:46:59,584 - sglang - INFO - await self.app(scope, receive, send)
  15609. 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
  15610. 2025-07-20 15:46:59,584 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  15611. 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15612. 2025-07-20 15:46:59,584 - sglang - INFO - raise exc
  15613. 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15614. 2025-07-20 15:46:59,584 - sglang - INFO - await app(scope, receive, sender)
  15615. 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
  15616. 2025-07-20 15:46:59,584 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15617. 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
  15618. 2025-07-20 15:46:59,584 - sglang - INFO - await route.handle(scope, receive, send)
  15619. 2025-07-20 15:46:59,584 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
  15620. 2025-07-20 15:46:59,585 - sglang - INFO - await self.app(scope, receive, send)
  15621. 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
  15622. 2025-07-20 15:46:59,585 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  15623. 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15624. 2025-07-20 15:46:59,585 - sglang - INFO - raise exc
  15625. 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15626. 2025-07-20 15:46:59,585 - sglang - INFO - await app(scope, receive, sender)
  15627. 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
  15628. 2025-07-20 15:46:59,585 - sglang - INFO - response = await f(request)
  15629. 2025-07-20 15:46:59,585 - sglang - INFO - ^^^^^^^^^^^^^^^^
  15630. 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
  15631. 2025-07-20 15:46:59,585 - sglang - INFO - raw_response = await run_endpoint_function(
  15632. 2025-07-20 15:46:59,585 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15633. 2025-07-20 15:46:59,585 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
  15634. 2025-07-20 15:46:59,585 - sglang - INFO - return await dependant.call(**values)
  15635. 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15636. 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
  15637. 2025-07-20 15:46:59,586 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
  15638. 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15639. 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
  15640. 2025-07-20 15:46:59,586 - sglang - INFO - ret = await tokenizer_manager.generate_request(
  15641. 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15642. 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
  15643. 2025-07-20 15:46:59,586 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
  15644. 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15645. 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
  15646. 2025-07-20 15:46:59,586 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
  15647. 2025-07-20 15:46:59,586 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15648. 2025-07-20 15:46:59,586 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
  15649. 2025-07-20 15:46:59,586 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
  15650. 2025-07-20 15:46:59,587 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15651. 2025-07-20 15:46:59,587 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
  15652. 2025-07-20 15:46:59,587 - __main__ - WARNING - ValueError on attempt 0 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
  15653. 2025-07-20 15:47:00,135 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
  15654. 2025-07-20 15:47:00,455 - sglang - INFO - Token indices sequence length is longer than the specified maximum sequence length for this model (78749 > 32768). Running this sequence through the model will result in indexing errors
  15655. 2025-07-20 15:47:00,547 - sglang - INFO - [2025-07-20 15:47:00 TP0] Prefill batch. #new-seq: 1, #new-token: 1958, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 7, #queue-req: 164
  15656. 2025-07-20 15:47:00,547 - __main__ - INFO - sglang running req: 7 queue req: 164
  15657. 2025-07-20 15:47:00,635 - __main__ - INFO - Finished TaskGroup for worker on 16158dc6fac58e5a41d3888b9554c3d75b2a5744
  15658. 2025-07-20 15:47:00,635 - __main__ - INFO - Got 1 docs for 16158dc6fac58e5a41d3888b9554c3d75b2a5744
  15659. 2025-07-20 15:47:00,638 - __main__ - INFO - Worker 4 exiting due to empty queue
  15660. 2025-07-20 15:47:00,638 - __main__ - INFO - Worker 5 exiting due to empty queue
  15661. 2025-07-20 15:47:00,638 - __main__ - INFO - Worker 6 exiting due to empty queue
  15662. 2025-07-20 15:47:00,639 - __main__ - INFO - Worker 7 exiting due to empty queue
  15663. 2025-07-20 15:47:00,639 - __main__ - INFO - Worker 2 exiting due to empty queue
  15664. 2025-07-20 15:47:01,862 - sglang - INFO - [2025-07-20 15:47:01 TP0] Decode batch. #running-req: 8, #token: 28667, token usage: 0.75, gen throughput (token/s): 138.38, #queue-req: 205
  15665. 2025-07-20 15:47:01,862 - __main__ - INFO - sglang running req: 8 queue req: 205
  15666. 2025-07-20 15:47:03,233 - sglang - INFO - [2025-07-20 15:47:03 TP0] Decode batch. #running-req: 8, #token: 28987, token usage: 0.76, gen throughput (token/s): 233.27, #queue-req: 243
  15667. 2025-07-20 15:47:03,234 - __main__ - INFO - sglang running req: 8 queue req: 243
  15668. 2025-07-20 15:47:04,557 - sglang - INFO - [2025-07-20 15:47:04 TP0] Decode batch. #running-req: 8, #token: 29307, token usage: 0.77, gen throughput (token/s): 241.78, #queue-req: 282
  15669. 2025-07-20 15:47:04,557 - __main__ - INFO - sglang running req: 8 queue req: 282
  15670. 2025-07-20 15:47:04,744 - __main__ - INFO - Queue remaining: 0
  15671. 2025-07-20 15:47:04,744 - __main__ - INFO -
  15672. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  15673. ----------------------------------------------------------------------------------
  15674. finished_input_tokens 7.35 32.62
  15675. finished_output_tokens 1.76 7.83
  15676. sglang_input_tokens 904.93 895.69
  15677. sglang_output_tokens 258.39 258.44
  15678. 2025-07-20 15:47:04,744 - __main__ - INFO -
  15679. Worker ID | finished | started
  15680. ----------+----------+--------
  15681. 0 | 497 | 500
  15682. 1 | 9 | 10
  15683. 2 | 5 | 5
  15684. 3 | 1 | 529
  15685. 2025-07-20 15:47:05,990 - sglang - INFO - [2025-07-20 15:47:05 TP0] Decode batch. #running-req: 8, #token: 29627, token usage: 0.78, gen throughput (token/s): 223.34, #queue-req: 327
  15686. 2025-07-20 15:47:05,990 - __main__ - INFO - sglang running req: 8 queue req: 327
  15687. 2025-07-20 15:47:07,333 - sglang - INFO - [2025-07-20 15:47:07 TP0] Decode batch. #running-req: 8, #token: 29947, token usage: 0.79, gen throughput (token/s): 238.16, #queue-req: 363
  15688. 2025-07-20 15:47:07,333 - __main__ - INFO - sglang running req: 8 queue req: 363
  15689. 2025-07-20 15:47:07,571 - __main__ - WARNING - JSON decode error on attempt 1 for scripts/data/11445224007035644H44421110A0001.pdf-3: Expecting ',' delimiter: line 1 column 2734 (char 2733)
  15690. 2025-07-20 15:47:07,595 - sglang - INFO - [2025-07-20 15:47:07 TP0] Prefill batch. #new-seq: 1, #new-token: 3732, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 7, #queue-req: 371
  15691. 2025-07-20 15:47:07,595 - __main__ - INFO - sglang running req: 7 queue req: 371
  15692. 2025-07-20 15:47:08,256 - __main__ - INFO - Built page query for scripts/data/11445224007035644H44421110A0001.pdf-3
  15693. 2025-07-20 15:47:08,387 - sglang - INFO - [2025-07-20 15:47:08] Exception in TokenizerManager:
  15694. 2025-07-20 15:47:08,388 - sglang - INFO - Traceback (most recent call last):
  15695. 2025-07-20 15:47:08,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
  15696. 2025-07-20 15:47:08,388 - sglang - INFO - process_result = image_processor(image)
  15697. 2025-07-20 15:47:08,388 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  15698. 2025-07-20 15:47:08,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
  15699. 2025-07-20 15:47:08,388 - sglang - INFO - return self.preprocess(images, **kwargs)
  15700. 2025-07-20 15:47:08,388 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15701. 2025-07-20 15:47:08,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
  15702. 2025-07-20 15:47:08,388 - sglang - INFO - patches, image_grid_thw = self._preprocess(
  15703. 2025-07-20 15:47:08,388 - sglang - INFO - ^^^^^^^^^^^^^^^^^
  15704. 2025-07-20 15:47:08,388 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
  15705. 2025-07-20 15:47:08,388 - sglang - INFO - resized_height, resized_width = smart_resize(
  15706. 2025-07-20 15:47:08,389 - sglang - INFO - ^^^^^^^^^^^^^
  15707. 2025-07-20 15:47:08,389 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
  15708. 2025-07-20 15:47:08,389 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
  15709. 2025-07-20 15:47:08,389 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
  15710. 2025-07-20 15:47:08,389 - sglang - INFO -
  15711. 2025-07-20 15:47:10,098 - sglang - INFO - [2025-07-20 15:47:10 TP0] Decode batch. #running-req: 8, #token: 28996, token usage: 0.76, gen throughput (token/s): 115.40, #queue-req: 451
  15712. 2025-07-20 15:47:10,098 - __main__ - INFO - sglang running req: 8 queue req: 451
  15713. 2025-07-20 15:47:10,735 - __main__ - INFO - Finished TaskGroup for worker on 8d1e4551c46000ba4529a1ac09bae565b95f4ab7
  15714. 2025-07-20 15:47:10,735 - __main__ - INFO - Got 1 docs for 8d1e4551c46000ba4529a1ac09bae565b95f4ab7
  15715. 2025-07-20 15:47:10,737 - __main__ - INFO - Worker 1 exiting due to empty queue
  15716. 2025-07-20 15:47:10,759 - sglang - INFO - [2025-07-20 15:47:10 TP0] Prefill batch. #new-seq: 2, #new-token: 4253, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 7, #queue-req: 478
  15717. 2025-07-20 15:47:10,759 - __main__ - INFO - sglang running req: 7 queue req: 478
  15718. 2025-07-20 15:47:11,509 - sglang - INFO - [2025-07-20 15:47:11] ERROR: Exception in ASGI application
  15719. 2025-07-20 15:47:11,509 - sglang - INFO - Traceback (most recent call last):
  15720. 2025-07-20 15:47:11,509 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
  15721. 2025-07-20 15:47:11,509 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
  15722. 2025-07-20 15:47:11,510 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15723. 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
  15724. 2025-07-20 15:47:11,510 - sglang - INFO - return await self.app(scope, receive, send)
  15725. 2025-07-20 15:47:11,510 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15726. 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
  15727. 2025-07-20 15:47:11,510 - sglang - INFO - await super().__call__(scope, receive, send)
  15728. 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
  15729. 2025-07-20 15:47:11,510 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15730. 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
  15731. 2025-07-20 15:47:11,510 - sglang - INFO - raise exc
  15732. 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
  15733. 2025-07-20 15:47:11,510 - sglang - INFO - await self.app(scope, receive, _send)
  15734. 2025-07-20 15:47:11,510 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
  15735. 2025-07-20 15:47:11,510 - sglang - INFO - await self.app(scope, receive, send)
  15736. 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
  15737. 2025-07-20 15:47:11,511 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  15738. 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15739. 2025-07-20 15:47:11,511 - sglang - INFO - raise exc
  15740. 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15741. 2025-07-20 15:47:11,511 - sglang - INFO - await app(scope, receive, sender)
  15742. 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
  15743. 2025-07-20 15:47:11,511 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15744. 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
  15745. 2025-07-20 15:47:11,511 - sglang - INFO - await route.handle(scope, receive, send)
  15746. 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
  15747. 2025-07-20 15:47:11,511 - sglang - INFO - await self.app(scope, receive, send)
  15748. 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
  15749. 2025-07-20 15:47:11,511 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  15750. 2025-07-20 15:47:11,511 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15751. 2025-07-20 15:47:11,512 - sglang - INFO - raise exc
  15752. 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15753. 2025-07-20 15:47:11,512 - sglang - INFO - await app(scope, receive, sender)
  15754. 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
  15755. 2025-07-20 15:47:11,512 - sglang - INFO - response = await f(request)
  15756. 2025-07-20 15:47:11,512 - sglang - INFO - ^^^^^^^^^^^^^^^^
  15757. 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
  15758. 2025-07-20 15:47:11,512 - sglang - INFO - raw_response = await run_endpoint_function(
  15759. 2025-07-20 15:47:11,512 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15760. 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
  15761. 2025-07-20 15:47:11,512 - sglang - INFO - return await dependant.call(**values)
  15762. 2025-07-20 15:47:11,512 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15763. 2025-07-20 15:47:11,512 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
  15764. 2025-07-20 15:47:11,512 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
  15765. 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15766. 2025-07-20 15:47:11,513 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
  15767. 2025-07-20 15:47:11,513 - sglang - INFO - ret = await tokenizer_manager.generate_request(
  15768. 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15769. 2025-07-20 15:47:11,513 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
  15770. 2025-07-20 15:47:11,513 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
  15771. 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15772. 2025-07-20 15:47:11,513 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
  15773. 2025-07-20 15:47:11,513 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
  15774. 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15775. 2025-07-20 15:47:11,513 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
  15776. 2025-07-20 15:47:11,513 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
  15777. 2025-07-20 15:47:11,513 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15778. 2025-07-20 15:47:11,513 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
  15779. 2025-07-20 15:47:11,514 - __main__ - WARNING - ValueError on attempt 1 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
  15780. 2025-07-20 15:47:11,653 - __main__ - WARNING - ValueError on attempt 0 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  15781. 2025-07-20 15:47:11,738 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
  15782. 2025-07-20 15:47:11,767 - sglang - INFO - [2025-07-20 15:47:11] Exception in TokenizerManager:
  15783. 2025-07-20 15:47:11,767 - sglang - INFO - Traceback (most recent call last):
  15784. 2025-07-20 15:47:11,767 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
  15785. 2025-07-20 15:47:11,767 - sglang - INFO - process_result = image_processor(image)
  15786. 2025-07-20 15:47:11,767 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  15787. 2025-07-20 15:47:11,767 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
  15788. 2025-07-20 15:47:11,768 - sglang - INFO - return self.preprocess(images, **kwargs)
  15789. 2025-07-20 15:47:11,768 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15790. 2025-07-20 15:47:11,768 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
  15791. 2025-07-20 15:47:11,768 - sglang - INFO - patches, image_grid_thw = self._preprocess(
  15792. 2025-07-20 15:47:11,768 - sglang - INFO - ^^^^^^^^^^^^^^^^^
  15793. 2025-07-20 15:47:11,768 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
  15794. 2025-07-20 15:47:11,768 - sglang - INFO - resized_height, resized_width = smart_resize(
  15795. 2025-07-20 15:47:11,768 - sglang - INFO - ^^^^^^^^^^^^^
  15796. 2025-07-20 15:47:11,768 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
  15797. 2025-07-20 15:47:11,768 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
  15798. 2025-07-20 15:47:11,768 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
  15799. 2025-07-20 15:47:11,768 - sglang - INFO -
  15800. 2025-07-20 15:47:11,770 - sglang - INFO - [2025-07-20 15:47:11] ERROR: Exception in ASGI application
  15801. 2025-07-20 15:47:11,770 - sglang - INFO - Traceback (most recent call last):
  15802. 2025-07-20 15:47:11,770 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
  15803. 2025-07-20 15:47:11,770 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
  15804. 2025-07-20 15:47:11,770 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15805. 2025-07-20 15:47:11,770 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
  15806. 2025-07-20 15:47:11,770 - sglang - INFO - return await self.app(scope, receive, send)
  15807. 2025-07-20 15:47:11,771 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15808. 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
  15809. 2025-07-20 15:47:11,771 - sglang - INFO - await super().__call__(scope, receive, send)
  15810. 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
  15811. 2025-07-20 15:47:11,771 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15812. 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
  15813. 2025-07-20 15:47:11,771 - sglang - INFO - raise exc
  15814. 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
  15815. 2025-07-20 15:47:11,771 - sglang - INFO - await self.app(scope, receive, _send)
  15816. 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
  15817. 2025-07-20 15:47:11,771 - sglang - INFO - await self.app(scope, receive, send)
  15818. 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
  15819. 2025-07-20 15:47:11,771 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  15820. 2025-07-20 15:47:11,771 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15821. 2025-07-20 15:47:11,771 - sglang - INFO - raise exc
  15822. 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15823. 2025-07-20 15:47:11,772 - sglang - INFO - await app(scope, receive, sender)
  15824. 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
  15825. 2025-07-20 15:47:11,772 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15826. 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
  15827. 2025-07-20 15:47:11,772 - sglang - INFO - await route.handle(scope, receive, send)
  15828. 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
  15829. 2025-07-20 15:47:11,772 - sglang - INFO - await self.app(scope, receive, send)
  15830. 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
  15831. 2025-07-20 15:47:11,772 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  15832. 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15833. 2025-07-20 15:47:11,772 - sglang - INFO - raise exc
  15834. 2025-07-20 15:47:11,772 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15835. 2025-07-20 15:47:11,772 - sglang - INFO - await app(scope, receive, sender)
  15836. 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
  15837. 2025-07-20 15:47:11,773 - sglang - INFO - response = await f(request)
  15838. 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^
  15839. 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
  15840. 2025-07-20 15:47:11,773 - sglang - INFO - raw_response = await run_endpoint_function(
  15841. 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15842. 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
  15843. 2025-07-20 15:47:11,773 - sglang - INFO - return await dependant.call(**values)
  15844. 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15845. 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
  15846. 2025-07-20 15:47:11,773 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
  15847. 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15848. 2025-07-20 15:47:11,773 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
  15849. 2025-07-20 15:47:11,773 - sglang - INFO - ret = await tokenizer_manager.generate_request(
  15850. 2025-07-20 15:47:11,773 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15851. 2025-07-20 15:47:11,774 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
  15852. 2025-07-20 15:47:11,774 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
  15853. 2025-07-20 15:47:11,774 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15854. 2025-07-20 15:47:11,774 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
  15855. 2025-07-20 15:47:11,774 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
  15856. 2025-07-20 15:47:11,774 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15857. 2025-07-20 15:47:11,774 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
  15858. 2025-07-20 15:47:11,774 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
  15859. 2025-07-20 15:47:11,774 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15860. 2025-07-20 15:47:11,774 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
  15861. 2025-07-20 15:47:11,775 - __main__ - WARNING - ValueError on attempt 2 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
  15862. 2025-07-20 15:47:12,085 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
  15863. 2025-07-20 15:47:12,117 - sglang - INFO - [2025-07-20 15:47:12] Exception in TokenizerManager:
  15864. 2025-07-20 15:47:12,117 - sglang - INFO - Traceback (most recent call last):
  15865. 2025-07-20 15:47:12,117 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
  15866. 2025-07-20 15:47:12,117 - sglang - INFO - process_result = image_processor(image)
  15867. 2025-07-20 15:47:12,117 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  15868. 2025-07-20 15:47:12,117 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
  15869. 2025-07-20 15:47:12,117 - sglang - INFO - return self.preprocess(images, **kwargs)
  15870. 2025-07-20 15:47:12,117 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15871. 2025-07-20 15:47:12,117 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
  15872. 2025-07-20 15:47:12,117 - sglang - INFO - patches, image_grid_thw = self._preprocess(
  15873. 2025-07-20 15:47:12,117 - sglang - INFO - ^^^^^^^^^^^^^^^^^
  15874. 2025-07-20 15:47:12,118 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
  15875. 2025-07-20 15:47:12,118 - sglang - INFO - resized_height, resized_width = smart_resize(
  15876. 2025-07-20 15:47:12,118 - sglang - INFO - ^^^^^^^^^^^^^
  15877. 2025-07-20 15:47:12,118 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
  15878. 2025-07-20 15:47:12,118 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
  15879. 2025-07-20 15:47:12,118 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
  15880. 2025-07-20 15:47:12,118 - sglang - INFO -
  15881. 2025-07-20 15:47:12,120 - sglang - INFO - [2025-07-20 15:47:12] ERROR: Exception in ASGI application
  15882. 2025-07-20 15:47:12,120 - sglang - INFO - Traceback (most recent call last):
  15883. 2025-07-20 15:47:12,120 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
  15884. 2025-07-20 15:47:12,120 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
  15885. 2025-07-20 15:47:12,120 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15886. 2025-07-20 15:47:12,120 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
  15887. 2025-07-20 15:47:12,120 - sglang - INFO - return await self.app(scope, receive, send)
  15888. 2025-07-20 15:47:12,120 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15889. 2025-07-20 15:47:12,120 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
  15890. 2025-07-20 15:47:12,120 - sglang - INFO - await super().__call__(scope, receive, send)
  15891. 2025-07-20 15:47:12,120 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
  15892. 2025-07-20 15:47:12,121 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15893. 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
  15894. 2025-07-20 15:47:12,121 - sglang - INFO - raise exc
  15895. 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
  15896. 2025-07-20 15:47:12,121 - sglang - INFO - await self.app(scope, receive, _send)
  15897. 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
  15898. 2025-07-20 15:47:12,121 - sglang - INFO - await self.app(scope, receive, send)
  15899. 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
  15900. 2025-07-20 15:47:12,121 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  15901. 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15902. 2025-07-20 15:47:12,121 - sglang - INFO - raise exc
  15903. 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15904. 2025-07-20 15:47:12,121 - sglang - INFO - await app(scope, receive, sender)
  15905. 2025-07-20 15:47:12,121 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
  15906. 2025-07-20 15:47:12,122 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15907. 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
  15908. 2025-07-20 15:47:12,122 - sglang - INFO - await route.handle(scope, receive, send)
  15909. 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
  15910. 2025-07-20 15:47:12,122 - sglang - INFO - await self.app(scope, receive, send)
  15911. 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
  15912. 2025-07-20 15:47:12,122 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  15913. 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15914. 2025-07-20 15:47:12,122 - sglang - INFO - raise exc
  15915. 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15916. 2025-07-20 15:47:12,122 - sglang - INFO - await app(scope, receive, sender)
  15917. 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
  15918. 2025-07-20 15:47:12,122 - sglang - INFO - response = await f(request)
  15919. 2025-07-20 15:47:12,122 - sglang - INFO - ^^^^^^^^^^^^^^^^
  15920. 2025-07-20 15:47:12,122 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
  15921. 2025-07-20 15:47:12,122 - sglang - INFO - raw_response = await run_endpoint_function(
  15922. 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15923. 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
  15924. 2025-07-20 15:47:12,123 - sglang - INFO - return await dependant.call(**values)
  15925. 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15926. 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
  15927. 2025-07-20 15:47:12,123 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
  15928. 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15929. 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
  15930. 2025-07-20 15:47:12,123 - sglang - INFO - ret = await tokenizer_manager.generate_request(
  15931. 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15932. 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
  15933. 2025-07-20 15:47:12,123 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
  15934. 2025-07-20 15:47:12,123 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15935. 2025-07-20 15:47:12,123 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
  15936. 2025-07-20 15:47:12,123 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
  15937. 2025-07-20 15:47:12,124 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15938. 2025-07-20 15:47:12,124 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
  15939. 2025-07-20 15:47:12,124 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
  15940. 2025-07-20 15:47:12,124 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15941. 2025-07-20 15:47:12,124 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
  15942. 2025-07-20 15:47:12,132 - __main__ - WARNING - ValueError on attempt 3 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
  15943. 2025-07-20 15:47:12,592 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
  15944. 2025-07-20 15:47:12,620 - sglang - INFO - [2025-07-20 15:47:12] Exception in TokenizerManager:
  15945. 2025-07-20 15:47:12,620 - sglang - INFO - Traceback (most recent call last):
  15946. 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
  15947. 2025-07-20 15:47:12,621 - sglang - INFO - process_result = image_processor(image)
  15948. 2025-07-20 15:47:12,621 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  15949. 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
  15950. 2025-07-20 15:47:12,621 - sglang - INFO - return self.preprocess(images, **kwargs)
  15951. 2025-07-20 15:47:12,621 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15952. 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
  15953. 2025-07-20 15:47:12,621 - sglang - INFO - patches, image_grid_thw = self._preprocess(
  15954. 2025-07-20 15:47:12,621 - sglang - INFO - ^^^^^^^^^^^^^^^^^
  15955. 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
  15956. 2025-07-20 15:47:12,621 - sglang - INFO - resized_height, resized_width = smart_resize(
  15957. 2025-07-20 15:47:12,621 - sglang - INFO - ^^^^^^^^^^^^^
  15958. 2025-07-20 15:47:12,621 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
  15959. 2025-07-20 15:47:12,621 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
  15960. 2025-07-20 15:47:12,622 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
  15961. 2025-07-20 15:47:12,622 - sglang - INFO -
  15962. 2025-07-20 15:47:12,634 - sglang - INFO - [2025-07-20 15:47:12] ERROR: Exception in ASGI application
  15963. 2025-07-20 15:47:12,634 - sglang - INFO - Traceback (most recent call last):
  15964. 2025-07-20 15:47:12,634 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
  15965. 2025-07-20 15:47:12,634 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
  15966. 2025-07-20 15:47:12,634 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15967. 2025-07-20 15:47:12,634 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
  15968. 2025-07-20 15:47:12,634 - sglang - INFO - return await self.app(scope, receive, send)
  15969. 2025-07-20 15:47:12,635 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  15970. 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
  15971. 2025-07-20 15:47:12,635 - sglang - INFO - await super().__call__(scope, receive, send)
  15972. 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
  15973. 2025-07-20 15:47:12,635 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15974. 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
  15975. 2025-07-20 15:47:12,635 - sglang - INFO - raise exc
  15976. 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
  15977. 2025-07-20 15:47:12,635 - sglang - INFO - await self.app(scope, receive, _send)
  15978. 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
  15979. 2025-07-20 15:47:12,635 - sglang - INFO - await self.app(scope, receive, send)
  15980. 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
  15981. 2025-07-20 15:47:12,635 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  15982. 2025-07-20 15:47:12,635 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15983. 2025-07-20 15:47:12,635 - sglang - INFO - raise exc
  15984. 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15985. 2025-07-20 15:47:12,636 - sglang - INFO - await app(scope, receive, sender)
  15986. 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
  15987. 2025-07-20 15:47:12,636 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  15988. 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
  15989. 2025-07-20 15:47:12,636 - sglang - INFO - await route.handle(scope, receive, send)
  15990. 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
  15991. 2025-07-20 15:47:12,636 - sglang - INFO - await self.app(scope, receive, send)
  15992. 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
  15993. 2025-07-20 15:47:12,636 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  15994. 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  15995. 2025-07-20 15:47:12,636 - sglang - INFO - raise exc
  15996. 2025-07-20 15:47:12,636 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  15997. 2025-07-20 15:47:12,636 - sglang - INFO - await app(scope, receive, sender)
  15998. 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
  15999. 2025-07-20 15:47:12,637 - sglang - INFO - response = await f(request)
  16000. 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^
  16001. 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
  16002. 2025-07-20 15:47:12,637 - sglang - INFO - raw_response = await run_endpoint_function(
  16003. 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16004. 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
  16005. 2025-07-20 15:47:12,637 - sglang - INFO - return await dependant.call(**values)
  16006. 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16007. 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
  16008. 2025-07-20 15:47:12,637 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
  16009. 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16010. 2025-07-20 15:47:12,637 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
  16011. 2025-07-20 15:47:12,637 - sglang - INFO - ret = await tokenizer_manager.generate_request(
  16012. 2025-07-20 15:47:12,637 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16013. 2025-07-20 15:47:12,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
  16014. 2025-07-20 15:47:12,638 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
  16015. 2025-07-20 15:47:12,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16016. 2025-07-20 15:47:12,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
  16017. 2025-07-20 15:47:12,638 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
  16018. 2025-07-20 15:47:12,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16019. 2025-07-20 15:47:12,638 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
  16020. 2025-07-20 15:47:12,638 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
  16021. 2025-07-20 15:47:12,638 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16022. 2025-07-20 15:47:12,638 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
  16023. 2025-07-20 15:47:12,638 - __main__ - WARNING - ValueError on attempt 4 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
  16024. 2025-07-20 15:47:12,936 - sglang - INFO - [2025-07-20 15:47:12 TP0] Decode batch. #running-req: 9, #token: 27603, token usage: 0.73, gen throughput (token/s): 119.10, #queue-req: 520
  16025. 2025-07-20 15:47:12,936 - __main__ - INFO - sglang running req: 9 queue req: 520
  16026. 2025-07-20 15:47:12,965 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
  16027. 2025-07-20 15:47:12,999 - sglang - INFO - [2025-07-20 15:47:12] Exception in TokenizerManager:
  16028. 2025-07-20 15:47:12,999 - sglang - INFO - Traceback (most recent call last):
  16029. 2025-07-20 15:47:12,999 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
  16030. 2025-07-20 15:47:12,999 - sglang - INFO - process_result = image_processor(image)
  16031. 2025-07-20 15:47:12,999 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  16032. 2025-07-20 15:47:12,999 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
  16033. 2025-07-20 15:47:12,999 - sglang - INFO - return self.preprocess(images, **kwargs)
  16034. 2025-07-20 15:47:12,999 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16035. 2025-07-20 15:47:12,999 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
  16036. 2025-07-20 15:47:13,000 - sglang - INFO - patches, image_grid_thw = self._preprocess(
  16037. 2025-07-20 15:47:13,000 - sglang - INFO - ^^^^^^^^^^^^^^^^^
  16038. 2025-07-20 15:47:13,000 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
  16039. 2025-07-20 15:47:13,000 - sglang - INFO - resized_height, resized_width = smart_resize(
  16040. 2025-07-20 15:47:13,000 - sglang - INFO - ^^^^^^^^^^^^^
  16041. 2025-07-20 15:47:13,000 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
  16042. 2025-07-20 15:47:13,000 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
  16043. 2025-07-20 15:47:13,000 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
  16044. 2025-07-20 15:47:13,000 - sglang - INFO -
  16045. 2025-07-20 15:47:13,004 - sglang - INFO - [2025-07-20 15:47:13] ERROR: Exception in ASGI application
  16046. 2025-07-20 15:47:13,004 - sglang - INFO - Traceback (most recent call last):
  16047. 2025-07-20 15:47:13,004 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
  16048. 2025-07-20 15:47:13,004 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
  16049. 2025-07-20 15:47:13,004 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16050. 2025-07-20 15:47:13,004 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
  16051. 2025-07-20 15:47:13,005 - sglang - INFO - return await self.app(scope, receive, send)
  16052. 2025-07-20 15:47:13,005 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16053. 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
  16054. 2025-07-20 15:47:13,005 - sglang - INFO - await super().__call__(scope, receive, send)
  16055. 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
  16056. 2025-07-20 15:47:13,005 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  16057. 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
  16058. 2025-07-20 15:47:13,005 - sglang - INFO - raise exc
  16059. 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
  16060. 2025-07-20 15:47:13,005 - sglang - INFO - await self.app(scope, receive, _send)
  16061. 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
  16062. 2025-07-20 15:47:13,005 - sglang - INFO - await self.app(scope, receive, send)
  16063. 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
  16064. 2025-07-20 15:47:13,005 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  16065. 2025-07-20 15:47:13,005 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  16066. 2025-07-20 15:47:13,006 - sglang - INFO - raise exc
  16067. 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  16068. 2025-07-20 15:47:13,006 - sglang - INFO - await app(scope, receive, sender)
  16069. 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
  16070. 2025-07-20 15:47:13,006 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  16071. 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
  16072. 2025-07-20 15:47:13,006 - sglang - INFO - await route.handle(scope, receive, send)
  16073. 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
  16074. 2025-07-20 15:47:13,006 - sglang - INFO - await self.app(scope, receive, send)
  16075. 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
  16076. 2025-07-20 15:47:13,006 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  16077. 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  16078. 2025-07-20 15:47:13,006 - sglang - INFO - raise exc
  16079. 2025-07-20 15:47:13,006 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  16080. 2025-07-20 15:47:13,007 - sglang - INFO - await app(scope, receive, sender)
  16081. 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
  16082. 2025-07-20 15:47:13,007 - sglang - INFO - response = await f(request)
  16083. 2025-07-20 15:47:13,007 - sglang - INFO - ^^^^^^^^^^^^^^^^
  16084. 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
  16085. 2025-07-20 15:47:13,007 - sglang - INFO - raw_response = await run_endpoint_function(
  16086. 2025-07-20 15:47:13,007 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16087. 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
  16088. 2025-07-20 15:47:13,007 - sglang - INFO - return await dependant.call(**values)
  16089. 2025-07-20 15:47:13,007 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16090. 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
  16091. 2025-07-20 15:47:13,007 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
  16092. 2025-07-20 15:47:13,007 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16093. 2025-07-20 15:47:13,007 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
  16094. 2025-07-20 15:47:13,007 - sglang - INFO - ret = await tokenizer_manager.generate_request(
  16095. 2025-07-20 15:47:13,008 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16096. 2025-07-20 15:47:13,008 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
  16097. 2025-07-20 15:47:13,008 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
  16098. 2025-07-20 15:47:13,008 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16099. 2025-07-20 15:47:13,008 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
  16100. 2025-07-20 15:47:13,008 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
  16101. 2025-07-20 15:47:13,008 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16102. 2025-07-20 15:47:13,008 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
  16103. 2025-07-20 15:47:13,008 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
  16104. 2025-07-20 15:47:13,008 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16105. 2025-07-20 15:47:13,008 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
  16106. 2025-07-20 15:47:13,009 - __main__ - WARNING - ValueError on attempt 5 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
  16107. 2025-07-20 15:47:13,362 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
  16108. 2025-07-20 15:47:13,390 - sglang - INFO - [2025-07-20 15:47:13] Exception in TokenizerManager:
  16109. 2025-07-20 15:47:13,390 - sglang - INFO - Traceback (most recent call last):
  16110. 2025-07-20 15:47:13,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
  16111. 2025-07-20 15:47:13,390 - sglang - INFO - process_result = image_processor(image)
  16112. 2025-07-20 15:47:13,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  16113. 2025-07-20 15:47:13,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
  16114. 2025-07-20 15:47:13,390 - sglang - INFO - return self.preprocess(images, **kwargs)
  16115. 2025-07-20 15:47:13,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16116. 2025-07-20 15:47:13,390 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
  16117. 2025-07-20 15:47:13,390 - sglang - INFO - patches, image_grid_thw = self._preprocess(
  16118. 2025-07-20 15:47:13,390 - sglang - INFO - ^^^^^^^^^^^^^^^^^
  16119. 2025-07-20 15:47:13,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
  16120. 2025-07-20 15:47:13,391 - sglang - INFO - resized_height, resized_width = smart_resize(
  16121. 2025-07-20 15:47:13,391 - sglang - INFO - ^^^^^^^^^^^^^
  16122. 2025-07-20 15:47:13,391 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
  16123. 2025-07-20 15:47:13,391 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
  16124. 2025-07-20 15:47:13,391 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
  16125. 2025-07-20 15:47:13,391 - sglang - INFO -
  16126. 2025-07-20 15:47:13,394 - sglang - INFO - [2025-07-20 15:47:13] ERROR: Exception in ASGI application
  16127. 2025-07-20 15:47:13,395 - sglang - INFO - Traceback (most recent call last):
  16128. 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
  16129. 2025-07-20 15:47:13,395 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
  16130. 2025-07-20 15:47:13,395 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16131. 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
  16132. 2025-07-20 15:47:13,395 - sglang - INFO - return await self.app(scope, receive, send)
  16133. 2025-07-20 15:47:13,395 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16134. 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
  16135. 2025-07-20 15:47:13,395 - sglang - INFO - await super().__call__(scope, receive, send)
  16136. 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
  16137. 2025-07-20 15:47:13,395 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  16138. 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
  16139. 2025-07-20 15:47:13,395 - sglang - INFO - raise exc
  16140. 2025-07-20 15:47:13,395 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
  16141. 2025-07-20 15:47:13,396 - sglang - INFO - await self.app(scope, receive, _send)
  16142. 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
  16143. 2025-07-20 15:47:13,396 - sglang - INFO - await self.app(scope, receive, send)
  16144. 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
  16145. 2025-07-20 15:47:13,396 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  16146. 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  16147. 2025-07-20 15:47:13,396 - sglang - INFO - raise exc
  16148. 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  16149. 2025-07-20 15:47:13,396 - sglang - INFO - await app(scope, receive, sender)
  16150. 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
  16151. 2025-07-20 15:47:13,396 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  16152. 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
  16153. 2025-07-20 15:47:13,396 - sglang - INFO - await route.handle(scope, receive, send)
  16154. 2025-07-20 15:47:13,396 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
  16155. 2025-07-20 15:47:13,396 - sglang - INFO - await self.app(scope, receive, send)
  16156. 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
  16157. 2025-07-20 15:47:13,397 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  16158. 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  16159. 2025-07-20 15:47:13,397 - sglang - INFO - raise exc
  16160. 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  16161. 2025-07-20 15:47:13,397 - sglang - INFO - await app(scope, receive, sender)
  16162. 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
  16163. 2025-07-20 15:47:13,397 - sglang - INFO - response = await f(request)
  16164. 2025-07-20 15:47:13,397 - sglang - INFO - ^^^^^^^^^^^^^^^^
  16165. 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
  16166. 2025-07-20 15:47:13,397 - sglang - INFO - raw_response = await run_endpoint_function(
  16167. 2025-07-20 15:47:13,397 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16168. 2025-07-20 15:47:13,397 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
  16169. 2025-07-20 15:47:13,397 - sglang - INFO - return await dependant.call(**values)
  16170. 2025-07-20 15:47:13,397 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16171. 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
  16172. 2025-07-20 15:47:13,398 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
  16173. 2025-07-20 15:47:13,398 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16174. 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
  16175. 2025-07-20 15:47:13,398 - sglang - INFO - ret = await tokenizer_manager.generate_request(
  16176. 2025-07-20 15:47:13,398 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16177. 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
  16178. 2025-07-20 15:47:13,398 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
  16179. 2025-07-20 15:47:13,398 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16180. 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
  16181. 2025-07-20 15:47:13,398 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
  16182. 2025-07-20 15:47:13,398 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16183. 2025-07-20 15:47:13,398 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
  16184. 2025-07-20 15:47:13,398 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
  16185. 2025-07-20 15:47:13,399 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16186. 2025-07-20 15:47:13,399 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
  16187. 2025-07-20 15:47:13,399 - __main__ - WARNING - ValueError on attempt 6 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
  16188. 2025-07-20 15:47:13,758 - __main__ - INFO - Built page query for tests/gnarly_pdfs/skinnypage.pdf-2
  16189. 2025-07-20 15:47:13,766 - sglang - INFO - [2025-07-20 15:47:13 TP0] Prefill batch. #new-seq: 1, #new-token: 1665, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 8, #queue-req: 519
  16190. 2025-07-20 15:47:13,766 - __main__ - INFO - sglang running req: 8 queue req: 519
  16191. 2025-07-20 15:47:13,786 - sglang - INFO - [2025-07-20 15:47:13] Exception in TokenizerManager:
  16192. 2025-07-20 15:47:13,786 - sglang - INFO - Traceback (most recent call last):
  16193. 2025-07-20 15:47:13,786 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 417, in _process_single_image_task
  16194. 2025-07-20 15:47:13,786 - sglang - INFO - process_result = image_processor(image)
  16195. 2025-07-20 15:47:13,786 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^
  16196. 2025-07-20 15:47:13,786 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/image_processing_utils.py", line 41, in __call__
  16197. 2025-07-20 15:47:13,786 - sglang - INFO - return self.preprocess(images, **kwargs)
  16198. 2025-07-20 15:47:13,786 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16199. 2025-07-20 15:47:13,786 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 417, in preprocess
  16200. 2025-07-20 15:47:13,786 - sglang - INFO - patches, image_grid_thw = self._preprocess(
  16201. 2025-07-20 15:47:13,786 - sglang - INFO - ^^^^^^^^^^^^^^^^^
  16202. 2025-07-20 15:47:13,786 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 269, in _preprocess
  16203. 2025-07-20 15:47:13,786 - sglang - INFO - resized_height, resized_width = smart_resize(
  16204. 2025-07-20 15:47:13,787 - sglang - INFO - ^^^^^^^^^^^^^
  16205. 2025-07-20 15:47:13,787 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py", line 112, in smart_resize
  16206. 2025-07-20 15:47:13,787 - sglang - INFO - raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
  16207. 2025-07-20 15:47:13,787 - sglang - INFO - ValueError: height:1024 or width:17 must be larger than factor:28
  16208. 2025-07-20 15:47:13,787 - sglang - INFO -
  16209. 2025-07-20 15:47:13,791 - sglang - INFO - [2025-07-20 15:47:13] ERROR: Exception in ASGI application
  16210. 2025-07-20 15:47:13,791 - sglang - INFO - Traceback (most recent call last):
  16211. 2025-07-20 15:47:13,791 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
  16212. 2025-07-20 15:47:13,791 - sglang - INFO - result = await app( # type: ignore[func-returns-value]
  16213. 2025-07-20 15:47:13,791 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16214. 2025-07-20 15:47:13,791 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
  16215. 2025-07-20 15:47:13,791 - sglang - INFO - return await self.app(scope, receive, send)
  16216. 2025-07-20 15:47:13,791 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16217. 2025-07-20 15:47:13,791 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
  16218. 2025-07-20 15:47:13,791 - sglang - INFO - await super().__call__(scope, receive, send)
  16219. 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/applications.py", line 112, in __call__
  16220. 2025-07-20 15:47:13,792 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  16221. 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 187, in __call__
  16222. 2025-07-20 15:47:13,792 - sglang - INFO - raise exc
  16223. 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
  16224. 2025-07-20 15:47:13,792 - sglang - INFO - await self.app(scope, receive, _send)
  16225. 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/cors.py", line 85, in __call__
  16226. 2025-07-20 15:47:13,792 - sglang - INFO - await self.app(scope, receive, send)
  16227. 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
  16228. 2025-07-20 15:47:13,792 - sglang - INFO - await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  16229. 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  16230. 2025-07-20 15:47:13,792 - sglang - INFO - raise exc
  16231. 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  16232. 2025-07-20 15:47:13,792 - sglang - INFO - await app(scope, receive, sender)
  16233. 2025-07-20 15:47:13,792 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 714, in __call__
  16234. 2025-07-20 15:47:13,793 - sglang - INFO - await self.middleware_stack(scope, receive, send)
  16235. 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 734, in app
  16236. 2025-07-20 15:47:13,793 - sglang - INFO - await route.handle(scope, receive, send)
  16237. 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
  16238. 2025-07-20 15:47:13,793 - sglang - INFO - await self.app(scope, receive, send)
  16239. 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
  16240. 2025-07-20 15:47:13,793 - sglang - INFO - await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  16241. 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
  16242. 2025-07-20 15:47:13,793 - sglang - INFO - raise exc
  16243. 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
  16244. 2025-07-20 15:47:13,793 - sglang - INFO - await app(scope, receive, sender)
  16245. 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
  16246. 2025-07-20 15:47:13,793 - sglang - INFO - response = await f(request)
  16247. 2025-07-20 15:47:13,793 - sglang - INFO - ^^^^^^^^^^^^^^^^
  16248. 2025-07-20 15:47:13,793 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
  16249. 2025-07-20 15:47:13,794 - sglang - INFO - raw_response = await run_endpoint_function(
  16250. 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16251. 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
  16252. 2025-07-20 15:47:13,794 - sglang - INFO - return await dependant.call(**values)
  16253. 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16254. 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/entrypoints/http_server.py", line 406, in openai_v1_chat_completions
  16255. 2025-07-20 15:47:13,794 - sglang - INFO - return await v1_chat_completions(_global_state.tokenizer_manager, raw_request)
  16256. 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16257. 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/openai_api/adapter.py", line 1426, in v1_chat_completions
  16258. 2025-07-20 15:47:13,794 - sglang - INFO - ret = await tokenizer_manager.generate_request(
  16259. 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16260. 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 291, in generate_request
  16261. 2025-07-20 15:47:13,794 - sglang - INFO - tokenized_obj = await self._tokenize_one_request(obj)
  16262. 2025-07-20 15:47:13,794 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16263. 2025-07-20 15:47:13,794 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/tokenizer_manager.py", line 331, in _tokenize_one_request
  16264. 2025-07-20 15:47:13,794 - sglang - INFO - image_inputs: Dict = await self.image_processor.process_images_async(
  16265. 2025-07-20 15:47:13,795 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16266. 2025-07-20 15:47:13,795 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/image_processor.py", line 474, in process_images_async
  16267. 2025-07-20 15:47:13,795 - sglang - INFO - pixel_values, image_hash, image_size, image_grid_thw = (
  16268. 2025-07-20 15:47:13,795 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  16269. 2025-07-20 15:47:13,795 - sglang - INFO - TypeError: cannot unpack non-iterable NoneType object
  16270. 2025-07-20 15:47:13,795 - __main__ - WARNING - ValueError on attempt 7 for tests/gnarly_pdfs/skinnypage.pdf-2: <class 'ValueError'> - Got InternalServerError from server: b'Internal Server Error', skipping this response
  16271. 2025-07-20 15:47:13,796 - __main__ - ERROR - Failed to process tests/gnarly_pdfs/skinnypage.pdf-2 after 8 attempts.
  16272. 2025-07-20 15:47:14,614 - sglang - INFO - [2025-07-20 15:47:14 TP0] Decode batch. #running-req: 9, #token: 27427, token usage: 0.72, gen throughput (token/s): 213.85, #queue-req: 519
  16273. 2025-07-20 15:47:14,615 - __main__ - INFO - sglang running req: 9 queue req: 519
  16274. 2025-07-20 15:47:14,745 - __main__ - INFO - Queue remaining: 0
  16275. 2025-07-20 15:47:14,746 - __main__ - INFO -
  16276. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16277. ----------------------------------------------------------------------------------
  16278. finished_input_tokens 30.85 137.88
  16279. finished_output_tokens 11.83 52.86
  16280. sglang_input_tokens 904.76 888.36
  16281. sglang_output_tokens 259.71 263.92
  16282. 2025-07-20 15:47:14,746 - __main__ - INFO -
  16283. Worker ID | errored | finished | started
  16284. ----------+---------+----------+--------
  16285. 0 | 0 | 497 | 500
  16286. 1 | 0 | 10 | 10
  16287. 2 | 0 | 5 | 5
  16288. 3 | 1 | 2 | 529
  16289. 2025-07-20 15:47:14,907 - sglang - INFO - [2025-07-20 15:47:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2652, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 8, #queue-req: 518
  16290. 2025-07-20 15:47:14,907 - __main__ - INFO - sglang running req: 8 queue req: 518
  16291. 2025-07-20 15:47:16,391 - sglang - INFO - [2025-07-20 15:47:16 TP0] Decode batch. #running-req: 9, #token: 28177, token usage: 0.74, gen throughput (token/s): 202.06, #queue-req: 518
  16292. 2025-07-20 15:47:16,391 - __main__ - INFO - sglang running req: 9 queue req: 518
  16293. 2025-07-20 15:47:16,430 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
  16294. 2025-07-20 15:47:16,788 - __main__ - WARNING - ValueError on attempt 1 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  16295. 2025-07-20 15:47:17,385 - sglang - INFO - [2025-07-20 15:47:17 TP0] Decode batch. #running-req: 9, #token: 28537, token usage: 0.75, gen throughput (token/s): 362.07, #queue-req: 518
  16296. 2025-07-20 15:47:17,386 - __main__ - INFO - sglang running req: 9 queue req: 518
  16297. 2025-07-20 15:47:18,401 - sglang - INFO - [2025-07-20 15:47:18 TP0] Decode batch. #running-req: 9, #token: 28897, token usage: 0.76, gen throughput (token/s): 354.31, #queue-req: 518
  16298. 2025-07-20 15:47:18,402 - __main__ - INFO - sglang running req: 9 queue req: 518
  16299. 2025-07-20 15:47:19,510 - sglang - INFO - [2025-07-20 15:47:19 TP0] Decode batch. #running-req: 9, #token: 29257, token usage: 0.77, gen throughput (token/s): 324.66, #queue-req: 518
  16300. 2025-07-20 15:47:19,511 - __main__ - INFO - sglang running req: 9 queue req: 518
  16301. 2025-07-20 15:47:20,500 - sglang - INFO - [2025-07-20 15:47:20 TP0] Decode batch. #running-req: 9, #token: 29617, token usage: 0.78, gen throughput (token/s): 363.90, #queue-req: 518
  16302. 2025-07-20 15:47:20,500 - __main__ - INFO - sglang running req: 9 queue req: 518
  16303. 2025-07-20 15:47:21,480 - sglang - INFO - [2025-07-20 15:47:21 TP0] Decode batch. #running-req: 9, #token: 29977, token usage: 0.79, gen throughput (token/s): 367.29, #queue-req: 518
  16304. 2025-07-20 15:47:21,480 - __main__ - INFO - sglang running req: 9 queue req: 518
  16305. 2025-07-20 15:47:21,784 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
  16306. 2025-07-20 15:47:22,060 - __main__ - WARNING - ValueError on attempt 2 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  16307. 2025-07-20 15:47:22,124 - __main__ - WARNING - JSON decode error on attempt 2 for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12: Unterminated string starting at: line 1 column 125 (char 124)
  16308. 2025-07-20 15:47:22,142 - sglang - INFO - [2025-07-20 15:47:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2744, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 8, #queue-req: 517
  16309. 2025-07-20 15:47:22,142 - __main__ - INFO - sglang running req: 8 queue req: 517
  16310. 2025-07-20 15:47:22,402 - __main__ - INFO - Built page query for scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12
  16311. 2025-07-20 15:47:23,301 - sglang - INFO - [2025-07-20 15:47:23 TP0] Decode batch. #running-req: 9, #token: 29055, token usage: 0.76, gen throughput (token/s): 197.12, #queue-req: 518
  16312. 2025-07-20 15:47:23,301 - __main__ - INFO - sglang running req: 9 queue req: 518
  16313. 2025-07-20 15:47:24,101 - sglang - INFO - [2025-07-20 15:47:24 TP0] Prefill batch. #new-seq: 1, #new-token: 1677, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 8, #queue-req: 517
  16314. 2025-07-20 15:47:24,101 - __main__ - INFO - sglang running req: 8 queue req: 517
  16315. 2025-07-20 15:47:24,747 - __main__ - INFO - Queue remaining: 0
  16316. 2025-07-20 15:47:24,747 - __main__ - INFO -
  16317. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16318. ----------------------------------------------------------------------------------
  16319. finished_input_tokens 30.62 137.88
  16320. finished_output_tokens 11.74 52.86
  16321. sglang_input_tokens 902.88 869.90
  16322. sglang_output_tokens 259.10 259.87
  16323. 2025-07-20 15:47:24,748 - __main__ - INFO -
  16324. Worker ID | errored | finished | started
  16325. ----------+---------+----------+--------
  16326. 0 | 0 | 497 | 500
  16327. 1 | 0 | 10 | 10
  16328. 2 | 0 | 5 | 5
  16329. 3 | 1 | 4 | 529
  16330. 2025-07-20 15:47:24,937 - sglang - INFO - [2025-07-20 15:47:24 TP0] Decode batch. #running-req: 9, #token: 29106, token usage: 0.77, gen throughput (token/s): 219.44, #queue-req: 517
  16331. 2025-07-20 15:47:24,937 - __main__ - INFO - sglang running req: 9 queue req: 517
  16332. 2025-07-20 15:47:25,318 - __main__ - WARNING - JSON decode error on attempt 2 for scripts/data/11445200MB2D6222364440125017008.pdf-13: Unterminated string starting at: line 1 column 125 (char 124)
  16333. 2025-07-20 15:47:25,334 - sglang - INFO - [2025-07-20 15:47:25 TP0] Prefill batch. #new-seq: 2, #new-token: 5740, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.61, #running-req: 8, #queue-req: 515
  16334. 2025-07-20 15:47:25,334 - __main__ - INFO - sglang running req: 8 queue req: 515
  16335. 2025-07-20 15:47:25,589 - __main__ - INFO - Built page query for scripts/data/11445200MB2D6222364440125017008.pdf-13
  16336. 2025-07-20 15:47:27,163 - sglang - INFO - [2025-07-20 15:47:27 TP0] Prefill batch. #new-seq: 1, #new-token: 3205, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 515
  16337. 2025-07-20 15:47:27,163 - __main__ - INFO - sglang running req: 9 queue req: 515
  16338. 2025-07-20 15:47:27,285 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
  16339. 2025-07-20 15:47:27,533 - __main__ - WARNING - ValueError on attempt 3 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  16340. 2025-07-20 15:47:28,698 - sglang - INFO - [2025-07-20 15:47:28 TP0] Decode batch. #running-req: 10, #token: 32321, token usage: 0.85, gen throughput (token/s): 101.58, #queue-req: 515
  16341. 2025-07-20 15:47:28,698 - __main__ - INFO - sglang running req: 10 queue req: 515
  16342. 2025-07-20 15:47:29,689 - sglang - INFO - [2025-07-20 15:47:29 TP0] Decode batch. #running-req: 10, #token: 32721, token usage: 0.86, gen throughput (token/s): 403.49, #queue-req: 515
  16343. 2025-07-20 15:47:29,689 - __main__ - INFO - sglang running req: 10 queue req: 515
  16344. 2025-07-20 15:47:30,211 - sglang - INFO - [2025-07-20 15:47:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2912, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 514
  16345. 2025-07-20 15:47:30,211 - __main__ - INFO - sglang running req: 9 queue req: 514
  16346. 2025-07-20 15:47:31,546 - sglang - INFO - [2025-07-20 15:47:31 TP0] Decode batch. #running-req: 10, #token: 31594, token usage: 0.83, gen throughput (token/s): 214.81, #queue-req: 514
  16347. 2025-07-20 15:47:31,547 - __main__ - INFO - sglang running req: 10 queue req: 514
  16348. 2025-07-20 15:47:32,494 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
  16349. 2025-07-20 15:47:32,667 - sglang - INFO - [2025-07-20 15:47:32 TP0] Decode batch. #running-req: 10, #token: 31994, token usage: 0.84, gen throughput (token/s): 357.05, #queue-req: 514
  16350. 2025-07-20 15:47:32,667 - __main__ - INFO - sglang running req: 10 queue req: 514
  16351. 2025-07-20 15:47:32,992 - __main__ - WARNING - ValueError on attempt 4 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  16352. 2025-07-20 15:47:33,735 - sglang - INFO - [2025-07-20 15:47:33 TP0] Decode batch. #running-req: 10, #token: 32394, token usage: 0.85, gen throughput (token/s): 374.52, #queue-req: 514
  16353. 2025-07-20 15:47:33,735 - __main__ - INFO - sglang running req: 10 queue req: 514
  16354. 2025-07-20 15:47:34,749 - __main__ - INFO - Queue remaining: 0
  16355. 2025-07-20 15:47:34,749 - __main__ - INFO -
  16356. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16357. ----------------------------------------------------------------------------------
  16358. finished_input_tokens 30.39 137.88
  16359. finished_output_tokens 11.65 52.86
  16360. sglang_input_tokens 903.53 859.63
  16361. sglang_output_tokens 259.07 258.80
  16362. 2025-07-20 15:47:34,749 - __main__ - INFO -
  16363. Worker ID | errored | finished | started
  16364. ----------+---------+----------+--------
  16365. 0 | 0 | 497 | 500
  16366. 1 | 0 | 10 | 10
  16367. 2 | 0 | 5 | 5
  16368. 3 | 1 | 7 | 529
  16369. 2025-07-20 15:47:34,749 - sglang - INFO - [2025-07-20 15:47:34 TP0] Decode batch. #running-req: 9, #token: 30869, token usage: 0.81, gen throughput (token/s): 388.23, #queue-req: 514
  16370. 2025-07-20 15:47:34,750 - __main__ - INFO - sglang running req: 9 queue req: 514
  16371. 2025-07-20 15:47:35,732 - sglang - INFO - [2025-07-20 15:47:35 TP0] Decode batch. #running-req: 9, #token: 31229, token usage: 0.82, gen throughput (token/s): 366.41, #queue-req: 514
  16372. 2025-07-20 15:47:35,732 - __main__ - INFO - sglang running req: 9 queue req: 514
  16373. 2025-07-20 15:47:36,517 - sglang - INFO - [2025-07-20 15:47:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2586, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 8, #queue-req: 513
  16374. 2025-07-20 15:47:36,518 - __main__ - INFO - sglang running req: 8 queue req: 513
  16375. 2025-07-20 15:47:37,533 - sglang - INFO - [2025-07-20 15:47:37 TP0] Decode batch. #running-req: 9, #token: 29582, token usage: 0.78, gen throughput (token/s): 199.39, #queue-req: 513
  16376. 2025-07-20 15:47:37,533 - __main__ - INFO - sglang running req: 9 queue req: 513
  16377. 2025-07-20 15:47:37,828 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
  16378. 2025-07-20 15:47:38,074 - __main__ - WARNING - ValueError on attempt 5 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  16379. 2025-07-20 15:47:38,519 - sglang - INFO - [2025-07-20 15:47:38 TP0] Decode batch. #running-req: 9, #token: 29942, token usage: 0.79, gen throughput (token/s): 365.13, #queue-req: 513
  16380. 2025-07-20 15:47:38,519 - __main__ - INFO - sglang running req: 9 queue req: 513
  16381. 2025-07-20 15:47:39,571 - sglang - INFO - [2025-07-20 15:47:39 TP0] Decode batch. #running-req: 9, #token: 30302, token usage: 0.80, gen throughput (token/s): 342.16, #queue-req: 513
  16382. 2025-07-20 15:47:39,571 - __main__ - INFO - sglang running req: 9 queue req: 513
  16383. 2025-07-20 15:47:39,794 - sglang - INFO - [2025-07-20 15:47:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2147, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 8, #queue-req: 512
  16384. 2025-07-20 15:47:39,794 - __main__ - INFO - sglang running req: 8 queue req: 512
  16385. 2025-07-20 15:47:41,477 - sglang - INFO - [2025-07-20 15:47:41 TP0] Decode batch. #running-req: 9, #token: 28236, token usage: 0.74, gen throughput (token/s): 188.38, #queue-req: 512
  16386. 2025-07-20 15:47:41,477 - __main__ - INFO - sglang running req: 9 queue req: 512
  16387. 2025-07-20 15:47:42,135 - sglang - INFO - [2025-07-20 15:47:42 TP0] Prefill batch. #new-seq: 1, #new-token: 3094, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 8, #queue-req: 511
  16388. 2025-07-20 15:47:42,135 - __main__ - INFO - sglang running req: 8 queue req: 511
  16389. 2025-07-20 15:47:43,397 - sglang - INFO - [2025-07-20 15:47:43 TP0] Decode batch. #running-req: 9, #token: 28425, token usage: 0.75, gen throughput (token/s): 186.91, #queue-req: 511
  16390. 2025-07-20 15:47:43,398 - __main__ - INFO - sglang running req: 9 queue req: 511
  16391. 2025-07-20 15:47:43,614 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
  16392. 2025-07-20 15:47:43,949 - __main__ - WARNING - ValueError on attempt 6 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  16393. 2025-07-20 15:47:44,382 - sglang - INFO - [2025-07-20 15:47:44 TP0] Decode batch. #running-req: 9, #token: 28785, token usage: 0.76, gen throughput (token/s): 365.76, #queue-req: 511
  16394. 2025-07-20 15:47:44,382 - __main__ - INFO - sglang running req: 9 queue req: 511
  16395. 2025-07-20 15:47:44,750 - __main__ - INFO - Queue remaining: 0
  16396. 2025-07-20 15:47:44,750 - __main__ - INFO -
  16397. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16398. ----------------------------------------------------------------------------------
  16399. finished_input_tokens 30.17 137.88
  16400. finished_output_tokens 11.57 52.86
  16401. sglang_input_tokens 904.02 870.38
  16402. sglang_output_tokens 259.17 262.51
  16403. 2025-07-20 15:47:44,750 - __main__ - INFO -
  16404. Worker ID | errored | finished | started
  16405. ----------+---------+----------+--------
  16406. 0 | 0 | 497 | 500
  16407. 1 | 0 | 10 | 10
  16408. 2 | 0 | 5 | 5
  16409. 3 | 1 | 10 | 529
  16410. 2025-07-20 15:47:45,371 - sglang - INFO - [2025-07-20 15:47:45 TP0] Decode batch. #running-req: 9, #token: 29145, token usage: 0.77, gen throughput (token/s): 363.94, #queue-req: 511
  16411. 2025-07-20 15:47:45,371 - __main__ - INFO - sglang running req: 9 queue req: 511
  16412. 2025-07-20 15:47:46,355 - sglang - INFO - [2025-07-20 15:47:46 TP0] Decode batch. #running-req: 9, #token: 29505, token usage: 0.78, gen throughput (token/s): 365.85, #queue-req: 511
  16413. 2025-07-20 15:47:46,355 - __main__ - INFO - sglang running req: 9 queue req: 511
  16414. 2025-07-20 15:47:47,338 - sglang - INFO - [2025-07-20 15:47:47 TP0] Decode batch. #running-req: 9, #token: 29865, token usage: 0.79, gen throughput (token/s): 366.14, #queue-req: 511
  16415. 2025-07-20 15:47:47,338 - __main__ - INFO - sglang running req: 9 queue req: 511
  16416. 2025-07-20 15:47:48,319 - sglang - INFO - [2025-07-20 15:47:48 TP0] Decode batch. #running-req: 9, #token: 30225, token usage: 0.80, gen throughput (token/s): 366.86, #queue-req: 511
  16417. 2025-07-20 15:47:48,320 - __main__ - INFO - sglang running req: 9 queue req: 511
  16418. 2025-07-20 15:47:48,446 - __main__ - INFO - Built page query for tests/gnarly_pdfs/map1.pdf-1
  16419. 2025-07-20 15:47:48,734 - __main__ - WARNING - ValueError on attempt 7 for tests/gnarly_pdfs/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  16420. 2025-07-20 15:47:48,735 - __main__ - ERROR - Failed to process tests/gnarly_pdfs/map1.pdf-1 after 8 attempts.
  16421. 2025-07-20 15:47:49,112 - __main__ - ERROR - Document tests/gnarly_pdfs/map1.pdf has 1 fallback pages out of 1 exceeding max_page_error_rate of 0.004, discarding document.
  16422. 2025-07-20 15:47:49,113 - sglang - INFO - [2025-07-20 15:47:48 TP0] Prefill batch. #new-seq: 1, #new-token: 3020, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 8, #queue-req: 510
  16423. 2025-07-20 15:47:49,113 - __main__ - INFO - sglang running req: 8 queue req: 510
  16424. 2025-07-20 15:47:50,207 - sglang - INFO - [2025-07-20 15:47:50 TP0] Decode batch. #running-req: 9, #token: 29711, token usage: 0.78, gen throughput (token/s): 190.20, #queue-req: 510
  16425. 2025-07-20 15:47:50,207 - __main__ - INFO - sglang running req: 9 queue req: 510
  16426. 2025-07-20 15:47:50,304 - sglang - INFO - [2025-07-20 15:47:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2297, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 8, #queue-req: 509
  16427. 2025-07-20 15:47:50,304 - __main__ - INFO - sglang running req: 8 queue req: 509
  16428. 2025-07-20 15:47:51,308 - sglang - INFO - [2025-07-20 15:47:51 TP0] Prefill batch. #new-seq: 2, #new-token: 4138, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 8, #queue-req: 507
  16429. 2025-07-20 15:47:51,309 - __main__ - INFO - sglang running req: 8 queue req: 507
  16430. 2025-07-20 15:47:53,095 - sglang - INFO - [2025-07-20 15:47:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2902, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 506
  16431. 2025-07-20 15:47:53,095 - __main__ - INFO - sglang running req: 9 queue req: 506
  16432. 2025-07-20 15:47:54,208 - sglang - INFO - [2025-07-20 15:47:54 TP0] Decode batch. #running-req: 10, #token: 28558, token usage: 0.75, gen throughput (token/s): 95.72, #queue-req: 506
  16433. 2025-07-20 15:47:54,208 - __main__ - INFO - sglang running req: 10 queue req: 506
  16434. 2025-07-20 15:47:54,429 - sglang - INFO - [2025-07-20 15:47:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2889, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 9, #queue-req: 505
  16435. 2025-07-20 15:47:54,430 - __main__ - INFO - sglang running req: 9 queue req: 505
  16436. 2025-07-20 15:47:54,751 - __main__ - INFO - Queue remaining: 0
  16437. 2025-07-20 15:47:54,751 - __main__ - INFO -
  16438. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16439. ----------------------------------------------------------------------------------
  16440. finished_input_tokens 29.95 137.88
  16441. finished_output_tokens 11.48 52.86
  16442. sglang_input_tokens 907.75 880.16
  16443. sglang_output_tokens 259.92 263.96
  16444. 2025-07-20 15:47:54,751 - __main__ - INFO -
  16445. Worker ID | errored | finished | started
  16446. ----------+---------+----------+--------
  16447. 0 | 0 | 497 | 500
  16448. 1 | 0 | 10 | 10
  16449. 2 | 0 | 5 | 5
  16450. 3 | 2 | 15 | 529
  16451. 2025-07-20 15:47:56,021 - sglang - INFO - [2025-07-20 15:47:56 TP0] Decode batch. #running-req: 10, #token: 28804, token usage: 0.76, gen throughput (token/s): 220.13, #queue-req: 505
  16452. 2025-07-20 15:47:56,021 - __main__ - INFO - sglang running req: 10 queue req: 505
  16453. 2025-07-20 15:47:57,004 - sglang - INFO - [2025-07-20 15:47:57 TP0] Decode batch. #running-req: 10, #token: 29204, token usage: 0.77, gen throughput (token/s): 406.94, #queue-req: 505
  16454. 2025-07-20 15:47:57,004 - __main__ - INFO - sglang running req: 10 queue req: 505
  16455. 2025-07-20 15:47:57,423 - sglang - INFO - [2025-07-20 15:47:57 TP0] Prefill batch. #new-seq: 1, #new-token: 2650, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 504
  16456. 2025-07-20 15:47:57,423 - __main__ - INFO - sglang running req: 9 queue req: 504
  16457. 2025-07-20 15:47:58,772 - sglang - INFO - [2025-07-20 15:47:58 TP0] Decode batch. #running-req: 10, #token: 29658, token usage: 0.78, gen throughput (token/s): 225.56, #queue-req: 504
  16458. 2025-07-20 15:47:58,773 - __main__ - INFO - sglang running req: 10 queue req: 504
  16459. 2025-07-20 15:47:58,944 - sglang - INFO - [2025-07-20 15:47:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2834, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 9, #queue-req: 503
  16460. 2025-07-20 15:47:58,945 - __main__ - INFO - sglang running req: 9 queue req: 503
  16461. 2025-07-20 15:48:00,615 - sglang - INFO - [2025-07-20 15:48:00 TP0] Decode batch. #running-req: 10, #token: 29194, token usage: 0.77, gen throughput (token/s): 216.57, #queue-req: 503
  16462. 2025-07-20 15:48:00,615 - __main__ - INFO - sglang running req: 10 queue req: 503
  16463. 2025-07-20 15:48:01,599 - sglang - INFO - [2025-07-20 15:48:01 TP0] Decode batch. #running-req: 10, #token: 29594, token usage: 0.78, gen throughput (token/s): 406.33, #queue-req: 503
  16464. 2025-07-20 15:48:01,600 - __main__ - INFO - sglang running req: 10 queue req: 503
  16465. 2025-07-20 15:48:02,583 - sglang - INFO - [2025-07-20 15:48:02 TP0] Decode batch. #running-req: 10, #token: 29994, token usage: 0.79, gen throughput (token/s): 406.46, #queue-req: 503
  16466. 2025-07-20 15:48:02,584 - __main__ - INFO - sglang running req: 10 queue req: 503
  16467. 2025-07-20 15:48:03,397 - sglang - INFO - [2025-07-20 15:48:03 TP0] Prefill batch. #new-seq: 1, #new-token: 3545, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 502
  16468. 2025-07-20 15:48:03,397 - __main__ - INFO - sglang running req: 9 queue req: 502
  16469. 2025-07-20 15:48:04,574 - sglang - INFO - [2025-07-20 15:48:04 TP0] Decode batch. #running-req: 10, #token: 31867, token usage: 0.84, gen throughput (token/s): 200.43, #queue-req: 502
  16470. 2025-07-20 15:48:04,574 - __main__ - INFO - sglang running req: 10 queue req: 502
  16471. 2025-07-20 15:48:04,752 - __main__ - INFO - Queue remaining: 0
  16472. 2025-07-20 15:48:04,753 - __main__ - INFO -
  16473. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16474. ----------------------------------------------------------------------------------
  16475. finished_input_tokens 29.74 137.88
  16476. finished_output_tokens 11.40 52.86
  16477. sglang_input_tokens 906.13 863.87
  16478. sglang_output_tokens 259.15 257.14
  16479. 2025-07-20 15:48:04,753 - __main__ - INFO -
  16480. Worker ID | errored | finished | started
  16481. ----------+---------+----------+--------
  16482. 0 | 0 | 497 | 500
  16483. 1 | 0 | 10 | 10
  16484. 2 | 0 | 5 | 5
  16485. 3 | 2 | 18 | 529
  16486. 2025-07-20 15:48:05,563 - sglang - INFO - [2025-07-20 15:48:05 TP0] Decode batch. #running-req: 10, #token: 32267, token usage: 0.85, gen throughput (token/s): 404.40, #queue-req: 502
  16487. 2025-07-20 15:48:05,563 - __main__ - INFO - sglang running req: 10 queue req: 502
  16488. 2025-07-20 15:48:06,280 - sglang - INFO - [2025-07-20 15:48:06 TP0] Prefill batch. #new-seq: 1, #new-token: 1258, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 501
  16489. 2025-07-20 15:48:06,280 - __main__ - INFO - sglang running req: 9 queue req: 501
  16490. 2025-07-20 15:48:07,091 - sglang - INFO - [2025-07-20 15:48:07 TP0] Decode batch. #running-req: 10, #token: 30019, token usage: 0.79, gen throughput (token/s): 261.12, #queue-req: 501
  16491. 2025-07-20 15:48:07,092 - __main__ - INFO - sglang running req: 10 queue req: 501
  16492. 2025-07-20 15:48:07,668 - sglang - INFO - [2025-07-20 15:48:07 TP0] Prefill batch. #new-seq: 1, #new-token: 2419, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.76, #running-req: 9, #queue-req: 500
  16493. 2025-07-20 15:48:07,668 - __main__ - INFO - sglang running req: 9 queue req: 500
  16494. 2025-07-20 15:48:08,688 - sglang - INFO - [2025-07-20 15:48:08 TP0] Prefill batch. #new-seq: 1, #new-token: 2514, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 499
  16495. 2025-07-20 15:48:08,689 - __main__ - INFO - sglang running req: 9 queue req: 499
  16496. 2025-07-20 15:48:09,650 - sglang - INFO - [2025-07-20 15:48:09 TP0] Decode batch. #running-req: 10, #token: 30279, token usage: 0.80, gen throughput (token/s): 155.53, #queue-req: 499
  16497. 2025-07-20 15:48:09,651 - __main__ - INFO - sglang running req: 10 queue req: 499
  16498. 2025-07-20 15:48:10,637 - sglang - INFO - [2025-07-20 15:48:10 TP0] Decode batch. #running-req: 10, #token: 30679, token usage: 0.81, gen throughput (token/s): 405.59, #queue-req: 499
  16499. 2025-07-20 15:48:10,637 - __main__ - INFO - sglang running req: 10 queue req: 499
  16500. 2025-07-20 15:48:11,623 - sglang - INFO - [2025-07-20 15:48:11 TP0] Decode batch. #running-req: 10, #token: 31079, token usage: 0.82, gen throughput (token/s): 405.54, #queue-req: 499
  16501. 2025-07-20 15:48:11,623 - __main__ - INFO - sglang running req: 10 queue req: 499
  16502. 2025-07-20 15:48:12,611 - sglang - INFO - [2025-07-20 15:48:12 TP0] Decode batch. #running-req: 10, #token: 31479, token usage: 0.83, gen throughput (token/s): 404.73, #queue-req: 499
  16503. 2025-07-20 15:48:12,611 - __main__ - INFO - sglang running req: 10 queue req: 499
  16504. 2025-07-20 15:48:13,604 - sglang - INFO - [2025-07-20 15:48:13 TP0] Decode batch. #running-req: 10, #token: 31879, token usage: 0.84, gen throughput (token/s): 402.98, #queue-req: 499
  16505. 2025-07-20 15:48:13,604 - __main__ - INFO - sglang running req: 10 queue req: 499
  16506. 2025-07-20 15:48:14,147 - sglang - INFO - [2025-07-20 15:48:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2365, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.77, #running-req: 9, #queue-req: 498
  16507. 2025-07-20 15:48:14,148 - __main__ - INFO - sglang running req: 9 queue req: 498
  16508. 2025-07-20 15:48:14,754 - __main__ - INFO - Queue remaining: 0
  16509. 2025-07-20 15:48:14,754 - __main__ - INFO -
  16510. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16511. ----------------------------------------------------------------------------------
  16512. finished_input_tokens 29.53 137.88
  16513. finished_output_tokens 11.32 52.86
  16514. sglang_input_tokens 906.42 855.53
  16515. sglang_output_tokens 259.08 254.82
  16516. 2025-07-20 15:48:14,754 - __main__ - INFO -
  16517. Worker ID | errored | finished | started
  16518. ----------+---------+----------+--------
  16519. 0 | 0 | 497 | 500
  16520. 1 | 0 | 10 | 10
  16521. 2 | 0 | 5 | 5
  16522. 3 | 2 | 22 | 529
  16523. 2025-07-20 15:48:14,978 - sglang - INFO - [2025-07-20 15:48:14 TP0] Prefill batch. #new-seq: 1, #new-token: 2442, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 497
  16524. 2025-07-20 15:48:14,978 - __main__ - INFO - sglang running req: 9 queue req: 497
  16525. 2025-07-20 15:48:16,188 - sglang - INFO - [2025-07-20 15:48:16 TP0] Decode batch. #running-req: 10, #token: 31196, token usage: 0.82, gen throughput (token/s): 154.00, #queue-req: 497
  16526. 2025-07-20 15:48:16,189 - __main__ - INFO - sglang running req: 10 queue req: 497
  16527. 2025-07-20 15:48:17,184 - sglang - INFO - [2025-07-20 15:48:17 TP0] Decode batch. #running-req: 10, #token: 31596, token usage: 0.83, gen throughput (token/s): 401.78, #queue-req: 497
  16528. 2025-07-20 15:48:17,184 - __main__ - INFO - sglang running req: 10 queue req: 497
  16529. 2025-07-20 15:48:18,181 - sglang - INFO - [2025-07-20 15:48:18 TP0] Decode batch. #running-req: 10, #token: 31996, token usage: 0.84, gen throughput (token/s): 401.23, #queue-req: 497
  16530. 2025-07-20 15:48:18,181 - __main__ - INFO - sglang running req: 10 queue req: 497
  16531. 2025-07-20 15:48:18,256 - sglang - INFO - [2025-07-20 15:48:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2877, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 9, #queue-req: 496
  16532. 2025-07-20 15:48:18,256 - __main__ - INFO - sglang running req: 9 queue req: 496
  16533. 2025-07-20 15:48:19,841 - sglang - INFO - [2025-07-20 15:48:19 TP0] Prefill batch. #new-seq: 1, #new-token: 2407, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 495
  16534. 2025-07-20 15:48:19,842 - __main__ - INFO - sglang running req: 9 queue req: 495
  16535. 2025-07-20 15:48:20,807 - sglang - INFO - [2025-07-20 15:48:20 TP0] Decode batch. #running-req: 10, #token: 30299, token usage: 0.80, gen throughput (token/s): 151.57, #queue-req: 495
  16536. 2025-07-20 15:48:20,807 - __main__ - INFO - sglang running req: 10 queue req: 495
  16537. 2025-07-20 15:48:21,791 - sglang - INFO - [2025-07-20 15:48:21 TP0] Decode batch. #running-req: 10, #token: 30699, token usage: 0.81, gen throughput (token/s): 406.49, #queue-req: 495
  16538. 2025-07-20 15:48:21,791 - __main__ - INFO - sglang running req: 10 queue req: 495
  16539. 2025-07-20 15:48:22,782 - sglang - INFO - [2025-07-20 15:48:22 TP0] Decode batch. #running-req: 10, #token: 31099, token usage: 0.82, gen throughput (token/s): 403.58, #queue-req: 495
  16540. 2025-07-20 15:48:22,782 - __main__ - INFO - sglang running req: 10 queue req: 495
  16541. 2025-07-20 15:48:22,906 - sglang - INFO - [2025-07-20 15:48:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2703, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 9, #queue-req: 494
  16542. 2025-07-20 15:48:22,906 - __main__ - INFO - sglang running req: 9 queue req: 494
  16543. 2025-07-20 15:48:24,603 - sglang - INFO - [2025-07-20 15:48:24 TP0] Decode batch. #running-req: 10, #token: 30690, token usage: 0.81, gen throughput (token/s): 219.08, #queue-req: 494
  16544. 2025-07-20 15:48:24,603 - __main__ - INFO - sglang running req: 10 queue req: 494
  16545. 2025-07-20 15:48:24,756 - __main__ - INFO - Queue remaining: 0
  16546. 2025-07-20 15:48:24,756 - __main__ - INFO -
  16547. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16548. ----------------------------------------------------------------------------------
  16549. finished_input_tokens 29.32 137.88
  16550. finished_output_tokens 11.24 52.86
  16551. sglang_input_tokens 907.82 860.97
  16552. sglang_output_tokens 259.20 255.92
  16553. 2025-07-20 15:48:24,756 - __main__ - INFO -
  16554. Worker ID | errored | finished | started
  16555. ----------+---------+----------+--------
  16556. 0 | 0 | 497 | 500
  16557. 1 | 0 | 10 | 10
  16558. 2 | 0 | 5 | 5
  16559. 3 | 2 | 26 | 529
  16560. 2025-07-20 15:48:25,589 - sglang - INFO - [2025-07-20 15:48:25 TP0] Decode batch. #running-req: 10, #token: 31090, token usage: 0.82, gen throughput (token/s): 405.84, #queue-req: 494
  16561. 2025-07-20 15:48:25,589 - __main__ - INFO - sglang running req: 10 queue req: 494
  16562. 2025-07-20 15:48:26,576 - sglang - INFO - [2025-07-20 15:48:26 TP0] Decode batch. #running-req: 10, #token: 31490, token usage: 0.83, gen throughput (token/s): 405.25, #queue-req: 494
  16563. 2025-07-20 15:48:26,576 - __main__ - INFO - sglang running req: 10 queue req: 494
  16564. 2025-07-20 15:48:27,095 - sglang - INFO - [2025-07-20 15:48:27 TP0] Prefill batch. #new-seq: 1, #new-token: 3260, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.75, #running-req: 9, #queue-req: 493
  16565. 2025-07-20 15:48:27,095 - __main__ - INFO - sglang running req: 9 queue req: 493
  16566. 2025-07-20 15:48:28,503 - sglang - INFO - [2025-07-20 15:48:28 TP0] Decode batch. #running-req: 9, #token: 29092, token usage: 0.77, gen throughput (token/s): 205.51, #queue-req: 493
  16567. 2025-07-20 15:48:28,503 - __main__ - INFO - sglang running req: 9 queue req: 493
  16568. 2025-07-20 15:48:29,477 - sglang - INFO - [2025-07-20 15:48:29 TP0] Decode batch. #running-req: 9, #token: 29452, token usage: 0.78, gen throughput (token/s): 369.34, #queue-req: 493
  16569. 2025-07-20 15:48:29,478 - __main__ - INFO - sglang running req: 9 queue req: 493
  16570. 2025-07-20 15:48:30,519 - sglang - INFO - [2025-07-20 15:48:30 TP0] Decode batch. #running-req: 9, #token: 29812, token usage: 0.78, gen throughput (token/s): 345.47, #queue-req: 493
  16571. 2025-07-20 15:48:30,520 - __main__ - INFO - sglang running req: 9 queue req: 493
  16572. 2025-07-20 15:48:31,509 - sglang - INFO - [2025-07-20 15:48:31 TP0] Decode batch. #running-req: 9, #token: 30172, token usage: 0.79, gen throughput (token/s): 363.61, #queue-req: 493
  16573. 2025-07-20 15:48:31,510 - __main__ - INFO - sglang running req: 9 queue req: 493
  16574. 2025-07-20 15:48:31,631 - sglang - INFO - [2025-07-20 15:48:31 TP0] Prefill batch. #new-seq: 1, #new-token: 3887, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 8, #queue-req: 492
  16575. 2025-07-20 15:48:31,632 - __main__ - INFO - sglang running req: 8 queue req: 492
  16576. 2025-07-20 15:48:33,025 - sglang - INFO - [2025-07-20 15:48:33 TP0] Prefill batch. #new-seq: 1, #new-token: 2977, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 8, #queue-req: 491
  16577. 2025-07-20 15:48:33,025 - __main__ - INFO - sglang running req: 8 queue req: 491
  16578. 2025-07-20 15:48:34,466 - sglang - INFO - [2025-07-20 15:48:34 TP0] Decode batch. #running-req: 9, #token: 30158, token usage: 0.79, gen throughput (token/s): 121.10, #queue-req: 491
  16579. 2025-07-20 15:48:34,466 - __main__ - INFO - sglang running req: 9 queue req: 491
  16580. 2025-07-20 15:48:34,757 - __main__ - INFO - Queue remaining: 0
  16581. 2025-07-20 15:48:34,757 - __main__ - INFO -
  16582. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16583. ----------------------------------------------------------------------------------
  16584. finished_input_tokens 29.11 137.88
  16585. finished_output_tokens 11.16 52.86
  16586. sglang_input_tokens 909.07 861.92
  16587. sglang_output_tokens 259.10 256.07
  16588. 2025-07-20 15:48:34,757 - __main__ - INFO -
  16589. Worker ID | errored | finished | started
  16590. ----------+---------+----------+--------
  16591. 0 | 0 | 497 | 500
  16592. 1 | 0 | 10 | 10
  16593. 2 | 0 | 5 | 5
  16594. 3 | 2 | 30 | 529
  16595. 2025-07-20 15:48:35,103 - sglang - INFO - [2025-07-20 15:48:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2152, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 8, #queue-req: 490
  16596. 2025-07-20 15:48:35,104 - __main__ - INFO - sglang running req: 8 queue req: 490
  16597. 2025-07-20 15:48:36,154 - sglang - INFO - [2025-07-20 15:48:36 TP0] Decode batch. #running-req: 9, #token: 29666, token usage: 0.78, gen throughput (token/s): 212.67, #queue-req: 490
  16598. 2025-07-20 15:48:36,154 - __main__ - INFO - sglang running req: 9 queue req: 490
  16599. 2025-07-20 15:48:37,131 - sglang - INFO - [2025-07-20 15:48:37 TP0] Decode batch. #running-req: 9, #token: 30026, token usage: 0.79, gen throughput (token/s): 368.24, #queue-req: 490
  16600. 2025-07-20 15:48:37,132 - __main__ - INFO - sglang running req: 9 queue req: 490
  16601. 2025-07-20 15:48:37,841 - sglang - INFO - [2025-07-20 15:48:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2571, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.69, #running-req: 8, #queue-req: 489
  16602. 2025-07-20 15:48:37,841 - __main__ - INFO - sglang running req: 8 queue req: 489
  16603. 2025-07-20 15:48:38,958 - sglang - INFO - [2025-07-20 15:48:38 TP0] Decode batch. #running-req: 9, #token: 28848, token usage: 0.76, gen throughput (token/s): 196.57, #queue-req: 489
  16604. 2025-07-20 15:48:38,958 - __main__ - INFO - sglang running req: 9 queue req: 489
  16605. 2025-07-20 15:48:39,006 - sglang - INFO - [2025-07-20 15:48:39 TP0] Prefill batch. #new-seq: 2, #new-token: 3412, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 8, #queue-req: 487
  16606. 2025-07-20 15:48:39,006 - __main__ - INFO - sglang running req: 8 queue req: 487
  16607. 2025-07-20 15:48:40,664 - sglang - INFO - [2025-07-20 15:48:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2399, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 486
  16608. 2025-07-20 15:48:40,664 - __main__ - INFO - sglang running req: 9 queue req: 486
  16609. 2025-07-20 15:48:41,961 - sglang - INFO - [2025-07-20 15:48:41 TP0] Decode batch. #running-req: 9, #token: 24834, token usage: 0.65, gen throughput (token/s): 131.52, #queue-req: 486
  16610. 2025-07-20 15:48:41,961 - __main__ - INFO - sglang running req: 9 queue req: 486
  16611. 2025-07-20 15:48:41,961 - sglang - INFO - [2025-07-20 15:48:41 TP0] Prefill batch. #new-seq: 1, #new-token: 3680, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 485
  16612. 2025-07-20 15:48:41,961 - __main__ - INFO - sglang running req: 9 queue req: 485
  16613. 2025-07-20 15:48:43,582 - sglang - INFO - [2025-07-20 15:48:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2449, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 9, #queue-req: 484
  16614. 2025-07-20 15:48:43,582 - __main__ - INFO - sglang running req: 9 queue req: 484
  16615. 2025-07-20 15:48:44,619 - sglang - INFO - [2025-07-20 15:48:44 TP0] Prefill batch. #new-seq: 1, #new-token: 3605, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 9, #queue-req: 483
  16616. 2025-07-20 15:48:44,619 - __main__ - INFO - sglang running req: 9 queue req: 483
  16617. 2025-07-20 15:48:44,758 - __main__ - INFO - Queue remaining: 0
  16618. 2025-07-20 15:48:44,758 - __main__ - INFO -
  16619. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16620. ----------------------------------------------------------------------------------
  16621. finished_input_tokens 28.91 137.88
  16622. finished_output_tokens 11.08 52.86
  16623. sglang_input_tokens 914.85 875.84
  16624. sglang_output_tokens 260.53 261.63
  16625. 2025-07-20 15:48:44,758 - __main__ - INFO -
  16626. Worker ID | errored | finished | started
  16627. ----------+---------+----------+--------
  16628. 0 | 0 | 497 | 500
  16629. 1 | 0 | 10 | 10
  16630. 2 | 0 | 5 | 5
  16631. 3 | 2 | 37 | 529
  16632. 2025-07-20 15:48:45,792 - sglang - INFO - [2025-07-20 15:48:45 TP0] Decode batch. #running-req: 10, #token: 30298, token usage: 0.80, gen throughput (token/s): 103.89, #queue-req: 483
  16633. 2025-07-20 15:48:45,792 - __main__ - INFO - sglang running req: 10 queue req: 483
  16634. 2025-07-20 15:48:46,779 - sglang - INFO - [2025-07-20 15:48:46 TP0] Decode batch. #running-req: 10, #token: 30698, token usage: 0.81, gen throughput (token/s): 405.36, #queue-req: 483
  16635. 2025-07-20 15:48:46,779 - __main__ - INFO - sglang running req: 10 queue req: 483
  16636. 2025-07-20 15:48:47,766 - sglang - INFO - [2025-07-20 15:48:47 TP0] Decode batch. #running-req: 10, #token: 31098, token usage: 0.82, gen throughput (token/s): 405.24, #queue-req: 483
  16637. 2025-07-20 15:48:47,766 - __main__ - INFO - sglang running req: 10 queue req: 483
  16638. 2025-07-20 15:48:48,755 - sglang - INFO - [2025-07-20 15:48:48 TP0] Decode batch. #running-req: 10, #token: 31498, token usage: 0.83, gen throughput (token/s): 404.35, #queue-req: 483
  16639. 2025-07-20 15:48:48,756 - __main__ - INFO - sglang running req: 10 queue req: 483
  16640. 2025-07-20 15:48:49,746 - sglang - INFO - [2025-07-20 15:48:49 TP0] Decode batch. #running-req: 10, #token: 31898, token usage: 0.84, gen throughput (token/s): 403.72, #queue-req: 483
  16641. 2025-07-20 15:48:49,746 - __main__ - INFO - sglang running req: 10 queue req: 483
  16642. 2025-07-20 15:48:50,737 - sglang - INFO - [2025-07-20 15:48:50 TP0] Decode batch. #running-req: 10, #token: 32298, token usage: 0.85, gen throughput (token/s): 403.84, #queue-req: 483
  16643. 2025-07-20 15:48:50,737 - __main__ - INFO - sglang running req: 10 queue req: 483
  16644. 2025-07-20 15:48:51,725 - sglang - INFO - [2025-07-20 15:48:51 TP0] Decode batch. #running-req: 10, #token: 32698, token usage: 0.86, gen throughput (token/s): 404.52, #queue-req: 483
  16645. 2025-07-20 15:48:51,727 - __main__ - INFO - sglang running req: 10 queue req: 483
  16646. 2025-07-20 15:48:52,717 - sglang - INFO - [2025-07-20 15:48:52 TP0] Decode batch. #running-req: 10, #token: 33098, token usage: 0.87, gen throughput (token/s): 403.49, #queue-req: 483
  16647. 2025-07-20 15:48:52,717 - __main__ - INFO - sglang running req: 10 queue req: 483
  16648. 2025-07-20 15:48:53,613 - sglang - INFO - [2025-07-20 15:48:53 TP0] Prefill batch. #new-seq: 1, #new-token: 2289, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.79, #running-req: 9, #queue-req: 482
  16649. 2025-07-20 15:48:53,613 - __main__ - INFO - sglang running req: 9 queue req: 482
  16650. 2025-07-20 15:48:54,440 - sglang - INFO - [2025-07-20 15:48:54 TP0] Decode batch. #running-req: 10, #token: 32272, token usage: 0.85, gen throughput (token/s): 231.56, #queue-req: 482
  16651. 2025-07-20 15:48:54,440 - __main__ - INFO - sglang running req: 10 queue req: 482
  16652. 2025-07-20 15:48:54,760 - __main__ - INFO - Queue remaining: 0
  16653. 2025-07-20 15:48:54,760 - __main__ - INFO -
  16654. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16655. ----------------------------------------------------------------------------------
  16656. finished_input_tokens 28.71 137.88
  16657. finished_output_tokens 11.01 52.86
  16658. sglang_input_tokens 910.56 853.52
  16659. sglang_output_tokens 259.10 257.10
  16660. 2025-07-20 15:48:54,760 - __main__ - INFO -
  16661. Worker ID | errored | finished | started
  16662. ----------+---------+----------+--------
  16663. 0 | 0 | 497 | 500
  16664. 1 | 0 | 10 | 10
  16665. 2 | 0 | 5 | 5
  16666. 3 | 2 | 38 | 529
  16667. 2025-07-20 15:48:55,431 - sglang - INFO - [2025-07-20 15:48:55 TP0] Decode batch. #running-req: 10, #token: 32672, token usage: 0.86, gen throughput (token/s): 403.38, #queue-req: 482
  16668. 2025-07-20 15:48:55,432 - __main__ - INFO - sglang running req: 10 queue req: 482
  16669. 2025-07-20 15:48:56,294 - sglang - INFO - [2025-07-20 15:48:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2634, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 8, #queue-req: 481
  16670. 2025-07-20 15:48:56,294 - __main__ - INFO - sglang running req: 8 queue req: 481
  16671. 2025-07-20 15:48:57,243 - sglang - INFO - [2025-07-20 15:48:57 TP0] Decode batch. #running-req: 9, #token: 30491, token usage: 0.80, gen throughput (token/s): 202.10, #queue-req: 481
  16672. 2025-07-20 15:48:57,243 - __main__ - INFO - sglang running req: 9 queue req: 481
  16673. 2025-07-20 15:48:57,586 - sglang - INFO - [2025-07-20 15:48:57 TP0] Prefill batch. #new-seq: 1, #new-token: 1150, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.70, #running-req: 8, #queue-req: 480
  16674. 2025-07-20 15:48:57,586 - __main__ - INFO - sglang running req: 8 queue req: 480
  16675. 2025-07-20 15:48:58,728 - sglang - INFO - [2025-07-20 15:48:58 TP0] Decode batch. #running-req: 9, #token: 27948, token usage: 0.74, gen throughput (token/s): 241.60, #queue-req: 480
  16676. 2025-07-20 15:48:58,729 - __main__ - INFO - sglang running req: 9 queue req: 480
  16677. 2025-07-20 15:48:59,092 - sglang - INFO - [2025-07-20 15:48:59 TP0] Prefill batch. #new-seq: 1, #new-token: 2815, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.71, #running-req: 8, #queue-req: 479
  16678. 2025-07-20 15:48:59,092 - __main__ - INFO - sglang running req: 8 queue req: 479
  16679. 2025-07-20 15:49:00,551 - sglang - INFO - [2025-07-20 15:49:00 TP0] Decode batch. #running-req: 9, #token: 29932, token usage: 0.79, gen throughput (token/s): 196.98, #queue-req: 479
  16680. 2025-07-20 15:49:00,551 - __main__ - INFO - sglang running req: 9 queue req: 479
  16681. 2025-07-20 15:49:01,536 - sglang - INFO - [2025-07-20 15:49:01 TP0] Decode batch. #running-req: 9, #token: 30292, token usage: 0.80, gen throughput (token/s): 365.35, #queue-req: 479
  16682. 2025-07-20 15:49:01,537 - __main__ - INFO - sglang running req: 9 queue req: 479
  16683. 2025-07-20 15:49:02,321 - sglang - INFO - [2025-07-20 15:49:02 TP0] Prefill batch. #new-seq: 1, #new-token: 4293, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 8, #queue-req: 478
  16684. 2025-07-20 15:49:02,322 - __main__ - INFO - sglang running req: 8 queue req: 478
  16685. 2025-07-20 15:49:03,682 - sglang - INFO - [2025-07-20 15:49:03 TP0] Decode batch. #running-req: 9, #token: 31731, token usage: 0.84, gen throughput (token/s): 167.28, #queue-req: 478
  16686. 2025-07-20 15:49:03,683 - __main__ - INFO - sglang running req: 9 queue req: 478
  16687. 2025-07-20 15:49:04,672 - sglang - INFO - [2025-07-20 15:49:04 TP0] Decode batch. #running-req: 9, #token: 32091, token usage: 0.84, gen throughput (token/s): 363.89, #queue-req: 478
  16688. 2025-07-20 15:49:04,672 - __main__ - INFO - sglang running req: 9 queue req: 478
  16689. 2025-07-20 15:49:04,761 - __main__ - INFO - Queue remaining: 0
  16690. 2025-07-20 15:49:04,762 - __main__ - INFO -
  16691. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16692. ----------------------------------------------------------------------------------
  16693. finished_input_tokens 28.51 137.88
  16694. finished_output_tokens 10.93 52.86
  16695. sglang_input_tokens 911.97 854.97
  16696. sglang_output_tokens 259.03 254.68
  16697. 2025-07-20 15:49:04,762 - __main__ - INFO -
  16698. Worker ID | errored | finished | started
  16699. ----------+---------+----------+--------
  16700. 0 | 0 | 497 | 500
  16701. 1 | 0 | 10 | 10
  16702. 2 | 0 | 5 | 5
  16703. 3 | 2 | 43 | 529
  16704. 2025-07-20 15:49:05,660 - sglang - INFO - [2025-07-20 15:49:05 TP0] Decode batch. #running-req: 9, #token: 29351, token usage: 0.77, gen throughput (token/s): 364.33, #queue-req: 478
  16705. 2025-07-20 15:49:05,660 - __main__ - INFO - sglang running req: 9 queue req: 478
  16706. 2025-07-20 15:49:06,590 - sglang - INFO - [2025-07-20 15:49:06 TP0] Decode batch. #running-req: 8, #token: 29671, token usage: 0.78, gen throughput (token/s): 343.76, #queue-req: 478
  16707. 2025-07-20 15:49:06,591 - __main__ - INFO - sglang running req: 8 queue req: 478
  16708. 2025-07-20 15:49:06,777 - sglang - INFO - [2025-07-20 15:49:06 TP0] Prefill batch. #new-seq: 1, #new-token: 6873, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.67, #running-req: 7, #queue-req: 477
  16709. 2025-07-20 15:49:06,778 - __main__ - INFO - sglang running req: 7 queue req: 477
  16710. 2025-07-20 15:49:08,736 - sglang - INFO - [2025-07-20 15:49:08 TP0] Prefill batch. #new-seq: 1, #new-token: 4350, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 7, #queue-req: 476
  16711. 2025-07-20 15:49:08,736 - __main__ - INFO - sglang running req: 7 queue req: 476
  16712. 2025-07-20 15:49:10,105 - sglang - INFO - [2025-07-20 15:49:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2543, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.73, #running-req: 7, #queue-req: 475
  16713. 2025-07-20 15:49:10,105 - __main__ - INFO - sglang running req: 7 queue req: 475
  16714. 2025-07-20 15:49:11,179 - sglang - INFO - [2025-07-20 15:49:11 TP0] Decode batch. #running-req: 8, #token: 30209, token usage: 0.80, gen throughput (token/s): 69.08, #queue-req: 475
  16715. 2025-07-20 15:49:11,180 - __main__ - INFO - sglang running req: 8 queue req: 475
  16716. 2025-07-20 15:49:12,045 - sglang - INFO - [2025-07-20 15:49:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2926, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.72, #running-req: 7, #queue-req: 474
  16717. 2025-07-20 15:49:12,045 - __main__ - INFO - sglang running req: 7 queue req: 474
  16718. 2025-07-20 15:49:12,987 - sglang - INFO - [2025-07-20 15:49:12 TP0] Decode batch. #running-req: 8, #token: 30233, token usage: 0.80, gen throughput (token/s): 176.49, #queue-req: 474
  16719. 2025-07-20 15:49:12,987 - __main__ - INFO - sglang running req: 8 queue req: 474
  16720. 2025-07-20 15:49:13,920 - sglang - INFO - [2025-07-20 15:49:13 TP0] Decode batch. #running-req: 8, #token: 30553, token usage: 0.80, gen throughput (token/s): 342.84, #queue-req: 474
  16721. 2025-07-20 15:49:13,920 - __main__ - INFO - sglang running req: 8 queue req: 474
  16722. 2025-07-20 15:49:14,763 - __main__ - INFO - Queue remaining: 0
  16723. 2025-07-20 15:49:14,764 - __main__ - INFO -
  16724. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  16725. ----------------------------------------------------------------------------------
  16726. finished_input_tokens 28.31 137.88
  16727. finished_output_tokens 10.85 52.86
  16728. sglang_input_tokens 916.69 884.77
  16729. sglang_output_tokens 259.89 262.48
  16730. 2025-07-20 15:49:14,764 - __main__ - INFO -
  16731. Worker ID | errored | finished | started
  16732. ----------+---------+----------+--------
  16733. 0 | 0 | 497 | 500
  16734. 1 | 0 | 10 | 10
  16735. 2 | 0 | 5 | 5
  16736. 3 | 2 | 48 | 529
  16737. 2025-07-20 15:49:14,854 - sglang - INFO - [2025-07-20 15:49:14 TP0] Decode batch. #running-req: 8, #token: 30873, token usage: 0.81, gen throughput (token/s): 342.70, #queue-req: 474
  16738. 2025-07-20 15:49:14,854 - __main__ - INFO - sglang running req: 8 queue req: 474
  16739. 2025-07-20 15:49:15,791 - sglang - INFO - [2025-07-20 15:49:15 TP0] Decode batch. #running-req: 8, #token: 31193, token usage: 0.82, gen throughput (token/s): 341.60, #queue-req: 474
  16740. 2025-07-20 15:49:15,791 - __main__ - INFO - sglang running req: 8 queue req: 474
  16741. 2025-07-20 15:49:16,400 - sglang - INFO - [2025-07-20 15:49:16 TP0] Prefill batch. #new-seq: 1, #new-token: 2657, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 7, #queue-req: 473
  16742. 2025-07-20 15:49:16,400 - __main__ - INFO - sglang running req: 7 queue req: 473
  16743. 2025-07-20 15:49:17,552 - sglang - INFO - [2025-07-20 15:49:17 TP0] Decode batch. #running-req: 8, #token: 30904, token usage: 0.81, gen throughput (token/s): 181.06, #queue-req: 473
  16744. 2025-07-20 15:49:17,553 - __main__ - INFO - sglang running req: 8 queue req: 473
  16745. 2025-07-20 15:49:18,184 - sglang - INFO - [2025-07-20 15:49:18 TP0] Prefill batch. #new-seq: 1, #new-token: 2659, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.74, #running-req: 7, #queue-req: 472
  16746. 2025-07-20 15:49:18,184 - __main__ - INFO - sglang running req: 7 queue req: 472
  16747. 2025-07-20 15:49:19,113 - __main__ - INFO - Process page scripts/data/11445200MB2D6222364440125017008.pdf-13 cancelled
  16748. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page scripts/data/11445224007035644H44421110A0001.pdf-3 cancelled
  16749. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page scripts/data/11445200MB2C47380T4440125017008 (1).pdf-12 cancelled
  16750. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/guidebook_failed_pages.pdf-3 cancelled
  16751. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-3 cancelled
  16752. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-17 cancelled
  16753. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-1 cancelled
  16754. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-27 cancelled
  16755. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-15 cancelled
  16756. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-33 cancelled
  16757. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-13 cancelled
  16758. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-4 cancelled
  16759. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-23 cancelled
  16760. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-40 cancelled
  16761. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-34 cancelled
  16762. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-8 cancelled
  16763. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-20 cancelled
  16764. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-35 cancelled
  16765. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-7 cancelled
  16766. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-25 cancelled
  16767. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-2 cancelled
  16768. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-31 cancelled
  16769. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-22 cancelled
  16770. 2025-07-20 15:49:19,114 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-36 cancelled
  16771. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-21 cancelled
  16772. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-37 cancelled
  16773. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-14 cancelled
  16774. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-39 cancelled
  16775. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-19 cancelled
  16776. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-32 cancelled
  16777. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-10 cancelled
  16778. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-18 cancelled
  16779. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-28 cancelled
  16780. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-11 cancelled
  16781. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-24 cancelled
  16782. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-26 cancelled
  16783. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-29 cancelled
  16784. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-6 cancelled
  16785. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-16 cancelled
  16786. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-9 cancelled
  16787. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-30 cancelled
  16788. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-5 cancelled
  16789. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-38 cancelled
  16790. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/ti89_guidebook_programming.pdf-12 cancelled
  16791. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-11 cancelled
  16792. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-25 cancelled
  16793. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-19 cancelled
  16794. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-10 cancelled
  16795. 2025-07-20 15:49:19,115 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-26 cancelled
  16796. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-24 cancelled
  16797. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-18 cancelled
  16798. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-4 cancelled
  16799. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-12 cancelled
  16800. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-14 cancelled
  16801. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-2 cancelled
  16802. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-27 cancelled
  16803. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-16 cancelled
  16804. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-5 cancelled
  16805. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-3 cancelled
  16806. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-23 cancelled
  16807. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-13 cancelled
  16808. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-7 cancelled
  16809. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-22 cancelled
  16810. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-6 cancelled
  16811. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-1 cancelled
  16812. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-8 cancelled
  16813. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-15 cancelled
  16814. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-21 cancelled
  16815. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-9 cancelled
  16816. 2025-07-20 15:49:19,116 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-20 cancelled
  16817. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint1.pdf-17 cancelled
  16818. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-46 cancelled
  16819. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-47 cancelled
  16820. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-33 cancelled
  16821. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-48 cancelled
  16822. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-38 cancelled
  16823. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-39 cancelled
  16824. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-12 cancelled
  16825. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-42 cancelled
  16826. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-26 cancelled
  16827. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-43 cancelled
  16828. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-44 cancelled
  16829. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/bws_book_ch2.pdf-45 cancelled
  16830. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/handwriting_bad_ocr.pdf-2 cancelled
  16831. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/handwriting_bad_ocr.pdf-1 cancelled
  16832. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-5 cancelled
  16833. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-2 cancelled
  16834. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-1 cancelled
  16835. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-4 cancelled
  16836. 2025-07-20 15:49:19,117 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint2.pdf-3 cancelled
  16837. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-12 cancelled
  16838. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-23 cancelled
  16839. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-13 cancelled
  16840. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-24 cancelled
  16841. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-14 cancelled
  16842. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-25 cancelled
  16843. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-3 cancelled
  16844. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-1 cancelled
  16845. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-15 cancelled
  16846. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-17 cancelled
  16847. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-8 cancelled
  16848. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-26 cancelled
  16849. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-2 cancelled
  16850. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-16 cancelled
  16851. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-18 cancelled
  16852. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-19 cancelled
  16853. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-5 cancelled
  16854. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-22 cancelled
  16855. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-9 cancelled
  16856. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-20 cancelled
  16857. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-6 cancelled
  16858. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-10 cancelled
  16859. 2025-07-20 15:49:19,118 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-21 cancelled
  16860. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-7 cancelled
  16861. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_good_some_pages_should_get_filtered.pdf-11 cancelled
  16862. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-1 cancelled
  16863. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-4 cancelled
  16864. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-2 cancelled
  16865. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/large_prompt_hint3.pdf-3 cancelled
  16866. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-1 cancelled
  16867. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-8 cancelled
  16868. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-61 cancelled
  16869. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-18 cancelled
  16870. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-40 cancelled
  16871. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-30 cancelled
  16872. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-51 cancelled
  16873. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-9 cancelled
  16874. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-62 cancelled
  16875. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-19 cancelled
  16876. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-41 cancelled
  16877. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-31 cancelled
  16878. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-52 cancelled
  16879. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-10 cancelled
  16880. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-63 cancelled
  16881. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-20 cancelled
  16882. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-42 cancelled
  16883. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-32 cancelled
  16884. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-53 cancelled
  16885. 2025-07-20 15:49:19,119 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-11 cancelled
  16886. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-64 cancelled
  16887. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-21 cancelled
  16888. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-43 cancelled
  16889. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-33 cancelled
  16890. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-54 cancelled
  16891. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-12 cancelled
  16892. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-65 cancelled
  16893. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-22 cancelled
  16894. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-44 cancelled
  16895. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-2 cancelled
  16896. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-34 cancelled
  16897. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-55 cancelled
  16898. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-13 cancelled
  16899. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-66 cancelled
  16900. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-23 cancelled
  16901. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-45 cancelled
  16902. 2025-07-20 15:49:19,120 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-3 cancelled
  16903. 2025-07-20 15:49:19,134 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-35 cancelled
  16904. 2025-07-20 15:49:19,134 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-56 cancelled
  16905. 2025-07-20 15:49:19,134 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-14 cancelled
  16906. 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-67 cancelled
  16907. 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-24 cancelled
  16908. 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-46 cancelled
  16909. 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-4 cancelled
  16910. 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-36 cancelled
  16911. 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-50 cancelled
  16912. 2025-07-20 15:49:19,135 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-57 cancelled
  16913. 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-15 cancelled
  16914. 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-68 cancelled
  16915. 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-25 cancelled
  16916. 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-47 cancelled
  16917. 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-5 cancelled
  16918. 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-37 cancelled
  16919. 2025-07-20 15:49:19,136 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-58 cancelled
  16920. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-16 cancelled
  16921. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-27 cancelled
  16922. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-48 cancelled
  16923. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-6 cancelled
  16924. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-38 cancelled
  16925. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-59 cancelled
  16926. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-17 cancelled
  16927. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-28 cancelled
  16928. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-49 cancelled
  16929. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-7 cancelled
  16930. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-39 cancelled
  16931. 2025-07-20 15:49:19,137 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-60 cancelled
  16932. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-26 cancelled
  16933. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/slideshow_mostly_images.pdf-29 cancelled
  16934. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-5 cancelled
  16935. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-8 cancelled
  16936. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-2 cancelled
  16937. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-6 cancelled
  16938. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-9 cancelled
  16939. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-4 cancelled
  16940. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-1 cancelled
  16941. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-7 cancelled
  16942. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing2.pdf-3 cancelled
  16943. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-6 cancelled
  16944. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-14 cancelled
  16945. 2025-07-20 15:49:19,138 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-1 cancelled
  16946. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-9 cancelled
  16947. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-4 cancelled
  16948. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-12 cancelled
  16949. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-7 cancelled
  16950. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-15 cancelled
  16951. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-16 cancelled
  16952. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-2 cancelled
  16953. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-10 cancelled
  16954. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-5 cancelled
  16955. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-13 cancelled
  16956. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-8 cancelled
  16957. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-3 cancelled
  16958. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/load_v_error.pdf-11 cancelled
  16959. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/small_page_size.pdf-1 cancelled
  16960. 2025-07-20 15:49:19,139 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-3 cancelled
  16961. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-7 cancelled
  16962. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-14 cancelled
  16963. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-25 cancelled
  16964. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-1 cancelled
  16965. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-11 cancelled
  16966. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-27 cancelled
  16967. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-10 cancelled
  16968. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-28 cancelled
  16969. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-5 cancelled
  16970. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-8 cancelled
  16971. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-29 cancelled
  16972. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-12 cancelled
  16973. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-13 cancelled
  16974. 2025-07-20 15:49:19,140 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-9 cancelled
  16975. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-15 cancelled
  16976. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-17 cancelled
  16977. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-19 cancelled
  16978. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-4 cancelled
  16979. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-26 cancelled
  16980. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-21 cancelled
  16981. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-2 cancelled
  16982. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-20 cancelled
  16983. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-22 cancelled
  16984. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-6 cancelled
  16985. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-16 cancelled
  16986. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-23 cancelled
  16987. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-18 cancelled
  16988. 2025-07-20 15:49:19,141 - __main__ - INFO - Process page tests/gnarly_pdfs/discoverworld_crazy_tables.pdf-24 cancelled
  16989. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/olmo-page-1.pdf-1 cancelled
  16990. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-2 cancelled
  16991. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-8 cancelled
  16992. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-4 cancelled
  16993. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-6 cancelled
  16994. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-3 cancelled
  16995. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-1 cancelled
  16996. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-9 cancelled
  16997. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-5 cancelled
  16998. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_chem_tables.pdf-7 cancelled
  16999. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/some_ocr1.pdf-1 cancelled
  17000. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/dolma-page-1.pdf-1 cancelled
  17001. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-10 cancelled
  17002. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-23 cancelled
  17003. 2025-07-20 15:49:19,142 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-39 cancelled
  17004. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-27 cancelled
  17005. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-50 cancelled
  17006. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-1 cancelled
  17007. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-11 cancelled
  17008. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-24 cancelled
  17009. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-40 cancelled
  17010. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-28 cancelled
  17011. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-51 cancelled
  17012. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-15 cancelled
  17013. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-12 cancelled
  17014. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-41 cancelled
  17015. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-29 cancelled
  17016. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-52 cancelled
  17017. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-2 cancelled
  17018. 2025-07-20 15:49:19,143 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-16 cancelled
  17019. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-13 cancelled
  17020. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-42 cancelled
  17021. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-30 cancelled
  17022. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-53 cancelled
  17023. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-3 cancelled
  17024. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-17 cancelled
  17025. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-43 cancelled
  17026. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-31 cancelled
  17027. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-54 cancelled
  17028. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-4 cancelled
  17029. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-26 cancelled
  17030. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-14 cancelled
  17031. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-44 cancelled
  17032. 2025-07-20 15:49:19,144 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-32 cancelled
  17033. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-36 cancelled
  17034. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-5 cancelled
  17035. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-18 cancelled
  17036. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-45 cancelled
  17037. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-33 cancelled
  17038. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-6 cancelled
  17039. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-19 cancelled
  17040. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-46 cancelled
  17041. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-34 cancelled
  17042. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-7 cancelled
  17043. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-20 cancelled
  17044. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-47 cancelled
  17045. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-35 cancelled
  17046. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-8 cancelled
  17047. 2025-07-20 15:49:19,145 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-48 cancelled
  17048. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-25 cancelled
  17049. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-9 cancelled
  17050. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-22 cancelled
  17051. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/overrun_on_pg8.pdf-49 cancelled
  17052. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-4 cancelled
  17053. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-2 cancelled
  17054. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-5 cancelled
  17055. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-3 cancelled
  17056. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-6 cancelled
  17057. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/lots_of_sci_tables.pdf-1 cancelled
  17058. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/newspaper.pdf-1 cancelled
  17059. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-2 cancelled
  17060. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-10 cancelled
  17061. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-5 cancelled
  17062. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-13 cancelled
  17063. 2025-07-20 15:49:19,146 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-12 cancelled
  17064. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-8 cancelled
  17065. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-3 cancelled
  17066. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-11 cancelled
  17067. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-6 cancelled
  17068. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-14 cancelled
  17069. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-1 cancelled
  17070. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-9 cancelled
  17071. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-4 cancelled
  17072. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/pdftotext_two_column_issue.pdf-7 cancelled
  17073. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-4 cancelled
  17074. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-10 cancelled
  17075. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-6 cancelled
  17076. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-2 cancelled
  17077. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-8 cancelled
  17078. 2025-07-20 15:49:19,147 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-9 cancelled
  17079. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-7 cancelled
  17080. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-3 cancelled
  17081. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-5 cancelled
  17082. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/repeating_references_on_pg9_pg10.pdf-1 cancelled
  17083. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/skinnypage.pdf-1 cancelled
  17084. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-6 cancelled
  17085. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-1 cancelled
  17086. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-4 cancelled
  17087. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-7 cancelled
  17088. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-2 cancelled
  17089. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-5 cancelled
  17090. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-8 cancelled
  17091. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/not_parsing.pdf-3 cancelled
  17092. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-3 cancelled
  17093. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-6 cancelled
  17094. 2025-07-20 15:49:19,148 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-1 cancelled
  17095. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-4 cancelled
  17096. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-7 cancelled
  17097. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-2 cancelled
  17098. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/most_content_in_image_form.pdf-5 cancelled
  17099. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-2 cancelled
  17100. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-4 cancelled
  17101. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-8 cancelled
  17102. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-3 cancelled
  17103. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-6 cancelled
  17104. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-5 cancelled
  17105. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-9 cancelled
  17106. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-1 cancelled
  17107. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_pdf_pg9.pdf-7 cancelled
  17108. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-8 cancelled
  17109. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-5 cancelled
  17110. 2025-07-20 15:49:19,149 - __main__ - INFO - Process page tests/gnarly_pdfs/failing_anchor_pg4.pdf-3 cancelled
  17111. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-77 cancelled
  17112. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-43 cancelled
  17113. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-68 cancelled
  17114. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-78 cancelled
  17115. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-26 cancelled
  17116. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-61 cancelled
  17117. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-76 cancelled
  17118. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-36 cancelled
  17119. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-59 cancelled
  17120. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-80 cancelled
  17121. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-16 cancelled
  17122. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-64 cancelled
  17123. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-81 cancelled
  17124. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-46 cancelled
  17125. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-71 cancelled
  17126. 2025-07-20 15:49:19,150 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-82 cancelled
  17127. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-21 cancelled
  17128. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-65 cancelled
  17129. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-83 cancelled
  17130. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-33 cancelled
  17131. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-63 cancelled
  17132. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-84 cancelled
  17133. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-30 cancelled
  17134. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-69 cancelled
  17135. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-79 cancelled
  17136. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-31 cancelled
  17137. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-67 cancelled
  17138. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-73 cancelled
  17139. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-15 cancelled
  17140. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-74 cancelled
  17141. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-57 cancelled
  17142. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-70 cancelled
  17143. 2025-07-20 15:49:19,151 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-75 cancelled
  17144. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-19 cancelled
  17145. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-60 cancelled
  17146. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-72 cancelled
  17147. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-18 cancelled
  17148. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-53 cancelled
  17149. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-55 cancelled
  17150. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-23 cancelled
  17151. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-45 cancelled
  17152. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-27 cancelled
  17153. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-52 cancelled
  17154. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-94 cancelled
  17155. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-14 cancelled
  17156. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-66 cancelled
  17157. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-28 cancelled
  17158. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-9 cancelled
  17159. 2025-07-20 15:49:19,152 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-58 cancelled
  17160. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-90 cancelled
  17161. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-20 cancelled
  17162. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-56 cancelled
  17163. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-17 cancelled
  17164. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-51 cancelled
  17165. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-89 cancelled
  17166. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-44 cancelled
  17167. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-86 cancelled
  17168. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-25 cancelled
  17169. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-62 cancelled
  17170. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-39 cancelled
  17171. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-87 cancelled
  17172. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-12 cancelled
  17173. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-40 cancelled
  17174. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-88 cancelled
  17175. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-22 cancelled
  17176. 2025-07-20 15:49:19,153 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-96 cancelled
  17177. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-54 cancelled
  17178. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-6 cancelled
  17179. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-38 cancelled
  17180. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-2 cancelled
  17181. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-105 cancelled
  17182. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-37 cancelled
  17183. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-5 cancelled
  17184. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-106 cancelled
  17185. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-35 cancelled
  17186. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-10 cancelled
  17187. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-102 cancelled
  17188. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-42 cancelled
  17189. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-99 cancelled
  17190. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-34 cancelled
  17191. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-103 cancelled
  17192. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-47 cancelled
  17193. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-104 cancelled
  17194. 2025-07-20 15:49:19,154 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-50 cancelled
  17195. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-85 cancelled
  17196. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-7 cancelled
  17197. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-101 cancelled
  17198. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-24 cancelled
  17199. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-4 cancelled
  17200. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-92 cancelled
  17201. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-48 cancelled
  17202. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-11 cancelled
  17203. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-98 cancelled
  17204. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-41 cancelled
  17205. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-97 cancelled
  17206. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-29 cancelled
  17207. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-1 cancelled
  17208. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-91 cancelled
  17209. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-3 cancelled
  17210. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-100 cancelled
  17211. 2025-07-20 15:49:19,155 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-32 cancelled
  17212. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-93 cancelled
  17213. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-49 cancelled
  17214. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-8 cancelled
  17215. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-95 cancelled
  17216. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/instructions_and_schematics.pdf-13 cancelled
  17217. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-5 cancelled
  17218. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-8 cancelled
  17219. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-3 cancelled
  17220. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-10 cancelled
  17221. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-6 cancelled
  17222. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-1 cancelled
  17223. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-9 cancelled
  17224. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-4 cancelled
  17225. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-7 cancelled
  17226. 2025-07-20 15:49:19,156 - __main__ - INFO - Process page tests/gnarly_pdfs/form_on_later_pages.pdf-2 cancelled
  17227. 2025-07-20 15:49:19,157 - sglang - INFO - Process Process-2:
  17228. 2025-07-20 15:49:19,157 - sglang - INFO - Process Process-1:
  17229. 2025-07-20 15:49:19,157 - sglang - INFO - Traceback (most recent call last):
  17230. 2025-07-20 15:49:19,157 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
  17231. 2025-07-20 15:49:19,157 - sglang - INFO - self.run()
  17232. 2025-07-20 15:49:19,157 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 108, in run
  17233. 2025-07-20 15:49:19,157 - sglang - INFO - self._target(*self._args, **self._kwargs)
  17234. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1784, in run_scheduler_process
  17235. 2025-07-20 15:49:19,158 - sglang - INFO - scheduler.event_loop_normal()
  17236. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
  17237. 2025-07-20 15:49:19,158 - sglang - INFO - return func(*args, **kwargs)
  17238. 2025-07-20 15:49:19,158 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^
  17239. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 478, in event_loop_normal
  17240. 2025-07-20 15:49:19,158 - sglang - INFO - self.process_batch_result(batch, result)
  17241. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1101, in process_batch_result
  17242. 2025-07-20 15:49:19,158 - sglang - INFO - self.process_batch_result_decode(batch, result)
  17243. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1228, in process_batch_result_decode
  17244. 2025-07-20 15:49:19,158 - sglang - INFO - next_token_ids = next_token_ids.tolist()
  17245. 2025-07-20 15:49:19,158 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^
  17246. 2025-07-20 15:49:19,158 - sglang - INFO - KeyboardInterrupt
  17247. 2025-07-20 15:49:19,158 - sglang - INFO - Traceback (most recent call last):
  17248. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
  17249. 2025-07-20 15:49:19,158 - sglang - INFO - self.run()
  17250. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/multiprocessing/process.py", line 108, in run
  17251. 2025-07-20 15:49:19,158 - sglang - INFO - self._target(*self._args, **self._kwargs)
  17252. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/detokenizer_manager.py", line 240, in run_detokenizer_process
  17253. 2025-07-20 15:49:19,158 - sglang - INFO - manager.event_loop()
  17254. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/sglang/srt/managers/detokenizer_manager.py", line 113, in event_loop
  17255. 2025-07-20 15:49:19,158 - sglang - INFO - recv_obj = self.recv_from_scheduler.recv_pyobj()
  17256. 2025-07-20 15:49:19,158 - sglang - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  17257. 2025-07-20 15:49:19,158 - sglang - INFO - File "/usr/local/miniconda3/envs/olmocr/lib/python3.11/site-packages/zmq/sugar/socket.py", line 989, in recv_pyobj
  17258. 2025-07-20 15:49:19,158 - sglang - INFO - msg = self.recv(flags)
  17259. 2025-07-20 15:49:19,158 - sglang - INFO - ^^^^^^^^^^^^^^^^
  17260. 2025-07-20 15:49:19,158 - sglang - INFO - File "_zmq.py", line 1147, in zmq.backend.cython._zmq.Socket.recv
  17261. 2025-07-20 15:49:19,158 - sglang - INFO - File "_zmq.py", line 1182, in zmq.backend.cython._zmq.Socket.recv
  17262. 2025-07-20 15:49:19,158 - sglang - INFO - File "_zmq.py", line 1337, in zmq.backend.cython._zmq._recv_copy
  17263. 2025-07-20 15:49:19,158 - sglang - INFO - File "_zmq.py", line 169, in zmq.backend.cython._zmq._check_rc
  17264. 2025-07-20 15:49:19,158 - sglang - INFO - KeyboardInterrupt
  17265. 2025-07-20 15:49:19,164 - __main__ - INFO - Got cancellation request for SGLang server
  17266. 2025-07-20 15:50:09,151 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  17267. 2025-07-20 15:50:09,152 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  17268. 2025-07-20 15:50:09,152 - __main__ - INFO - Found 1 total pdf paths to add
  17269. 2025-07-20 15:50:09,158 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  17270. 2025-07-20 15:50:09,372 - __main__ - INFO - Starting pipeline with PID 599566
  17271. 2025-07-20 15:50:09,372 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  17272. 2025-07-20 15:50:14,457 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  17273. 2025-07-20 15:50:17,827 - sglang - INFO - [2025-07-20 15:50:17] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30025, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=378345866, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17274. 2025-07-20 15:50:17,828 - __main__ - INFO - [2025-07-20 15:50:17] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30025, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=378345866, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17275. 2025-07-20 15:50:18,989 - sglang - INFO - [2025-07-20 15:50:18] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17276. 2025-07-20 15:50:18,990 - __main__ - INFO - [2025-07-20 15:50:18] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17277. 2025-07-20 15:50:20,550 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  17278. 2025-07-20 15:50:24,963 - sglang - INFO - [2025-07-20 15:50:24 TP0] Overlap scheduler is disabled for multimodal models.
  17279. 2025-07-20 15:50:24,963 - __main__ - INFO - [2025-07-20 15:50:24 TP0] Overlap scheduler is disabled for multimodal models.
  17280. 2025-07-20 15:50:24,965 - sglang - INFO - [2025-07-20 15:50:24 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17281. 2025-07-20 15:50:24,965 - __main__ - INFO - [2025-07-20 15:50:24 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17282. 2025-07-20 15:50:24,966 - sglang - INFO - [2025-07-20 15:50:24 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17283. 2025-07-20 15:50:24,966 - __main__ - INFO - [2025-07-20 15:50:24 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17284. 2025-07-20 15:50:24,966 - sglang - INFO - [2025-07-20 15:50:24 TP0] Init torch distributed begin.
  17285. 2025-07-20 15:50:24,966 - __main__ - INFO - [2025-07-20 15:50:24 TP0] Init torch distributed begin.
  17286. 2025-07-20 15:50:26,630 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  17287. 2025-07-20 15:50:30,629 - sglang - INFO - [2025-07-20 15:50:30 TP0] Load weight begin. avail mem=23.33 GB
  17288. 2025-07-20 15:50:30,629 - __main__ - INFO - [2025-07-20 15:50:30 TP0] Load weight begin. avail mem=23.33 GB
  17289. 2025-07-20 15:50:31,373 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17290. 2025-07-20 15:50:31,374 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17291. 2025-07-20 15:50:32,710 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  17292. 2025-07-20 15:50:38,837 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  17293. 2025-07-20 15:50:40,857 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:28, 9.48s/it]
  17294. 2025-07-20 15:50:40,857 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:28, 9.48s/it]
  17295. 2025-07-20 15:50:44,915 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  17296. 2025-07-20 15:50:50,576 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:19<00:19, 9.62s/it]
  17297. 2025-07-20 15:50:50,576 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:19<00:19, 9.62s/it]
  17298. 2025-07-20 15:50:50,994 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  17299. 2025-07-20 15:50:57,073 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  17300. 2025-07-20 15:51:02,104 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:30<00:10, 10.49s/it]
  17301. 2025-07-20 15:51:02,104 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:30<00:10, 10.49s/it]
  17302. 2025-07-20 15:51:03,151 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  17303. 2025-07-20 15:51:05,867 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:34<00:00, 7.84s/it]
  17304. 2025-07-20 15:51:05,867 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:34<00:00, 7.84s/it]
  17305. 2025-07-20 15:51:05,867 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:34<00:00, 8.62s/it]
  17306. 2025-07-20 15:51:05,867 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:34<00:00, 8.62s/it]
  17307. 2025-07-20 15:51:05,867 - sglang - INFO -
  17308. 2025-07-20 15:51:05,867 - __main__ - INFO -
  17309. 2025-07-20 15:51:05,930 - sglang - INFO - [2025-07-20 15:51:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  17310. 2025-07-20 15:51:05,930 - __main__ - INFO - [2025-07-20 15:51:05 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  17311. 2025-07-20 15:51:05,937 - sglang - INFO - [2025-07-20 15:51:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  17312. 2025-07-20 15:51:05,937 - __main__ - INFO - [2025-07-20 15:51:05 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  17313. 2025-07-20 15:51:05,938 - sglang - INFO - [2025-07-20 15:51:05 TP0] Memory pool end. avail mem=5.30 GB
  17314. 2025-07-20 15:51:05,938 - __main__ - INFO - [2025-07-20 15:51:05 TP0] Memory pool end. avail mem=5.30 GB
  17315. 2025-07-20 15:51:06,148 - sglang - INFO - [2025-07-20 15:51:06 TP0] Capture cuda graph begin. This can take up to several minutes.
  17316. 2025-07-20 15:51:06,148 - __main__ - INFO - [2025-07-20 15:51:06 TP0] Capture cuda graph begin. This can take up to several minutes.
  17317. 2025-07-20 15:51:08,325 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.28s/it] 50%|█████ | 2/4 [00:01<00:01, 1.44it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.93it/s] 100%|██████████| 4/4 [00:02<00:00, 2.31it/s] 100%|██████████| 4/4 [00:02<00:00, 1.84it/s]
  17318. 2025-07-20 15:51:08,325 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.28s/it] 50%|█████ | 2/4 [00:01<00:01, 1.44it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.93it/s] 100%|██████████| 4/4 [00:02<00:00, 2.31it/s] 100%|██████████| 4/4 [00:02<00:00, 1.84it/s]
  17319. 2025-07-20 15:51:08,325 - sglang - INFO - [2025-07-20 15:51:08 TP0] Capture cuda graph end. Time elapsed: 2.18 s
  17320. 2025-07-20 15:51:08,325 - __main__ - INFO - [2025-07-20 15:51:08 TP0] Capture cuda graph end. Time elapsed: 2.18 s
  17321. 2025-07-20 15:51:09,104 - sglang - INFO - [2025-07-20 15:51:09 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  17322. 2025-07-20 15:51:09,104 - __main__ - INFO - [2025-07-20 15:51:09 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  17323. 2025-07-20 15:51:09,230 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  17324. 2025-07-20 15:51:09,231 - sglang - INFO - [2025-07-20 15:51:09] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30025): address already in use
  17325. 2025-07-20 15:51:09,231 - __main__ - INFO - [2025-07-20 15:51:09] ERROR: [Errno 98] error while attempting to bind on address ('127.0.0.1', 30025): address already in use
  17326. 2025-07-20 15:51:15,310 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  17327. 2025-07-20 15:51:21,390 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  17328. 2025-07-20 15:51:27,427 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  17329. 2025-07-20 15:51:33,506 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  17330. 2025-07-20 15:51:39,584 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  17331. 2025-07-20 15:51:45,663 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  17332. 2025-07-20 15:51:51,742 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  17333. 2025-07-20 15:51:57,848 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  17334. 2025-07-20 15:52:03,922 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  17335. 2025-07-20 15:52:09,999 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  17336. 2025-07-20 15:52:16,134 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  17337. 2025-07-20 15:52:16,544 - __main__ - INFO - Got cancellation request for SGLang server
  17338. 2025-07-20 15:53:17,272 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  17339. 2025-07-20 15:53:17,272 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  17340. 2025-07-20 15:53:17,272 - __main__ - INFO - Found 1 total pdf paths to add
  17341. 2025-07-20 15:53:17,275 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  17342. 2025-07-20 15:53:17,480 - __main__ - INFO - Starting pipeline with PID 600445
  17343. 2025-07-20 15:53:17,480 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  17344. 2025-07-20 15:53:17,585 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  17345. 2025-07-20 15:53:18,617 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  17346. 2025-07-20 15:53:19,669 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  17347. 2025-07-20 15:53:20,741 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  17348. 2025-07-20 15:53:21,812 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  17349. 2025-07-20 15:53:22,888 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  17350. 2025-07-20 15:53:23,955 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  17351. 2025-07-20 15:53:24,205 - sglang - INFO - [2025-07-20 15:53:24] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=901973505, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17352. 2025-07-20 15:53:24,206 - __main__ - INFO - [2025-07-20 15:53:24] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=901973505, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17353. 2025-07-20 15:53:25,042 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  17354. 2025-07-20 15:53:25,272 - sglang - INFO - [2025-07-20 15:53:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17355. 2025-07-20 15:53:25,273 - __main__ - INFO - [2025-07-20 15:53:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17356. 2025-07-20 15:53:26,113 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  17357. 2025-07-20 15:53:27,180 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  17358. 2025-07-20 15:53:28,246 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  17359. 2025-07-20 15:53:29,447 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  17360. 2025-07-20 15:53:30,535 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  17361. 2025-07-20 15:53:31,117 - sglang - INFO - [2025-07-20 15:53:31 TP0] Overlap scheduler is disabled for multimodal models.
  17362. 2025-07-20 15:53:31,117 - __main__ - INFO - [2025-07-20 15:53:31 TP0] Overlap scheduler is disabled for multimodal models.
  17363. 2025-07-20 15:53:31,119 - sglang - INFO - [2025-07-20 15:53:31 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17364. 2025-07-20 15:53:31,119 - __main__ - INFO - [2025-07-20 15:53:31 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17365. 2025-07-20 15:53:31,119 - sglang - INFO - [2025-07-20 15:53:31 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17366. 2025-07-20 15:53:31,119 - __main__ - INFO - [2025-07-20 15:53:31 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17367. 2025-07-20 15:53:31,119 - sglang - INFO - [2025-07-20 15:53:31 TP0] Init torch distributed begin.
  17368. 2025-07-20 15:53:31,119 - __main__ - INFO - [2025-07-20 15:53:31 TP0] Init torch distributed begin.
  17369. 2025-07-20 15:53:31,612 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  17370. 2025-07-20 15:53:32,675 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  17371. 2025-07-20 15:53:33,741 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  17372. 2025-07-20 15:53:34,812 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  17373. 2025-07-20 15:53:35,887 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  17374. 2025-07-20 15:53:36,705 - sglang - INFO - [2025-07-20 15:53:36 TP0] Load weight begin. avail mem=23.33 GB
  17375. 2025-07-20 15:53:36,705 - __main__ - INFO - [2025-07-20 15:53:36 TP0] Load weight begin. avail mem=23.33 GB
  17376. 2025-07-20 15:53:36,923 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  17377. 2025-07-20 15:53:37,279 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17378. 2025-07-20 15:53:37,279 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17379. 2025-07-20 15:53:37,976 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  17380. 2025-07-20 15:53:38,259 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.02it/s]
  17381. 2025-07-20 15:53:38,259 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.02it/s]
  17382. 2025-07-20 15:53:39,039 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  17383. 2025-07-20 15:53:39,509 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.14s/it]
  17384. 2025-07-20 15:53:39,509 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.14s/it]
  17385. 2025-07-20 15:53:40,091 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  17386. 2025-07-20 15:53:40,530 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.08s/it]
  17387. 2025-07-20 15:53:40,530 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.08s/it]
  17388. 2025-07-20 15:53:40,927 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.23it/s]
  17389. 2025-07-20 15:53:40,927 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.23it/s]
  17390. 2025-07-20 15:53:40,927 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.10it/s]
  17391. 2025-07-20 15:53:40,927 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.10it/s]
  17392. 2025-07-20 15:53:40,927 - sglang - INFO -
  17393. 2025-07-20 15:53:40,927 - __main__ - INFO -
  17394. 2025-07-20 15:53:40,978 - sglang - INFO - [2025-07-20 15:53:40 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  17395. 2025-07-20 15:53:40,978 - __main__ - INFO - [2025-07-20 15:53:40 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  17396. 2025-07-20 15:53:40,984 - sglang - INFO - [2025-07-20 15:53:40 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  17397. 2025-07-20 15:53:40,984 - __main__ - INFO - [2025-07-20 15:53:40 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  17398. 2025-07-20 15:53:40,984 - sglang - INFO - [2025-07-20 15:53:40 TP0] Memory pool end. avail mem=5.30 GB
  17399. 2025-07-20 15:53:40,984 - __main__ - INFO - [2025-07-20 15:53:40 TP0] Memory pool end. avail mem=5.30 GB
  17400. 2025-07-20 15:53:41,132 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  17401. 2025-07-20 15:53:41,153 - sglang - INFO - [2025-07-20 15:53:41 TP0] Capture cuda graph begin. This can take up to several minutes.
  17402. 2025-07-20 15:53:41,153 - __main__ - INFO - [2025-07-20 15:53:41 TP0] Capture cuda graph begin. This can take up to several minutes.
  17403. 2025-07-20 15:53:42,177 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  17404. 2025-07-20 15:53:43,016 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.02s/it] 50%|█████ | 2/4 [00:01<00:01, 1.71it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s] 100%|██████████| 4/4 [00:01<00:00, 2.63it/s] 100%|██████████| 4/4 [00:01<00:00, 2.15it/s]
  17405. 2025-07-20 15:53:43,016 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.02s/it] 50%|█████ | 2/4 [00:01<00:01, 1.71it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s] 100%|██████████| 4/4 [00:01<00:00, 2.63it/s] 100%|██████████| 4/4 [00:01<00:00, 2.15it/s]
  17406. 2025-07-20 15:53:43,016 - sglang - INFO - [2025-07-20 15:53:43 TP0] Capture cuda graph end. Time elapsed: 1.86 s
  17407. 2025-07-20 15:53:43,016 - __main__ - INFO - [2025-07-20 15:53:43 TP0] Capture cuda graph end. Time elapsed: 1.86 s
  17408. 2025-07-20 15:53:43,218 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  17409. 2025-07-20 15:53:43,703 - sglang - INFO - [2025-07-20 15:53:43 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  17410. 2025-07-20 15:53:43,704 - __main__ - INFO - [2025-07-20 15:53:43 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  17411. 2025-07-20 15:53:44,277 - __main__ - INFO - sglang server is ready.
  17412. 2025-07-20 15:53:44,277 - __main__ - INFO - Queue remaining: 1
  17413. 2025-07-20 15:53:44,278 - __main__ - INFO -
  17414. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17415. ----------------------------------------------------------------------------------
  17416. 2025-07-20 15:53:44,278 - __main__ - INFO -
  17417. Worker ID
  17418. ---------
  17419. 2025-07-20 15:53:44,278 - __main__ - INFO - Worker 0 processing work item 91107f3e53da42365e4111879440c8b71d98ac54
  17420. 2025-07-20 15:53:44,278 - __main__ - INFO - Created all tasks for 91107f3e53da42365e4111879440c8b71d98ac54
  17421. 2025-07-20 15:53:44,283 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/horribleocr.pdf in worker 0
  17422. 2025-07-20 15:53:44,790 - sglang - INFO - [2025-07-20 15:53:44 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  17423. 2025-07-20 15:53:44,790 - __main__ - INFO - [2025-07-20 15:53:44 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  17424. 2025-07-20 15:53:44,791 - __main__ - INFO - sglang running req: 0 queue req: 0
  17425. 2025-07-20 15:53:45,773 - sglang - INFO - [2025-07-20 15:53:45] The server is fired up and ready to roll!
  17426. 2025-07-20 15:53:45,773 - __main__ - INFO - [2025-07-20 15:53:45] The server is fired up and ready to roll!
  17427. 2025-07-20 15:53:50,742 - __main__ - INFO - Built page query for tests/gnarly_pdfs/horribleocr.pdf-1
  17428. 2025-07-20 15:53:54,279 - __main__ - INFO - Queue remaining: 0
  17429. 2025-07-20 15:53:54,334 - __main__ - INFO -
  17430. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17431. ----------------------------------------------------------------------------------
  17432. 2025-07-20 15:53:54,334 - __main__ - INFO -
  17433. Worker ID | started
  17434. ----------+--------
  17435. 0 | 1
  17436. 2025-07-20 15:54:04,335 - __main__ - INFO - Queue remaining: 0
  17437. 2025-07-20 15:54:04,340 - __main__ - INFO -
  17438. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17439. ----------------------------------------------------------------------------------
  17440. 2025-07-20 15:54:04,340 - __main__ - INFO -
  17441. Worker ID | started
  17442. ----------+--------
  17443. 0 | 1
  17444. 2025-07-20 15:54:14,341 - __main__ - INFO - Queue remaining: 0
  17445. 2025-07-20 15:54:14,342 - __main__ - INFO -
  17446. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17447. ----------------------------------------------------------------------------------
  17448. 2025-07-20 15:54:14,342 - __main__ - INFO -
  17449. Worker ID | started
  17450. ----------+--------
  17451. 0 | 1
  17452. 2025-07-20 15:59:14,110 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  17453. 2025-07-20 15:59:14,110 - __main__ - INFO - Loading file at tests/gnarly_pdfs/horribleocr.pdf as PDF document
  17454. 2025-07-20 15:59:14,110 - __main__ - INFO - Found 1 total pdf paths to add
  17455. 2025-07-20 15:59:14,116 - __main__ - INFO - Calculated items_per_group: 500 based on average pages per PDF: 1.00
  17456. 2025-07-20 15:59:14,297 - __main__ - INFO - Starting pipeline with PID 602377
  17457. 2025-07-20 15:59:14,297 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  17458. 2025-07-20 15:59:14,391 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  17459. 2025-07-20 15:59:15,421 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  17460. 2025-07-20 15:59:16,455 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  17461. 2025-07-20 15:59:17,506 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  17462. 2025-07-20 15:59:18,558 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  17463. 2025-07-20 15:59:19,643 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  17464. 2025-07-20 15:59:20,706 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  17465. 2025-07-20 15:59:21,736 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  17466. 2025-07-20 15:59:22,783 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  17467. 2025-07-20 15:59:23,486 - sglang - INFO - [2025-07-20 15:59:23] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=536173719, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17468. 2025-07-20 15:59:23,486 - __main__ - INFO - [2025-07-20 15:59:23] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=536173719, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17469. 2025-07-20 15:59:23,876 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  17470. 2025-07-20 15:59:24,549 - sglang - INFO - [2025-07-20 15:59:24] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17471. 2025-07-20 15:59:24,549 - __main__ - INFO - [2025-07-20 15:59:24] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17472. 2025-07-20 15:59:24,955 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  17473. 2025-07-20 15:59:26,040 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  17474. 2025-07-20 15:59:27,100 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  17475. 2025-07-20 15:59:28,159 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  17476. 2025-07-20 15:59:29,204 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  17477. 2025-07-20 15:59:30,274 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  17478. 2025-07-20 15:59:30,705 - sglang - INFO - [2025-07-20 15:59:30 TP0] Overlap scheduler is disabled for multimodal models.
  17479. 2025-07-20 15:59:30,705 - __main__ - INFO - [2025-07-20 15:59:30 TP0] Overlap scheduler is disabled for multimodal models.
  17480. 2025-07-20 15:59:30,707 - sglang - INFO - [2025-07-20 15:59:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17481. 2025-07-20 15:59:30,707 - __main__ - INFO - [2025-07-20 15:59:30 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17482. 2025-07-20 15:59:30,707 - sglang - INFO - [2025-07-20 15:59:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17483. 2025-07-20 15:59:30,707 - __main__ - INFO - [2025-07-20 15:59:30 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17484. 2025-07-20 15:59:30,708 - sglang - INFO - [2025-07-20 15:59:30 TP0] Init torch distributed begin.
  17485. 2025-07-20 15:59:30,708 - __main__ - INFO - [2025-07-20 15:59:30 TP0] Init torch distributed begin.
  17486. 2025-07-20 15:59:31,348 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  17487. 2025-07-20 15:59:32,384 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  17488. 2025-07-20 15:59:33,440 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  17489. 2025-07-20 15:59:34,493 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  17490. 2025-07-20 15:59:35,573 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  17491. 2025-07-20 15:59:36,200 - sglang - INFO - [2025-07-20 15:59:36 TP0] Load weight begin. avail mem=23.33 GB
  17492. 2025-07-20 15:59:36,200 - __main__ - INFO - [2025-07-20 15:59:36 TP0] Load weight begin. avail mem=23.33 GB
  17493. 2025-07-20 15:59:36,649 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  17494. 2025-07-20 15:59:37,318 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17495. 2025-07-20 15:59:37,318 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17496. 2025-07-20 15:59:37,706 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  17497. 2025-07-20 15:59:38,774 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  17498. 2025-07-20 15:59:39,799 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  17499. 2025-07-20 15:59:40,844 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  17500. 2025-07-20 15:59:41,884 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  17501. 2025-07-20 15:59:42,944 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  17502. 2025-07-20 15:59:43,989 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  17503. 2025-07-20 15:59:45,030 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  17504. 2025-07-20 15:59:46,091 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  17505. 2025-07-20 15:59:47,175 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  17506. 2025-07-20 15:59:48,244 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  17507. 2025-07-20 15:59:49,295 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  17508. 2025-07-20 15:59:50,339 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  17509. 2025-07-20 15:59:50,473 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.15s/it]
  17510. 2025-07-20 15:59:50,473 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.15s/it]
  17511. 2025-07-20 15:59:51,389 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  17512. 2025-07-20 15:59:52,446 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  17513. 2025-07-20 15:59:53,489 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  17514. 2025-07-20 15:59:54,520 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  17515. 2025-07-20 15:59:55,561 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  17516. 2025-07-20 15:59:56,607 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  17517. 2025-07-20 15:59:57,649 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  17518. 2025-07-20 15:59:58,700 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  17519. 2025-07-20 15:59:59,756 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  17520. 2025-07-20 16:00:00,800 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  17521. 2025-07-20 16:00:01,838 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  17522. 2025-07-20 16:00:02,881 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  17523. 2025-07-20 16:00:03,817 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.27s/it]
  17524. 2025-07-20 16:00:03,817 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.27s/it]
  17525. 2025-07-20 16:00:03,938 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  17526. 2025-07-20 16:00:04,995 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  17527. 2025-07-20 16:00:06,040 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  17528. 2025-07-20 16:00:07,082 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  17529. 2025-07-20 16:00:08,149 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  17530. 2025-07-20 16:00:09,213 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  17531. 2025-07-20 16:00:10,281 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  17532. 2025-07-20 16:00:11,341 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  17533. 2025-07-20 16:00:12,383 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  17534. 2025-07-20 16:00:13,440 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  17535. 2025-07-20 16:00:14,504 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  17536. 2025-07-20 16:00:15,574 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  17537. 2025-07-20 16:00:16,647 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  17538. 2025-07-20 16:00:17,136 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.29s/it]
  17539. 2025-07-20 16:00:17,136 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.29s/it]
  17540. 2025-07-20 16:00:17,699 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  17541. 2025-07-20 16:00:18,757 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  17542. 2025-07-20 16:00:19,812 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  17543. 2025-07-20 16:00:20,864 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  17544. 2025-07-20 16:00:21,907 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 9.91s/it]
  17545. 2025-07-20 16:00:21,907 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 9.91s/it]
  17546. 2025-07-20 16:00:21,907 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 11.14s/it]
  17547. 2025-07-20 16:00:21,907 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:44<00:00, 11.14s/it]
  17548. 2025-07-20 16:00:21,907 - sglang - INFO -
  17549. 2025-07-20 16:00:21,907 - __main__ - INFO -
  17550. 2025-07-20 16:00:21,909 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  17551. 2025-07-20 16:00:21,966 - sglang - INFO - [2025-07-20 16:00:21 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  17552. 2025-07-20 16:00:21,966 - __main__ - INFO - [2025-07-20 16:00:21 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  17553. 2025-07-20 16:00:21,978 - sglang - INFO - [2025-07-20 16:00:21 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  17554. 2025-07-20 16:00:21,978 - __main__ - INFO - [2025-07-20 16:00:21 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  17555. 2025-07-20 16:00:21,978 - sglang - INFO - [2025-07-20 16:00:21 TP0] Memory pool end. avail mem=5.30 GB
  17556. 2025-07-20 16:00:21,978 - __main__ - INFO - [2025-07-20 16:00:21 TP0] Memory pool end. avail mem=5.30 GB
  17557. 2025-07-20 16:00:22,362 - sglang - INFO - [2025-07-20 16:00:22 TP0] Capture cuda graph begin. This can take up to several minutes.
  17558. 2025-07-20 16:00:22,362 - __main__ - INFO - [2025-07-20 16:00:22 TP0] Capture cuda graph begin. This can take up to several minutes.
  17559. 2025-07-20 16:00:22,999 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  17560. 2025-07-20 16:00:24,106 - __main__ - WARNING - Attempt 67: Please wait for sglang server to become ready...
  17561. 2025-07-20 16:00:24,997 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.61s/it] 50%|█████ | 2/4 [00:01<00:01, 1.14it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.60it/s] 100%|██████████| 4/4 [00:02<00:00, 1.96it/s] 100%|██████████| 4/4 [00:02<00:00, 1.52it/s]
  17562. 2025-07-20 16:00:24,997 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.61s/it] 50%|█████ | 2/4 [00:01<00:01, 1.14it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.60it/s] 100%|██████████| 4/4 [00:02<00:00, 1.96it/s] 100%|██████████| 4/4 [00:02<00:00, 1.52it/s]
  17563. 2025-07-20 16:00:24,997 - sglang - INFO - [2025-07-20 16:00:24 TP0] Capture cuda graph end. Time elapsed: 2.64 s
  17564. 2025-07-20 16:00:24,997 - __main__ - INFO - [2025-07-20 16:00:24 TP0] Capture cuda graph end. Time elapsed: 2.64 s
  17565. 2025-07-20 16:00:25,198 - __main__ - WARNING - Attempt 68: Please wait for sglang server to become ready...
  17566. 2025-07-20 16:00:25,894 - sglang - INFO - [2025-07-20 16:00:25 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  17567. 2025-07-20 16:00:25,894 - __main__ - INFO - [2025-07-20 16:00:25 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  17568. 2025-07-20 16:00:26,332 - __main__ - INFO - sglang server is ready.
  17569. 2025-07-20 16:00:26,333 - __main__ - INFO - Queue remaining: 1
  17570. 2025-07-20 16:00:26,333 - __main__ - INFO -
  17571. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17572. ----------------------------------------------------------------------------------
  17573. 2025-07-20 16:00:26,333 - __main__ - INFO -
  17574. Worker ID
  17575. ---------
  17576. 2025-07-20 16:00:26,333 - __main__ - INFO - Worker 0 processing work item 91107f3e53da42365e4111879440c8b71d98ac54
  17577. 2025-07-20 16:00:26,333 - __main__ - INFO - Created all tasks for 91107f3e53da42365e4111879440c8b71d98ac54
  17578. 2025-07-20 16:00:26,340 - __main__ - INFO - Got 1 pages to do for tests/gnarly_pdfs/horribleocr.pdf in worker 0
  17579. 2025-07-20 16:00:27,007 - sglang - INFO - [2025-07-20 16:00:27 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  17580. 2025-07-20 16:00:27,007 - __main__ - INFO - [2025-07-20 16:00:27 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  17581. 2025-07-20 16:00:27,008 - __main__ - INFO - sglang running req: 0 queue req: 0
  17582. 2025-07-20 16:00:28,270 - sglang - INFO - [2025-07-20 16:00:28] The server is fired up and ready to roll!
  17583. 2025-07-20 16:00:28,270 - __main__ - INFO - [2025-07-20 16:00:28] The server is fired up and ready to roll!
  17584. 2025-07-20 16:00:32,982 - __main__ - INFO - Built page query for tests/gnarly_pdfs/horribleocr.pdf-1
  17585. 2025-07-20 16:00:36,336 - __main__ - INFO - Queue remaining: 0
  17586. 2025-07-20 16:00:36,336 - __main__ - INFO -
  17587. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17588. ----------------------------------------------------------------------------------
  17589. 2025-07-20 16:00:36,336 - __main__ - INFO -
  17590. Worker ID | started
  17591. ----------+--------
  17592. 0 | 1
  17593. 2025-07-20 16:00:46,337 - __main__ - INFO - Queue remaining: 0
  17594. 2025-07-20 16:00:46,341 - __main__ - INFO -
  17595. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17596. ----------------------------------------------------------------------------------
  17597. 2025-07-20 16:00:46,341 - __main__ - INFO -
  17598. Worker ID | started
  17599. ----------+--------
  17600. 0 | 1
  17601. 2025-07-20 16:00:56,344 - __main__ - INFO - Queue remaining: 0
  17602. 2025-07-20 16:00:56,344 - __main__ - INFO -
  17603. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17604. ----------------------------------------------------------------------------------
  17605. 2025-07-20 16:00:56,344 - __main__ - INFO -
  17606. Worker ID | started
  17607. ----------+--------
  17608. 0 | 1
  17609. 2025-07-20 16:00:58,657 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  17610. 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 1 exiting due to empty queue
  17611. 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 2 exiting due to empty queue
  17612. 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 3 exiting due to empty queue
  17613. 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 4 exiting due to empty queue
  17614. 2025-07-20 16:00:58,657 - __main__ - INFO - Worker 5 exiting due to empty queue
  17615. 2025-07-20 16:00:58,658 - __main__ - INFO - Worker 6 exiting due to empty queue
  17616. 2025-07-20 16:00:58,658 - __main__ - INFO - Worker 7 exiting due to empty queue
  17617. 2025-07-20 16:01:00,171 - sglang - INFO - [2025-07-20 16:01:00 TP0] Prefill batch. #new-seq: 1, #new-token: 1809, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  17618. 2025-07-20 16:01:00,171 - __main__ - INFO - sglang running req: 0 queue req: 0
  17619. 2025-07-20 16:02:40,891 - __main__ - INFO - Queue remaining: 0
  17620. 2025-07-20 16:02:40,951 - __main__ - INFO -
  17621. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17622. ----------------------------------------------------------------------------------
  17623. 2025-07-20 16:02:40,952 - __main__ - INFO -
  17624. Worker ID | started
  17625. ----------+--------
  17626. 0 | 1
  17627. 2025-07-20 16:08:42,791 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  17628. 2025-07-20 16:08:42,792 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
  17629. 2025-07-20 16:08:42,792 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
  17630. 2025-07-20 16:08:42,793 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
  17631. 2025-07-20 16:08:42,793 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
  17632. 2025-07-20 16:08:42,794 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
  17633. 2025-07-20 16:08:42,794 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
  17634. 2025-07-20 16:08:42,794 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
  17635. 2025-07-20 16:08:42,795 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
  17636. 2025-07-20 16:08:42,795 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
  17637. 2025-07-20 16:08:42,795 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
  17638. 2025-07-20 16:08:42,796 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
  17639. 2025-07-20 16:08:42,796 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
  17640. 2025-07-20 16:08:42,796 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
  17641. 2025-07-20 16:08:42,797 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
  17642. 2025-07-20 16:08:42,797 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
  17643. 2025-07-20 16:08:42,797 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
  17644. 2025-07-20 16:08:42,797 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
  17645. 2025-07-20 16:08:42,798 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
  17646. 2025-07-20 16:08:42,798 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
  17647. 2025-07-20 16:08:42,798 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
  17648. 2025-07-20 16:08:42,799 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
  17649. 2025-07-20 16:08:42,799 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
  17650. 2025-07-20 16:08:42,800 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
  17651. 2025-07-20 16:08:42,800 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
  17652. 2025-07-20 16:08:42,800 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
  17653. 2025-07-20 16:08:42,800 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
  17654. 2025-07-20 16:08:42,801 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
  17655. 2025-07-20 16:08:42,801 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
  17656. 2025-07-20 16:08:42,801 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
  17657. 2025-07-20 16:08:42,802 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
  17658. 2025-07-20 16:08:42,802 - __main__ - INFO - Found 30 total pdf paths to add
  17659. 2025-07-20 16:08:42,895 - __main__ - INFO - Calculated items_per_group: 65 based on average pages per PDF: 7.60
  17660. 2025-07-20 16:08:43,116 - __main__ - INFO - Starting pipeline with PID 604527
  17661. 2025-07-20 16:08:43,116 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  17662. 2025-07-20 16:08:43,205 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  17663. 2025-07-20 16:08:44,236 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  17664. 2025-07-20 16:08:45,273 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  17665. 2025-07-20 16:08:46,325 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  17666. 2025-07-20 16:08:47,376 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  17667. 2025-07-20 16:08:48,442 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  17668. 2025-07-20 16:08:49,509 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  17669. 2025-07-20 16:08:50,577 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  17670. 2025-07-20 16:08:51,644 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  17671. 2025-07-20 16:08:51,682 - sglang - INFO - [2025-07-20 16:08:51] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=19970587, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17672. 2025-07-20 16:08:51,682 - __main__ - INFO - [2025-07-20 16:08:51] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=19970587, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17673. 2025-07-20 16:08:52,707 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  17674. 2025-07-20 16:08:52,763 - sglang - INFO - [2025-07-20 16:08:52] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17675. 2025-07-20 16:08:52,763 - __main__ - INFO - [2025-07-20 16:08:52] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17676. 2025-07-20 16:08:53,739 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  17677. 2025-07-20 16:08:54,800 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  17678. 2025-07-20 16:08:55,868 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  17679. 2025-07-20 16:08:57,085 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  17680. 2025-07-20 16:08:58,164 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  17681. 2025-07-20 16:08:58,652 - sglang - INFO - [2025-07-20 16:08:58 TP0] Overlap scheduler is disabled for multimodal models.
  17682. 2025-07-20 16:08:58,652 - __main__ - INFO - [2025-07-20 16:08:58 TP0] Overlap scheduler is disabled for multimodal models.
  17683. 2025-07-20 16:08:58,654 - sglang - INFO - [2025-07-20 16:08:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17684. 2025-07-20 16:08:58,654 - __main__ - INFO - [2025-07-20 16:08:58 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17685. 2025-07-20 16:08:58,654 - sglang - INFO - [2025-07-20 16:08:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17686. 2025-07-20 16:08:58,655 - __main__ - INFO - [2025-07-20 16:08:58 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17687. 2025-07-20 16:08:58,655 - sglang - INFO - [2025-07-20 16:08:58 TP0] Init torch distributed begin.
  17688. 2025-07-20 16:08:58,655 - __main__ - INFO - [2025-07-20 16:08:58 TP0] Init torch distributed begin.
  17689. 2025-07-20 16:08:59,241 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  17690. 2025-07-20 16:09:00,296 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  17691. 2025-07-20 16:09:01,363 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  17692. 2025-07-20 16:09:02,421 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  17693. 2025-07-20 16:09:03,488 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  17694. 2025-07-20 16:09:04,254 - sglang - INFO - [2025-07-20 16:09:04 TP0] Load weight begin. avail mem=23.33 GB
  17695. 2025-07-20 16:09:04,254 - __main__ - INFO - [2025-07-20 16:09:04 TP0] Load weight begin. avail mem=23.33 GB
  17696. 2025-07-20 16:09:04,565 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  17697. 2025-07-20 16:09:04,934 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17698. 2025-07-20 16:09:04,934 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17699. 2025-07-20 16:09:05,642 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  17700. 2025-07-20 16:09:06,709 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  17701. 2025-07-20 16:09:07,777 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  17702. 2025-07-20 16:09:08,845 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  17703. 2025-07-20 16:09:09,906 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  17704. 2025-07-20 16:09:10,977 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  17705. 2025-07-20 16:09:12,050 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  17706. 2025-07-20 16:09:13,118 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  17707. 2025-07-20 16:09:14,186 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  17708. 2025-07-20 16:09:15,250 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  17709. 2025-07-20 16:09:16,310 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  17710. 2025-07-20 16:09:17,362 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  17711. 2025-07-20 16:09:17,696 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.76s/it]
  17712. 2025-07-20 16:09:17,696 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.76s/it]
  17713. 2025-07-20 16:09:17,885 - __main__ - INFO - Got cancellation request for SGLang server
  17714. 2025-07-20 16:09:38,253 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  17715. 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
  17716. 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
  17717. 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
  17718. 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
  17719. 2025-07-20 16:09:38,253 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
  17720. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
  17721. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
  17722. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
  17723. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
  17724. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
  17725. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
  17726. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
  17727. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
  17728. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
  17729. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
  17730. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
  17731. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
  17732. 2025-07-20 16:09:38,254 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
  17733. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
  17734. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
  17735. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
  17736. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
  17737. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
  17738. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
  17739. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
  17740. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
  17741. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
  17742. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
  17743. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
  17744. 2025-07-20 16:09:38,255 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
  17745. 2025-07-20 16:09:38,255 - __main__ - INFO - Found 30 total pdf paths to add
  17746. 2025-07-20 16:09:38,306 - __main__ - INFO - Calculated items_per_group: 65 based on average pages per PDF: 7.60
  17747. 2025-07-20 16:09:38,492 - __main__ - INFO - Starting pipeline with PID 605324
  17748. 2025-07-20 16:09:38,492 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  17749. 2025-07-20 16:09:38,558 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  17750. 2025-07-20 16:09:39,588 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  17751. 2025-07-20 16:09:40,623 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  17752. 2025-07-20 16:09:41,668 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  17753. 2025-07-20 16:09:42,730 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  17754. 2025-07-20 16:09:43,797 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  17755. 2025-07-20 16:09:44,867 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  17756. 2025-07-20 16:09:45,079 - sglang - INFO - [2025-07-20 16:09:45] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=829163176, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17757. 2025-07-20 16:09:45,079 - __main__ - INFO - [2025-07-20 16:09:45] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=829163176, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  17758. 2025-07-20 16:09:45,967 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  17759. 2025-07-20 16:09:46,043 - sglang - INFO - [2025-07-20 16:09:46] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17760. 2025-07-20 16:09:46,043 - __main__ - INFO - [2025-07-20 16:09:46] Use chat template for the OpenAI-compatible API server: qwen2-vl
  17761. 2025-07-20 16:09:47,043 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  17762. 2025-07-20 16:09:48,112 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  17763. 2025-07-20 16:09:49,182 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  17764. 2025-07-20 16:09:50,238 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  17765. 2025-07-20 16:09:51,283 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  17766. 2025-07-20 16:09:52,356 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  17767. 2025-07-20 16:09:52,586 - sglang - INFO - [2025-07-20 16:09:52 TP0] Overlap scheduler is disabled for multimodal models.
  17768. 2025-07-20 16:09:52,587 - __main__ - INFO - [2025-07-20 16:09:52 TP0] Overlap scheduler is disabled for multimodal models.
  17769. 2025-07-20 16:09:52,589 - sglang - INFO - [2025-07-20 16:09:52 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17770. 2025-07-20 16:09:52,589 - __main__ - INFO - [2025-07-20 16:09:52 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  17771. 2025-07-20 16:09:52,590 - sglang - INFO - [2025-07-20 16:09:52 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17772. 2025-07-20 16:09:52,590 - __main__ - INFO - [2025-07-20 16:09:52 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  17773. 2025-07-20 16:09:52,590 - sglang - INFO - [2025-07-20 16:09:52 TP0] Init torch distributed begin.
  17774. 2025-07-20 16:09:52,590 - __main__ - INFO - [2025-07-20 16:09:52 TP0] Init torch distributed begin.
  17775. 2025-07-20 16:09:53,433 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  17776. 2025-07-20 16:09:54,500 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  17777. 2025-07-20 16:09:55,568 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  17778. 2025-07-20 16:09:56,612 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  17779. 2025-07-20 16:09:57,663 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  17780. 2025-07-20 16:09:58,130 - sglang - INFO - [2025-07-20 16:09:58 TP0] Load weight begin. avail mem=23.33 GB
  17781. 2025-07-20 16:09:58,130 - __main__ - INFO - [2025-07-20 16:09:58 TP0] Load weight begin. avail mem=23.33 GB
  17782. 2025-07-20 16:09:58,738 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17783. 2025-07-20 16:09:58,738 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  17784. 2025-07-20 16:09:58,739 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  17785. 2025-07-20 16:09:59,480 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.26it/s]
  17786. 2025-07-20 16:09:59,480 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.26it/s]
  17787. 2025-07-20 16:09:59,815 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  17788. 2025-07-20 16:10:00,882 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  17789. 2025-07-20 16:10:01,911 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  17790. 2025-07-20 16:10:02,974 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  17791. 2025-07-20 16:10:04,041 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  17792. 2025-07-20 16:10:05,113 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  17793. 2025-07-20 16:10:06,180 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  17794. 2025-07-20 16:10:07,247 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  17795. 2025-07-20 16:10:08,314 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  17796. 2025-07-20 16:10:09,381 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  17797. 2025-07-20 16:10:10,461 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  17798. 2025-07-20 16:10:11,524 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  17799. 2025-07-20 16:10:12,226 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:13<00:15, 7.83s/it]
  17800. 2025-07-20 16:10:12,226 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:13<00:15, 7.83s/it]
  17801. 2025-07-20 16:10:12,600 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  17802. 2025-07-20 16:10:13,667 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  17803. 2025-07-20 16:10:14,722 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  17804. 2025-07-20 16:10:15,789 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  17805. 2025-07-20 16:10:16,859 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  17806. 2025-07-20 16:10:17,927 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  17807. 2025-07-20 16:10:18,994 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  17808. 2025-07-20 16:10:20,066 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  17809. 2025-07-20 16:10:21,133 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  17810. 2025-07-20 16:10:22,205 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  17811. 2025-07-20 16:10:23,272 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  17812. 2025-07-20 16:10:24,339 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  17813. 2025-07-20 16:10:24,897 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:26<00:10, 10.04s/it]
  17814. 2025-07-20 16:10:24,897 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:26<00:10, 10.04s/it]
  17815. 2025-07-20 16:10:25,415 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  17816. 2025-07-20 16:10:26,482 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  17817. 2025-07-20 16:10:27,549 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  17818. 2025-07-20 16:10:28,616 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  17819. 2025-07-20 16:10:29,296 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:30<00:00, 7.81s/it]
  17820. 2025-07-20 16:10:29,296 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:30<00:00, 7.81s/it]
  17821. 2025-07-20 16:10:29,296 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:30<00:00, 7.65s/it]
  17822. 2025-07-20 16:10:29,296 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:30<00:00, 7.65s/it]
  17823. 2025-07-20 16:10:29,296 - sglang - INFO -
  17824. 2025-07-20 16:10:29,296 - __main__ - INFO -
  17825. 2025-07-20 16:10:29,377 - sglang - INFO - [2025-07-20 16:10:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  17826. 2025-07-20 16:10:29,377 - __main__ - INFO - [2025-07-20 16:10:29 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  17827. 2025-07-20 16:10:29,390 - sglang - INFO - [2025-07-20 16:10:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  17828. 2025-07-20 16:10:29,391 - __main__ - INFO - [2025-07-20 16:10:29 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  17829. 2025-07-20 16:10:29,391 - sglang - INFO - [2025-07-20 16:10:29 TP0] Memory pool end. avail mem=5.30 GB
  17830. 2025-07-20 16:10:29,391 - __main__ - INFO - [2025-07-20 16:10:29 TP0] Memory pool end. avail mem=5.30 GB
  17831. 2025-07-20 16:10:29,605 - sglang - INFO - [2025-07-20 16:10:29 TP0] Capture cuda graph begin. This can take up to several minutes.
  17832. 2025-07-20 16:10:29,605 - __main__ - INFO - [2025-07-20 16:10:29 TP0] Capture cuda graph begin. This can take up to several minutes.
  17833. 2025-07-20 16:10:29,692 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  17834. 2025-07-20 16:10:30,746 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  17835. 2025-07-20 16:10:31,822 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  17836. 2025-07-20 16:10:31,851 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.38s/it] 50%|█████ | 2/4 [00:01<00:01, 1.36it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.89it/s] 100%|██████████| 4/4 [00:02<00:00, 2.29it/s] 100%|██████████| 4/4 [00:02<00:00, 1.78it/s]
  17837. 2025-07-20 16:10:31,851 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.38s/it] 50%|█████ | 2/4 [00:01<00:01, 1.36it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.89it/s] 100%|██████████| 4/4 [00:02<00:00, 2.29it/s] 100%|██████████| 4/4 [00:02<00:00, 1.78it/s]
  17838. 2025-07-20 16:10:31,851 - sglang - INFO - [2025-07-20 16:10:31 TP0] Capture cuda graph end. Time elapsed: 2.25 s
  17839. 2025-07-20 16:10:31,851 - __main__ - INFO - [2025-07-20 16:10:31 TP0] Capture cuda graph end. Time elapsed: 2.25 s
  17840. 2025-07-20 16:10:32,610 - sglang - INFO - [2025-07-20 16:10:32 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  17841. 2025-07-20 16:10:32,610 - __main__ - INFO - [2025-07-20 16:10:32 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  17842. 2025-07-20 16:10:32,906 - __main__ - INFO - sglang server is ready.
  17843. 2025-07-20 16:10:32,906 - __main__ - INFO - Queue remaining: 1
  17844. 2025-07-20 16:10:32,906 - __main__ - INFO -
  17845. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17846. ----------------------------------------------------------------------------------
  17847. 2025-07-20 16:10:32,906 - __main__ - INFO -
  17848. Worker ID
  17849. ---------
  17850. 2025-07-20 16:10:32,907 - __main__ - INFO - Worker 0 processing work item 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
  17851. 2025-07-20 16:10:32,907 - __main__ - INFO - Created all tasks for 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
  17852. 2025-07-20 16:10:32,920 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
  17853. 2025-07-20 16:10:32,923 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106004000.pdf in worker 0
  17854. 2025-07-20 16:10:32,925 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106008000.pdf in worker 0
  17855. 2025-07-20 16:10:32,927 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106011000.pdf in worker 0
  17856. 2025-07-20 16:10:32,930 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106013001.pdf in worker 0
  17857. 2025-07-20 16:10:32,932 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013002.pdf in worker 0
  17858. 2025-07-20 16:10:32,934 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
  17859. 2025-07-20 16:10:32,936 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
  17860. 2025-07-20 16:10:32,939 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
  17861. 2025-07-20 16:10:32,942 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
  17862. 2025-07-20 16:10:32,944 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
  17863. 2025-07-20 16:10:32,946 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900602.pdf in worker 0
  17864. 2025-07-20 16:10:32,949 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
  17865. 2025-07-20 16:10:32,951 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900701.pdf in worker 0
  17866. 2025-07-20 16:10:32,953 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900702.pdf in worker 0
  17867. 2025-07-20 16:10:32,955 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029008.pdf in worker 0
  17868. 2025-07-20 16:10:32,957 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900901.pdf in worker 0
  17869. 2025-07-20 16:10:32,959 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900902.pdf in worker 0
  17870. 2025-07-20 16:10:32,961 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901001.pdf in worker 0
  17871. 2025-07-20 16:10:32,962 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901002.pdf in worker 0
  17872. 2025-07-20 16:10:32,964 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010603501801.pdf in worker 0
  17873. 2025-07-20 16:10:32,966 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106041000.pdf in worker 0
  17874. 2025-07-20 16:10:32,968 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010604200101.pdf in worker 0
  17875. 2025-07-20 16:10:32,970 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG344010604300102.pdf in worker 0
  17876. 2025-07-20 16:10:32,972 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
  17877. 2025-07-20 16:10:32,975 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
  17878. 2025-07-20 16:10:32,976 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
  17879. 2025-07-20 16:10:32,978 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
  17880. 2025-07-20 16:10:33,053 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
  17881. 2025-07-20 16:10:33,057 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
  17882. 2025-07-20 16:10:33,743 - sglang - INFO - [2025-07-20 16:10:33 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  17883. 2025-07-20 16:10:33,743 - __main__ - INFO - [2025-07-20 16:10:33 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  17884. 2025-07-20 16:10:33,744 - __main__ - INFO - sglang running req: 0 queue req: 0
  17885. 2025-07-20 16:10:39,640 - sglang - INFO - [2025-07-20 16:10:39] The server is fired up and ready to roll!
  17886. 2025-07-20 16:10:39,640 - __main__ - INFO - [2025-07-20 16:10:39] The server is fired up and ready to roll!
  17887. 2025-07-20 16:10:42,934 - __main__ - INFO - Queue remaining: 0
  17888. 2025-07-20 16:10:42,934 - __main__ - INFO -
  17889. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17890. ----------------------------------------------------------------------------------
  17891. 2025-07-20 16:10:42,935 - __main__ - INFO -
  17892. Worker ID | started
  17893. ----------+--------
  17894. 0 | 228
  17895. 2025-07-20 16:10:52,419 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
  17896. 2025-07-20 16:10:52,435 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
  17897. 2025-07-20 16:10:52,439 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
  17898. 2025-07-20 16:10:52,451 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
  17899. 2025-07-20 16:10:52,453 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
  17900. 2025-07-20 16:10:52,463 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
  17901. 2025-07-20 16:10:52,464 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
  17902. 2025-07-20 16:10:52,481 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
  17903. 2025-07-20 16:10:52,482 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
  17904. 2025-07-20 16:10:52,489 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
  17905. 2025-07-20 16:10:52,492 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-4
  17906. 2025-07-20 16:10:52,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-2
  17907. 2025-07-20 16:10:52,543 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-3
  17908. 2025-07-20 16:10:52,547 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-5
  17909. 2025-07-20 16:10:52,550 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-5
  17910. 2025-07-20 16:10:52,553 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-7
  17911. 2025-07-20 16:10:52,557 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-6
  17912. 2025-07-20 16:10:52,563 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
  17913. 2025-07-20 16:10:52,570 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-3
  17914. 2025-07-20 16:10:52,572 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-2
  17915. 2025-07-20 16:10:52,579 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-1
  17916. 2025-07-20 16:10:52,579 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-4
  17917. 2025-07-20 16:10:52,637 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-2
  17918. 2025-07-20 16:10:52,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-4
  17919. 2025-07-20 16:10:52,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-6
  17920. 2025-07-20 16:10:52,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-3
  17921. 2025-07-20 16:10:52,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-5
  17922. 2025-07-20 16:10:52,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-2
  17923. 2025-07-20 16:10:52,653 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-6
  17924. 2025-07-20 16:10:52,654 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-3
  17925. 2025-07-20 16:10:52,661 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-2
  17926. 2025-07-20 16:10:52,663 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-4
  17927. 2025-07-20 16:10:52,663 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-1
  17928. 2025-07-20 16:10:52,664 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-3
  17929. 2025-07-20 16:10:52,671 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-4
  17930. 2025-07-20 16:10:52,734 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
  17931. 2025-07-20 16:10:52,735 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-1
  17932. 2025-07-20 16:10:52,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
  17933. 2025-07-20 16:10:52,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
  17934. 2025-07-20 16:10:52,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-5
  17935. 2025-07-20 16:10:52,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
  17936. 2025-07-20 16:10:52,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
  17937. 2025-07-20 16:10:52,752 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
  17938. 2025-07-20 16:10:52,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
  17939. 2025-07-20 16:10:52,754 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-1
  17940. 2025-07-20 16:10:52,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
  17941. 2025-07-20 16:10:52,764 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
  17942. 2025-07-20 16:10:52,839 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
  17943. 2025-07-20 16:10:52,840 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
  17944. 2025-07-20 16:10:52,841 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
  17945. 2025-07-20 16:10:52,842 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
  17946. 2025-07-20 16:10:52,844 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
  17947. 2025-07-20 16:10:52,845 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
  17948. 2025-07-20 16:10:52,845 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
  17949. 2025-07-20 16:10:52,846 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-1
  17950. 2025-07-20 16:10:52,847 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
  17951. 2025-07-20 16:10:52,848 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
  17952. 2025-07-20 16:10:52,848 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
  17953. 2025-07-20 16:10:52,936 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
  17954. 2025-07-20 16:10:52,936 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
  17955. 2025-07-20 16:10:52,937 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
  17956. 2025-07-20 16:10:52,944 - __main__ - INFO - Queue remaining: 0
  17957. 2025-07-20 16:10:52,944 - __main__ - INFO -
  17958. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  17959. ----------------------------------------------------------------------------------
  17960. 2025-07-20 16:10:52,945 - __main__ - INFO -
  17961. Worker ID | started
  17962. ----------+--------
  17963. 0 | 228
  17964. 2025-07-20 16:10:52,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
  17965. 2025-07-20 16:10:52,946 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-6
  17966. 2025-07-20 16:10:52,946 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
  17967. 2025-07-20 16:10:52,947 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
  17968. 2025-07-20 16:10:52,948 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
  17969. 2025-07-20 16:10:52,949 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
  17970. 2025-07-20 16:10:52,957 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
  17971. 2025-07-20 16:10:52,958 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
  17972. 2025-07-20 16:10:53,035 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
  17973. 2025-07-20 16:10:53,036 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-5
  17974. 2025-07-20 16:10:53,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-1
  17975. 2025-07-20 16:10:53,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
  17976. 2025-07-20 16:10:53,055 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-2
  17977. 2025-07-20 16:10:53,056 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-6
  17978. 2025-07-20 16:10:53,058 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-3
  17979. 2025-07-20 16:10:53,059 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-5
  17980. 2025-07-20 16:10:53,133 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
  17981. 2025-07-20 16:10:53,134 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-4
  17982. 2025-07-20 16:10:53,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
  17983. 2025-07-20 16:10:53,145 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-6
  17984. 2025-07-20 16:10:53,158 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
  17985. 2025-07-20 16:10:53,159 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-2
  17986. 2025-07-20 16:10:53,234 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
  17987. 2025-07-20 16:10:53,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-3
  17988. 2025-07-20 16:10:53,237 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-1
  17989. 2025-07-20 16:10:53,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-6
  17990. 2025-07-20 16:10:53,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
  17991. 2025-07-20 16:10:53,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-7
  17992. 2025-07-20 16:10:53,258 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
  17993. 2025-07-20 16:10:53,259 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-2
  17994. 2025-07-20 16:10:53,260 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-4
  17995. 2025-07-20 16:10:53,261 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-2
  17996. 2025-07-20 16:10:53,335 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
  17997. 2025-07-20 16:10:53,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-1
  17998. 2025-07-20 16:10:53,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-2
  17999. 2025-07-20 16:10:53,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-3
  18000. 2025-07-20 16:10:53,342 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-3
  18001. 2025-07-20 16:10:53,350 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-3
  18002. 2025-07-20 16:10:53,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
  18003. 2025-07-20 16:10:53,442 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-6
  18004. 2025-07-20 16:10:53,443 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-6
  18005. 2025-07-20 16:10:53,443 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-2
  18006. 2025-07-20 16:10:53,445 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
  18007. 2025-07-20 16:10:53,453 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-6
  18008. 2025-07-20 16:10:53,454 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-5
  18009. 2025-07-20 16:10:53,455 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-6
  18010. 2025-07-20 16:10:53,456 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-5
  18011. 2025-07-20 16:10:53,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-2
  18012. 2025-07-20 16:10:53,539 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-3
  18013. 2025-07-20 16:10:53,541 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-1
  18014. 2025-07-20 16:10:53,544 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
  18015. 2025-07-20 16:10:53,545 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-5
  18016. 2025-07-20 16:10:53,547 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-1
  18017. 2025-07-20 16:10:53,549 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-3
  18018. 2025-07-20 16:10:53,550 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-6
  18019. 2025-07-20 16:10:53,551 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-6
  18020. 2025-07-20 16:10:53,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
  18021. 2025-07-20 16:10:53,648 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-3
  18022. 2025-07-20 16:10:53,650 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-5
  18023. 2025-07-20 16:10:53,652 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-2
  18024. 2025-07-20 16:10:53,653 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-4
  18025. 2025-07-20 16:10:53,654 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-3
  18026. 2025-07-20 16:10:53,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-4
  18027. 2025-07-20 16:10:53,744 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-3
  18028. 2025-07-20 16:10:53,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-4
  18029. 2025-07-20 16:10:53,746 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-2
  18030. 2025-07-20 16:10:53,757 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-1
  18031. 2025-07-20 16:10:53,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-8
  18032. 2025-07-20 16:10:53,836 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-5
  18033. 2025-07-20 16:10:53,838 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-1
  18034. 2025-07-20 16:10:53,838 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-6
  18035. 2025-07-20 16:10:53,840 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-4
  18036. 2025-07-20 16:10:53,841 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-8
  18037. 2025-07-20 16:10:53,842 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-4
  18038. 2025-07-20 16:10:53,844 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-5
  18039. 2025-07-20 16:10:53,846 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-7
  18040. 2025-07-20 16:10:53,847 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-2
  18041. 2025-07-20 16:10:53,940 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-1
  18042. 2025-07-20 16:10:53,941 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-5
  18043. 2025-07-20 16:10:53,942 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-3
  18044. 2025-07-20 16:10:53,944 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
  18045. 2025-07-20 16:10:53,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-2
  18046. 2025-07-20 16:10:53,947 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-7
  18047. 2025-07-20 16:10:53,947 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-10
  18048. 2025-07-20 16:10:53,949 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-5
  18049. 2025-07-20 16:10:53,950 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
  18050. 2025-07-20 16:10:54,034 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-5
  18051. 2025-07-20 16:10:54,035 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
  18052. 2025-07-20 16:10:54,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-6
  18053. 2025-07-20 16:10:54,040 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-7
  18054. 2025-07-20 16:10:54,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-3
  18055. 2025-07-20 16:10:54,133 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
  18056. 2025-07-20 16:10:54,134 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-4
  18057. 2025-07-20 16:10:54,137 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
  18058. 2025-07-20 16:10:54,239 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-4
  18059. 2025-07-20 16:10:54,240 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
  18060. 2025-07-20 16:10:54,243 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
  18061. 2025-07-20 16:10:54,245 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
  18062. 2025-07-20 16:10:54,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
  18063. 2025-07-20 16:10:54,253 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
  18064. 2025-07-20 16:10:54,346 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
  18065. 2025-07-20 16:10:54,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-4
  18066. 2025-07-20 16:10:54,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
  18067. 2025-07-20 16:10:54,349 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
  18068. 2025-07-20 16:10:54,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
  18069. 2025-07-20 16:10:54,353 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
  18070. 2025-07-20 16:10:54,356 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-5
  18071. 2025-07-20 16:10:54,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
  18072. 2025-07-20 16:10:54,360 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
  18073. 2025-07-20 16:10:54,362 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
  18074. 2025-07-20 16:10:54,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-1
  18075. 2025-07-20 16:10:54,366 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
  18076. 2025-07-20 16:10:54,367 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-8
  18077. 2025-07-20 16:10:54,369 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-4
  18078. 2025-07-20 16:10:54,370 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-4
  18079. 2025-07-20 16:10:54,436 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-1
  18080. 2025-07-20 16:10:54,438 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
  18081. 2025-07-20 16:10:54,439 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
  18082. 2025-07-20 16:10:54,440 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
  18083. 2025-07-20 16:10:54,442 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
  18084. 2025-07-20 16:10:54,457 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
  18085. 2025-07-20 16:10:54,460 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-9
  18086. 2025-07-20 16:10:54,460 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
  18087. 2025-07-20 16:10:54,461 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
  18088. 2025-07-20 16:10:54,461 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
  18089. 2025-07-20 16:10:54,461 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
  18090. 2025-07-20 16:10:54,463 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
  18091. 2025-07-20 16:10:54,465 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
  18092. 2025-07-20 16:10:54,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  18093. 2025-07-20 16:10:54,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-2
  18094. 2025-07-20 16:10:54,540 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
  18095. 2025-07-20 16:10:54,541 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
  18096. 2025-07-20 16:10:54,541 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
  18097. 2025-07-20 16:10:54,543 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-1
  18098. 2025-07-20 16:10:54,546 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
  18099. 2025-07-20 16:10:54,547 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
  18100. 2025-07-20 16:10:54,550 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-5
  18101. 2025-07-20 16:10:54,554 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
  18102. 2025-07-20 16:10:54,554 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
  18103. 2025-07-20 16:10:54,557 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
  18104. 2025-07-20 16:10:54,560 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
  18105. 2025-07-20 16:10:54,562 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
  18106. 2025-07-20 16:10:54,661 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
  18107. 2025-07-20 16:10:54,666 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
  18108. 2025-07-20 16:10:54,668 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
  18109. 2025-07-20 16:10:54,735 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
  18110. 2025-07-20 16:10:54,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
  18111. 2025-07-20 16:10:54,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
  18112. 2025-07-20 16:10:54,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
  18113. 2025-07-20 16:10:54,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
  18114. 2025-07-20 16:10:54,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
  18115. 2025-07-20 16:10:54,741 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
  18116. 2025-07-20 16:10:54,741 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
  18117. 2025-07-20 16:10:54,744 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
  18118. 2025-07-20 16:10:54,746 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
  18119. 2025-07-20 16:10:54,747 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-1
  18120. 2025-07-20 16:10:54,747 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
  18121. 2025-07-20 16:10:54,750 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
  18122. 2025-07-20 16:10:54,752 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
  18123. 2025-07-20 16:10:54,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
  18124. 2025-07-20 16:10:54,772 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
  18125. 2025-07-20 16:10:54,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
  18126. 2025-07-20 16:10:54,854 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
  18127. 2025-07-20 16:10:54,854 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
  18128. 2025-07-20 16:10:54,858 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
  18129. 2025-07-20 16:10:54,858 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
  18130. 2025-07-20 16:10:54,858 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
  18131. 2025-07-20 16:11:02,945 - __main__ - INFO - Queue remaining: 0
  18132. 2025-07-20 16:11:02,949 - __main__ - INFO -
  18133. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  18134. ----------------------------------------------------------------------------------
  18135. 2025-07-20 16:11:02,952 - __main__ - INFO -
  18136. Worker ID | started
  18137. ----------+--------
  18138. 0 | 228
  18139. 2025-07-20 16:11:09,843 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  18140. 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 1 exiting due to empty queue
  18141. 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 2 exiting due to empty queue
  18142. 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 3 exiting due to empty queue
  18143. 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 4 exiting due to empty queue
  18144. 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 5 exiting due to empty queue
  18145. 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 6 exiting due to empty queue
  18146. 2025-07-20 16:11:09,843 - __main__ - INFO - Worker 7 exiting due to empty queue
  18147. 2025-07-20 16:13:35,181 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  18148. 2025-07-20 16:13:35,182 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
  18149. 2025-07-20 16:13:35,182 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
  18150. 2025-07-20 16:13:35,182 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
  18151. 2025-07-20 16:13:35,183 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
  18152. 2025-07-20 16:13:35,183 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
  18153. 2025-07-20 16:13:35,183 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
  18154. 2025-07-20 16:13:35,184 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
  18155. 2025-07-20 16:13:35,184 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
  18156. 2025-07-20 16:13:35,185 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
  18157. 2025-07-20 16:13:35,185 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
  18158. 2025-07-20 16:13:35,185 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
  18159. 2025-07-20 16:13:35,186 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
  18160. 2025-07-20 16:13:35,186 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
  18161. 2025-07-20 16:13:35,186 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
  18162. 2025-07-20 16:13:35,187 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
  18163. 2025-07-20 16:13:35,187 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
  18164. 2025-07-20 16:13:35,187 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
  18165. 2025-07-20 16:13:35,188 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
  18166. 2025-07-20 16:13:35,188 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
  18167. 2025-07-20 16:13:35,188 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
  18168. 2025-07-20 16:13:35,189 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
  18169. 2025-07-20 16:13:35,189 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
  18170. 2025-07-20 16:13:35,190 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
  18171. 2025-07-20 16:13:35,190 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
  18172. 2025-07-20 16:13:35,190 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
  18173. 2025-07-20 16:13:35,191 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
  18174. 2025-07-20 16:13:35,191 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
  18175. 2025-07-20 16:13:35,191 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
  18176. 2025-07-20 16:13:35,192 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
  18177. 2025-07-20 16:13:35,192 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
  18178. 2025-07-20 16:13:35,192 - __main__ - INFO - Found 30 total pdf paths to add
  18179. 2025-07-20 16:13:35,292 - __main__ - INFO - Calculated items_per_group: 65 based on average pages per PDF: 7.60
  18180. 2025-07-20 16:13:35,458 - __main__ - INFO - Starting pipeline with PID 609195
  18181. 2025-07-20 16:13:35,459 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  18182. 2025-07-20 16:13:35,572 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  18183. 2025-07-20 16:13:36,598 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  18184. 2025-07-20 16:13:37,641 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  18185. 2025-07-20 16:13:38,699 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  18186. 2025-07-20 16:13:39,765 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  18187. 2025-07-20 16:13:40,832 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  18188. 2025-07-20 16:13:41,861 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  18189. 2025-07-20 16:13:42,920 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  18190. 2025-07-20 16:13:43,777 - sglang - INFO - [2025-07-20 16:13:43] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=144398080, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  18191. 2025-07-20 16:13:43,777 - __main__ - INFO - [2025-07-20 16:13:43] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=144398080, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  18192. 2025-07-20 16:13:44,053 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  18193. 2025-07-20 16:13:44,669 - sglang - INFO - [2025-07-20 16:13:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
  18194. 2025-07-20 16:13:44,669 - __main__ - INFO - [2025-07-20 16:13:44] Use chat template for the OpenAI-compatible API server: qwen2-vl
  18195. 2025-07-20 16:13:45,097 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  18196. 2025-07-20 16:13:46,160 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  18197. 2025-07-20 16:13:47,196 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  18198. 2025-07-20 16:13:48,259 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  18199. 2025-07-20 16:13:49,395 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  18200. 2025-07-20 16:13:50,462 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  18201. 2025-07-20 16:13:50,913 - sglang - INFO - [2025-07-20 16:13:50 TP0] Overlap scheduler is disabled for multimodal models.
  18202. 2025-07-20 16:13:50,913 - __main__ - INFO - [2025-07-20 16:13:50 TP0] Overlap scheduler is disabled for multimodal models.
  18203. 2025-07-20 16:13:50,915 - sglang - INFO - [2025-07-20 16:13:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  18204. 2025-07-20 16:13:50,915 - __main__ - INFO - [2025-07-20 16:13:50 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  18205. 2025-07-20 16:13:50,915 - sglang - INFO - [2025-07-20 16:13:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  18206. 2025-07-20 16:13:50,915 - __main__ - INFO - [2025-07-20 16:13:50 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  18207. 2025-07-20 16:13:50,916 - sglang - INFO - [2025-07-20 16:13:50 TP0] Init torch distributed begin.
  18208. 2025-07-20 16:13:50,916 - __main__ - INFO - [2025-07-20 16:13:50 TP0] Init torch distributed begin.
  18209. 2025-07-20 16:13:51,546 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  18210. 2025-07-20 16:13:52,607 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  18211. 2025-07-20 16:13:53,674 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  18212. 2025-07-20 16:13:54,746 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  18213. 2025-07-20 16:13:55,800 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  18214. 2025-07-20 16:13:56,515 - sglang - INFO - [2025-07-20 16:13:56 TP0] Load weight begin. avail mem=23.33 GB
  18215. 2025-07-20 16:13:56,515 - __main__ - INFO - [2025-07-20 16:13:56 TP0] Load weight begin. avail mem=23.33 GB
  18216. 2025-07-20 16:13:56,854 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  18217. 2025-07-20 16:13:57,220 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  18218. 2025-07-20 16:13:57,221 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  18219. 2025-07-20 16:13:57,906 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  18220. 2025-07-20 16:13:58,974 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  18221. 2025-07-20 16:14:00,041 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  18222. 2025-07-20 16:14:01,108 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  18223. 2025-07-20 16:14:02,175 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  18224. 2025-07-20 16:14:03,243 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  18225. 2025-07-20 16:14:04,311 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  18226. 2025-07-20 16:14:05,379 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  18227. 2025-07-20 16:14:06,408 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  18228. 2025-07-20 16:14:07,460 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  18229. 2025-07-20 16:14:08,524 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  18230. 2025-07-20 16:14:09,594 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  18231. 2025-07-20 16:14:10,327 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.11s/it]
  18232. 2025-07-20 16:14:10,327 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.11s/it]
  18233. 2025-07-20 16:14:10,664 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  18234. 2025-07-20 16:14:11,716 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  18235. 2025-07-20 16:14:12,782 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  18236. 2025-07-20 16:14:13,850 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  18237. 2025-07-20 16:14:14,917 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  18238. 2025-07-20 16:14:15,947 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  18239. 2025-07-20 16:14:17,004 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  18240. 2025-07-20 16:14:18,072 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  18241. 2025-07-20 16:14:19,139 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  18242. 2025-07-20 16:14:20,207 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  18243. 2025-07-20 16:14:21,251 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  18244. 2025-07-20 16:14:22,315 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  18245. 2025-07-20 16:14:23,383 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  18246. 2025-07-20 16:14:23,546 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.17s/it]
  18247. 2025-07-20 16:14:23,546 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.17s/it]
  18248. 2025-07-20 16:14:24,460 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  18249. 2025-07-20 16:14:25,528 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  18250. 2025-07-20 16:14:26,595 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  18251. 2025-07-20 16:14:27,662 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  18252. 2025-07-20 16:14:28,725 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  18253. 2025-07-20 16:14:29,784 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  18254. 2025-07-20 16:14:30,850 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  18255. 2025-07-20 16:14:31,916 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  18256. 2025-07-20 16:14:32,983 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  18257. 2025-07-20 16:14:34,052 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  18258. 2025-07-20 16:14:35,119 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  18259. 2025-07-20 16:14:36,187 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  18260. 2025-07-20 16:14:36,446 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.05s/it]
  18261. 2025-07-20 16:14:36,446 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.05s/it]
  18262. 2025-07-20 16:14:37,263 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  18263. 2025-07-20 16:14:38,330 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  18264. 2025-07-20 16:14:39,398 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  18265. 2025-07-20 16:14:40,465 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  18266. 2025-07-20 16:14:41,003 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.70s/it]
  18267. 2025-07-20 16:14:41,003 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.70s/it]
  18268. 2025-07-20 16:14:41,004 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
  18269. 2025-07-20 16:14:41,004 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
  18270. 2025-07-20 16:14:41,004 - sglang - INFO -
  18271. 2025-07-20 16:14:41,004 - __main__ - INFO -
  18272. 2025-07-20 16:14:41,099 - sglang - INFO - [2025-07-20 16:14:41 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  18273. 2025-07-20 16:14:41,099 - __main__ - INFO - [2025-07-20 16:14:41 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  18274. 2025-07-20 16:14:41,111 - sglang - INFO - [2025-07-20 16:14:41 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  18275. 2025-07-20 16:14:41,111 - __main__ - INFO - [2025-07-20 16:14:41 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  18276. 2025-07-20 16:14:41,111 - sglang - INFO - [2025-07-20 16:14:41 TP0] Memory pool end. avail mem=5.30 GB
  18277. 2025-07-20 16:14:41,111 - __main__ - INFO - [2025-07-20 16:14:41 TP0] Memory pool end. avail mem=5.30 GB
  18278. 2025-07-20 16:14:41,330 - sglang - INFO - [2025-07-20 16:14:41 TP0] Capture cuda graph begin. This can take up to several minutes.
  18279. 2025-07-20 16:14:41,330 - __main__ - INFO - [2025-07-20 16:14:41 TP0] Capture cuda graph begin. This can take up to several minutes.
  18280. 2025-07-20 16:14:41,542 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  18281. 2025-07-20 16:14:42,609 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  18282. 2025-07-20 16:14:43,465 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.32s/it] 50%|█████ | 2/4 [00:01<00:01, 1.43it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.98it/s] 100%|██████████| 4/4 [00:02<00:00, 2.42it/s] 100%|██████████| 4/4 [00:02<00:00, 1.88it/s]
  18283. 2025-07-20 16:14:43,465 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.32s/it] 50%|█████ | 2/4 [00:01<00:01, 1.43it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.98it/s] 100%|██████████| 4/4 [00:02<00:00, 2.42it/s] 100%|██████████| 4/4 [00:02<00:00, 1.88it/s]
  18284. 2025-07-20 16:14:43,465 - sglang - INFO - [2025-07-20 16:14:43 TP0] Capture cuda graph end. Time elapsed: 2.13 s
  18285. 2025-07-20 16:14:43,465 - __main__ - INFO - [2025-07-20 16:14:43 TP0] Capture cuda graph end. Time elapsed: 2.13 s
  18286. 2025-07-20 16:14:43,686 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  18287. 2025-07-20 16:14:44,203 - sglang - INFO - [2025-07-20 16:14:44 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  18288. 2025-07-20 16:14:44,203 - __main__ - INFO - [2025-07-20 16:14:44 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  18289. 2025-07-20 16:14:44,783 - __main__ - INFO - sglang server is ready.
  18290. 2025-07-20 16:14:44,783 - __main__ - INFO - Queue remaining: 1
  18291. 2025-07-20 16:14:44,783 - __main__ - INFO -
  18292. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  18293. ----------------------------------------------------------------------------------
  18294. 2025-07-20 16:14:44,784 - __main__ - INFO -
  18295. Worker ID
  18296. ---------
  18297. 2025-07-20 16:14:44,784 - __main__ - INFO - Worker 0 processing work item 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
  18298. 2025-07-20 16:14:44,784 - __main__ - INFO - Created all tasks for 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
  18299. 2025-07-20 16:14:44,796 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
  18300. 2025-07-20 16:14:44,799 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106004000.pdf in worker 0
  18301. 2025-07-20 16:14:44,801 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106008000.pdf in worker 0
  18302. 2025-07-20 16:14:44,803 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106011000.pdf in worker 0
  18303. 2025-07-20 16:14:44,805 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106013001.pdf in worker 0
  18304. 2025-07-20 16:14:44,807 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013002.pdf in worker 0
  18305. 2025-07-20 16:14:44,809 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
  18306. 2025-07-20 16:14:44,812 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
  18307. 2025-07-20 16:14:44,814 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
  18308. 2025-07-20 16:14:44,816 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
  18309. 2025-07-20 16:14:44,818 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
  18310. 2025-07-20 16:14:44,820 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
  18311. 2025-07-20 16:14:44,823 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900701.pdf in worker 0
  18312. 2025-07-20 16:14:44,825 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900602.pdf in worker 0
  18313. 2025-07-20 16:14:44,827 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900702.pdf in worker 0
  18314. 2025-07-20 16:14:44,829 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029008.pdf in worker 0
  18315. 2025-07-20 16:14:44,831 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900901.pdf in worker 0
  18316. 2025-07-20 16:14:44,833 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900902.pdf in worker 0
  18317. 2025-07-20 16:14:44,834 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901001.pdf in worker 0
  18318. 2025-07-20 16:14:44,836 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901002.pdf in worker 0
  18319. 2025-07-20 16:14:44,838 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106041000.pdf in worker 0
  18320. 2025-07-20 16:14:44,840 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010603501801.pdf in worker 0
  18321. 2025-07-20 16:14:44,842 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010604200101.pdf in worker 0
  18322. 2025-07-20 16:14:44,844 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG344010604300102.pdf in worker 0
  18323. 2025-07-20 16:14:44,846 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
  18324. 2025-07-20 16:14:44,848 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
  18325. 2025-07-20 16:14:44,851 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
  18326. 2025-07-20 16:14:44,852 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
  18327. 2025-07-20 16:14:44,898 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
  18328. 2025-07-20 16:14:44,932 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
  18329. 2025-07-20 16:14:45,344 - sglang - INFO - [2025-07-20 16:14:45 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  18330. 2025-07-20 16:14:45,344 - __main__ - INFO - [2025-07-20 16:14:45 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  18331. 2025-07-20 16:14:45,344 - __main__ - INFO - sglang running req: 0 queue req: 0
  18332. 2025-07-20 16:14:50,937 - sglang - INFO - [2025-07-20 16:14:50] The server is fired up and ready to roll!
  18333. 2025-07-20 16:14:50,937 - __main__ - INFO - [2025-07-20 16:14:50] The server is fired up and ready to roll!
  18334. 2025-07-20 16:14:54,784 - __main__ - INFO - Queue remaining: 0
  18335. 2025-07-20 16:14:54,784 - __main__ - INFO -
  18336. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  18337. ----------------------------------------------------------------------------------
  18338. 2025-07-20 16:14:54,784 - __main__ - INFO -
  18339. Worker ID | started
  18340. ----------+--------
  18341. 0 | 228
  18342. 2025-07-20 16:15:04,786 - __main__ - INFO - Queue remaining: 0
  18343. 2025-07-20 16:15:04,787 - __main__ - INFO -
  18344. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  18345. ----------------------------------------------------------------------------------
  18346. 2025-07-20 16:15:04,787 - __main__ - INFO -
  18347. Worker ID | started
  18348. ----------+--------
  18349. 0 | 228
  18350. 2025-07-20 16:15:05,007 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
  18351. 2025-07-20 16:15:05,010 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
  18352. 2025-07-20 16:15:05,025 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
  18353. 2025-07-20 16:15:05,037 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
  18354. 2025-07-20 16:15:05,040 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
  18355. 2025-07-20 16:15:05,046 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
  18356. 2025-07-20 16:15:05,064 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
  18357. 2025-07-20 16:15:05,068 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
  18358. 2025-07-20 16:15:05,080 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
  18359. 2025-07-20 16:15:05,089 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
  18360. 2025-07-20 16:15:05,137 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
  18361. 2025-07-20 16:15:05,146 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-3
  18362. 2025-07-20 16:15:05,148 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-2
  18363. 2025-07-20 16:15:05,149 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-5
  18364. 2025-07-20 16:15:05,150 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-7
  18365. 2025-07-20 16:15:05,151 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-5
  18366. 2025-07-20 16:15:05,152 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-4
  18367. 2025-07-20 16:15:05,158 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-1
  18368. 2025-07-20 16:15:05,158 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-6
  18369. 2025-07-20 16:15:05,180 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-6
  18370. 2025-07-20 16:15:05,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-3
  18371. 2025-07-20 16:15:05,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-4
  18372. 2025-07-20 16:15:05,237 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-3
  18373. 2025-07-20 16:15:05,238 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-2
  18374. 2025-07-20 16:15:05,264 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-2
  18375. 2025-07-20 16:15:05,266 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-2
  18376. 2025-07-20 16:15:05,267 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-3
  18377. 2025-07-20 16:15:05,268 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-4
  18378. 2025-07-20 16:15:05,269 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-5
  18379. 2025-07-20 16:15:05,285 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-4
  18380. 2025-07-20 16:15:05,285 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-1
  18381. 2025-07-20 16:15:05,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-6
  18382. 2025-07-20 16:15:05,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-3
  18383. 2025-07-20 16:15:05,350 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-4
  18384. 2025-07-20 16:15:05,353 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
  18385. 2025-07-20 16:15:05,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-1
  18386. 2025-07-20 16:15:05,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-2
  18387. 2025-07-20 16:15:05,366 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-5
  18388. 2025-07-20 16:15:05,375 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
  18389. 2025-07-20 16:15:05,377 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-1
  18390. 2025-07-20 16:15:05,378 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-1
  18391. 2025-07-20 16:15:05,381 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
  18392. 2025-07-20 16:15:05,433 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-5
  18393. 2025-07-20 16:15:05,445 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
  18394. 2025-07-20 16:15:05,455 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
  18395. 2025-07-20 16:15:05,458 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
  18396. 2025-07-20 16:15:05,462 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
  18397. 2025-07-20 16:15:05,464 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
  18398. 2025-07-20 16:15:05,469 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
  18399. 2025-07-20 16:15:05,482 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
  18400. 2025-07-20 16:15:05,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
  18401. 2025-07-20 16:15:05,562 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
  18402. 2025-07-20 16:15:05,565 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
  18403. 2025-07-20 16:15:05,634 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
  18404. 2025-07-20 16:15:05,637 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
  18405. 2025-07-20 16:15:05,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
  18406. 2025-07-20 16:15:05,648 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
  18407. 2025-07-20 16:15:05,656 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
  18408. 2025-07-20 16:15:05,657 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
  18409. 2025-07-20 16:15:05,734 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
  18410. 2025-07-20 16:15:05,734 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
  18411. 2025-07-20 16:15:05,735 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
  18412. 2025-07-20 16:15:05,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
  18413. 2025-07-20 16:15:05,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
  18414. 2025-07-20 16:15:05,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
  18415. 2025-07-20 16:15:05,750 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
  18416. 2025-07-20 16:15:05,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
  18417. 2025-07-20 16:15:05,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
  18418. 2025-07-20 16:15:05,756 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
  18419. 2025-07-20 16:15:05,757 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
  18420. 2025-07-20 16:15:05,758 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
  18421. 2025-07-20 16:15:05,763 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-6
  18422. 2025-07-20 16:15:05,763 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
  18423. 2025-07-20 16:15:05,838 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
  18424. 2025-07-20 16:15:05,838 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-2
  18425. 2025-07-20 16:15:05,839 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
  18426. 2025-07-20 16:15:05,851 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
  18427. 2025-07-20 16:15:05,853 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
  18428. 2025-07-20 16:15:05,853 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-6
  18429. 2025-07-20 16:15:05,943 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
  18430. 2025-07-20 16:15:05,943 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-2
  18431. 2025-07-20 16:15:05,946 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-6
  18432. 2025-07-20 16:15:05,956 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-5
  18433. 2025-07-20 16:15:05,957 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-3
  18434. 2025-07-20 16:15:05,959 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
  18435. 2025-07-20 16:15:06,036 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-1
  18436. 2025-07-20 16:15:06,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-3
  18437. 2025-07-20 16:15:06,039 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
  18438. 2025-07-20 16:15:06,040 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-5
  18439. 2025-07-20 16:15:06,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-2
  18440. 2025-07-20 16:15:06,044 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-7
  18441. 2025-07-20 16:15:06,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
  18442. 2025-07-20 16:15:06,137 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
  18443. 2025-07-20 16:15:06,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-4
  18444. 2025-07-20 16:15:06,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-3
  18445. 2025-07-20 16:15:06,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-6
  18446. 2025-07-20 16:15:06,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-3
  18447. 2025-07-20 16:15:06,149 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-1
  18448. 2025-07-20 16:15:06,152 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-4
  18449. 2025-07-20 16:15:06,152 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-2
  18450. 2025-07-20 16:15:06,153 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-3
  18451. 2025-07-20 16:15:06,155 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-5
  18452. 2025-07-20 16:15:06,233 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-1
  18453. 2025-07-20 16:15:06,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-2
  18454. 2025-07-20 16:15:06,237 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-4
  18455. 2025-07-20 16:15:06,246 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-3
  18456. 2025-07-20 16:15:06,247 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-6
  18457. 2025-07-20 16:15:06,247 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-6
  18458. 2025-07-20 16:15:06,251 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
  18459. 2025-07-20 16:15:06,255 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-5
  18460. 2025-07-20 16:15:06,333 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-1
  18461. 2025-07-20 16:15:06,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-6
  18462. 2025-07-20 16:15:06,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-2
  18463. 2025-07-20 16:15:06,354 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-5
  18464. 2025-07-20 16:15:06,355 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-2
  18465. 2025-07-20 16:15:06,433 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-1
  18466. 2025-07-20 16:15:06,435 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-3
  18467. 2025-07-20 16:15:06,436 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-4
  18468. 2025-07-20 16:15:06,437 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-1
  18469. 2025-07-20 16:15:06,440 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-3
  18470. 2025-07-20 16:15:06,441 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-2
  18471. 2025-07-20 16:15:06,445 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-4
  18472. 2025-07-20 16:15:06,448 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-2
  18473. 2025-07-20 16:15:06,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-1
  18474. 2025-07-20 16:15:06,539 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-4
  18475. 2025-07-20 16:15:06,735 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-5
  18476. 2025-07-20 16:15:06,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
  18477. 2025-07-20 16:15:06,744 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-3
  18478. 2025-07-20 16:15:06,748 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-3
  18479. 2025-07-20 16:15:06,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-6
  18480. 2025-07-20 16:15:06,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-6
  18481. 2025-07-20 16:15:06,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-6
  18482. 2025-07-20 16:15:06,833 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-1
  18483. 2025-07-20 16:15:06,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-4
  18484. 2025-07-20 16:15:06,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-4
  18485. 2025-07-20 16:15:07,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-2
  18486. 2025-07-20 16:15:07,039 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-8
  18487. 2025-07-20 16:15:07,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-3
  18488. 2025-07-20 16:15:07,044 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-5
  18489. 2025-07-20 16:15:07,047 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-7
  18490. 2025-07-20 16:15:07,049 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-1
  18491. 2025-07-20 16:15:07,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-2
  18492. 2025-07-20 16:15:07,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-5
  18493. 2025-07-20 16:15:07,134 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-7
  18494. 2025-07-20 16:15:07,139 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-5
  18495. 2025-07-20 16:15:07,139 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-5
  18496. 2025-07-20 16:15:07,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-4
  18497. 2025-07-20 16:15:07,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-8
  18498. 2025-07-20 16:15:07,143 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-6
  18499. 2025-07-20 16:15:07,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
  18500. 2025-07-20 16:15:07,146 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-2
  18501. 2025-07-20 16:15:07,146 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-10
  18502. 2025-07-20 16:15:07,147 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-7
  18503. 2025-07-20 16:15:07,153 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
  18504. 2025-07-20 16:15:07,197 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
  18505. 2025-07-20 16:15:07,197 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
  18506. 2025-07-20 16:15:07,198 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-8
  18507. 2025-07-20 16:15:07,198 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
  18508. 2025-07-20 16:15:07,198 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
  18509. 2025-07-20 16:15:07,199 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
  18510. 2025-07-20 16:15:07,199 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-6
  18511. 2025-07-20 16:15:07,200 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
  18512. 2025-07-20 16:15:07,200 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
  18513. 2025-07-20 16:15:07,200 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-5
  18514. 2025-07-20 16:15:07,234 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
  18515. 2025-07-20 16:15:07,234 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
  18516. 2025-07-20 16:15:07,235 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
  18517. 2025-07-20 16:15:07,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
  18518. 2025-07-20 16:15:07,247 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
  18519. 2025-07-20 16:15:07,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
  18520. 2025-07-20 16:15:07,249 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
  18521. 2025-07-20 16:15:07,249 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
  18522. 2025-07-20 16:15:07,252 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-5
  18523. 2025-07-20 16:15:07,254 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
  18524. 2025-07-20 16:15:07,258 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
  18525. 2025-07-20 16:15:07,261 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-4
  18526. 2025-07-20 16:15:07,334 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
  18527. 2025-07-20 16:15:07,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-4
  18528. 2025-07-20 16:15:07,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
  18529. 2025-07-20 16:15:07,339 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
  18530. 2025-07-20 16:15:07,339 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
  18531. 2025-07-20 16:15:07,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
  18532. 2025-07-20 16:15:07,342 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
  18533. 2025-07-20 16:15:07,342 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
  18534. 2025-07-20 16:15:07,343 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
  18535. 2025-07-20 16:15:07,346 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
  18536. 2025-07-20 16:15:07,348 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
  18537. 2025-07-20 16:15:07,449 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-4
  18538. 2025-07-20 16:15:07,449 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
  18539. 2025-07-20 16:15:07,449 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
  18540. 2025-07-20 16:15:07,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
  18541. 2025-07-20 16:15:07,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-1
  18542. 2025-07-20 16:15:07,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
  18543. 2025-07-20 16:15:07,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
  18544. 2025-07-20 16:15:07,452 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-1
  18545. 2025-07-20 16:15:07,453 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-1
  18546. 2025-07-20 16:15:07,454 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
  18547. 2025-07-20 16:15:07,456 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
  18548. 2025-07-20 16:15:07,456 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  18549. 2025-07-20 16:15:07,458 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-3
  18550. 2025-07-20 16:15:07,459 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
  18551. 2025-07-20 16:15:07,460 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
  18552. 2025-07-20 16:15:07,462 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
  18553. 2025-07-20 16:15:07,463 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
  18554. 2025-07-20 16:15:07,536 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
  18555. 2025-07-20 16:15:07,537 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
  18556. 2025-07-20 16:15:07,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
  18557. 2025-07-20 16:15:07,539 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
  18558. 2025-07-20 16:15:07,540 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
  18559. 2025-07-20 16:15:07,542 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
  18560. 2025-07-20 16:15:07,543 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
  18561. 2025-07-20 16:15:07,544 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
  18562. 2025-07-20 16:15:07,545 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
  18563. 2025-07-20 16:15:07,546 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
  18564. 2025-07-20 16:15:07,548 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
  18565. 2025-07-20 16:15:07,556 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
  18566. 2025-07-20 16:15:07,557 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
  18567. 2025-07-20 16:15:07,643 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
  18568. 2025-07-20 16:15:07,644 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
  18569. 2025-07-20 16:15:07,645 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
  18570. 2025-07-20 16:15:07,646 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
  18571. 2025-07-20 16:15:07,647 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
  18572. 2025-07-20 16:15:07,649 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-9
  18573. 2025-07-20 16:15:07,649 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
  18574. 2025-07-20 16:15:07,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
  18575. 2025-07-20 16:15:07,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
  18576. 2025-07-20 16:15:07,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
  18577. 2025-07-20 16:15:08,055 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
  18578. 2025-07-20 16:15:14,788 - __main__ - INFO - Queue remaining: 0
  18579. 2025-07-20 16:15:14,788 - __main__ - INFO -
  18580. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  18581. ----------------------------------------------------------------------------------
  18582. 2025-07-20 16:15:14,832 - __main__ - INFO -
  18583. Worker ID | started
  18584. ----------+--------
  18585. 0 | 228
  18586. 2025-07-20 16:18:28,967 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  18587. 2025-07-20 16:18:28,968 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
  18588. 2025-07-20 16:18:28,968 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
  18589. 2025-07-20 16:18:28,968 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
  18590. 2025-07-20 16:18:28,969 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
  18591. 2025-07-20 16:18:28,969 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
  18592. 2025-07-20 16:18:28,970 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
  18593. 2025-07-20 16:18:28,970 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
  18594. 2025-07-20 16:18:28,970 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
  18595. 2025-07-20 16:18:28,971 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
  18596. 2025-07-20 16:18:28,971 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
  18597. 2025-07-20 16:18:28,971 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
  18598. 2025-07-20 16:18:28,972 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
  18599. 2025-07-20 16:18:28,972 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
  18600. 2025-07-20 16:18:28,972 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
  18601. 2025-07-20 16:18:28,972 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
  18602. 2025-07-20 16:18:28,973 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
  18603. 2025-07-20 16:18:28,973 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
  18604. 2025-07-20 16:18:28,973 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
  18605. 2025-07-20 16:18:28,974 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
  18606. 2025-07-20 16:18:28,974 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
  18607. 2025-07-20 16:18:28,974 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
  18608. 2025-07-20 16:18:28,975 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
  18609. 2025-07-20 16:18:28,975 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
  18610. 2025-07-20 16:18:28,975 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
  18611. 2025-07-20 16:18:28,976 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
  18612. 2025-07-20 16:18:28,976 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
  18613. 2025-07-20 16:18:28,976 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
  18614. 2025-07-20 16:18:28,977 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
  18615. 2025-07-20 16:18:28,977 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
  18616. 2025-07-20 16:18:28,977 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
  18617. 2025-07-20 16:18:28,977 - __main__ - INFO - Found 30 total pdf paths to add
  18618. 2025-07-20 16:18:29,059 - __main__ - INFO - Calculated items_per_group: 65 based on average pages per PDF: 7.60
  18619. 2025-07-20 16:18:29,244 - __main__ - INFO - Starting pipeline with PID 613024
  18620. 2025-07-20 16:18:29,244 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  18621. 2025-07-20 16:18:29,324 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  18622. 2025-07-20 16:18:30,353 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  18623. 2025-07-20 16:18:31,464 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  18624. 2025-07-20 16:18:32,575 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  18625. 2025-07-20 16:18:33,606 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  18626. 2025-07-20 16:18:34,646 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  18627. 2025-07-20 16:18:35,785 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  18628. 2025-07-20 16:18:36,849 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  18629. 2025-07-20 16:18:37,680 - sglang - INFO - [2025-07-20 16:18:37] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=192343309, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  18630. 2025-07-20 16:18:37,680 - __main__ - INFO - [2025-07-20 16:18:37] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=192343309, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  18631. 2025-07-20 16:18:38,005 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  18632. 2025-07-20 16:18:38,814 - sglang - INFO - [2025-07-20 16:18:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
  18633. 2025-07-20 16:18:38,814 - __main__ - INFO - [2025-07-20 16:18:38] Use chat template for the OpenAI-compatible API server: qwen2-vl
  18634. 2025-07-20 16:18:39,081 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  18635. 2025-07-20 16:18:40,153 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  18636. 2025-07-20 16:18:41,222 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  18637. 2025-07-20 16:18:42,289 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  18638. 2025-07-20 16:18:43,365 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  18639. 2025-07-20 16:18:44,417 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  18640. 2025-07-20 16:18:44,888 - sglang - INFO - [2025-07-20 16:18:44 TP0] Overlap scheduler is disabled for multimodal models.
  18641. 2025-07-20 16:18:44,888 - __main__ - INFO - [2025-07-20 16:18:44 TP0] Overlap scheduler is disabled for multimodal models.
  18642. 2025-07-20 16:18:44,891 - sglang - INFO - [2025-07-20 16:18:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  18643. 2025-07-20 16:18:44,891 - __main__ - INFO - [2025-07-20 16:18:44 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  18644. 2025-07-20 16:18:44,891 - sglang - INFO - [2025-07-20 16:18:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  18645. 2025-07-20 16:18:44,891 - __main__ - INFO - [2025-07-20 16:18:44 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  18646. 2025-07-20 16:18:44,891 - sglang - INFO - [2025-07-20 16:18:44 TP0] Init torch distributed begin.
  18647. 2025-07-20 16:18:44,892 - __main__ - INFO - [2025-07-20 16:18:44 TP0] Init torch distributed begin.
  18648. 2025-07-20 16:18:45,506 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  18649. 2025-07-20 16:18:46,579 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  18650. 2025-07-20 16:18:47,612 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  18651. 2025-07-20 16:18:48,660 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  18652. 2025-07-20 16:18:49,698 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  18653. 2025-07-20 16:18:50,309 - sglang - INFO - [2025-07-20 16:18:50 TP0] Load weight begin. avail mem=23.33 GB
  18654. 2025-07-20 16:18:50,309 - __main__ - INFO - [2025-07-20 16:18:50 TP0] Load weight begin. avail mem=23.33 GB
  18655. 2025-07-20 16:18:50,770 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  18656. 2025-07-20 16:18:51,075 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  18657. 2025-07-20 16:18:51,075 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  18658. 2025-07-20 16:18:51,844 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  18659. 2025-07-20 16:18:52,912 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  18660. 2025-07-20 16:18:53,980 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  18661. 2025-07-20 16:18:55,049 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  18662. 2025-07-20 16:18:56,105 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  18663. 2025-07-20 16:18:57,153 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  18664. 2025-07-20 16:18:58,218 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  18665. 2025-07-20 16:18:59,285 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  18666. 2025-07-20 16:19:00,356 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  18667. 2025-07-20 16:19:01,420 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  18668. 2025-07-20 16:19:02,454 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  18669. 2025-07-20 16:19:03,513 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  18670. 2025-07-20 16:19:04,081 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.01s/it]
  18671. 2025-07-20 16:19:04,082 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.01s/it]
  18672. 2025-07-20 16:19:04,589 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  18673. 2025-07-20 16:19:05,678 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  18674. 2025-07-20 16:19:06,741 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  18675. 2025-07-20 16:19:07,809 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  18676. 2025-07-20 16:19:08,876 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  18677. 2025-07-20 16:19:09,923 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  18678. 2025-07-20 16:19:10,988 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  18679. 2025-07-20 16:19:12,056 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  18680. 2025-07-20 16:19:13,126 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  18681. 2025-07-20 16:19:14,196 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  18682. 2025-07-20 16:19:15,260 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  18683. 2025-07-20 16:19:16,329 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  18684. 2025-07-20 16:19:17,395 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.18s/it]
  18685. 2025-07-20 16:19:17,396 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.18s/it]
  18686. 2025-07-20 16:19:17,397 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  18687. 2025-07-20 16:19:18,470 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  18688. 2025-07-20 16:19:19,525 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  18689. 2025-07-20 16:19:20,596 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  18690. 2025-07-20 16:19:21,663 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  18691. 2025-07-20 16:19:22,730 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  18692. 2025-07-20 16:19:23,798 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  18693. 2025-07-20 16:19:24,865 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  18694. 2025-07-20 16:19:25,937 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  18695. 2025-07-20 16:19:27,006 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  18696. 2025-07-20 16:19:28,074 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  18697. 2025-07-20 16:19:29,142 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  18698. 2025-07-20 16:19:30,212 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  18699. 2025-07-20 16:19:30,327 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.07s/it]
  18700. 2025-07-20 16:19:30,328 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.07s/it]
  18701. 2025-07-20 16:19:31,290 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  18702. 2025-07-20 16:19:32,359 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  18703. 2025-07-20 16:19:33,430 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  18704. 2025-07-20 16:19:34,496 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  18705. 2025-07-20 16:19:34,890 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.71s/it]
  18706. 2025-07-20 16:19:34,890 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.71s/it]
  18707. 2025-07-20 16:19:34,890 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
  18708. 2025-07-20 16:19:34,890 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
  18709. 2025-07-20 16:19:34,891 - sglang - INFO -
  18710. 2025-07-20 16:19:34,891 - __main__ - INFO -
  18711. 2025-07-20 16:19:34,982 - sglang - INFO - [2025-07-20 16:19:34 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  18712. 2025-07-20 16:19:34,982 - __main__ - INFO - [2025-07-20 16:19:34 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  18713. 2025-07-20 16:19:34,994 - sglang - INFO - [2025-07-20 16:19:34 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  18714. 2025-07-20 16:19:34,995 - __main__ - INFO - [2025-07-20 16:19:34 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  18715. 2025-07-20 16:19:34,995 - sglang - INFO - [2025-07-20 16:19:34 TP0] Memory pool end. avail mem=5.30 GB
  18716. 2025-07-20 16:19:34,995 - __main__ - INFO - [2025-07-20 16:19:34 TP0] Memory pool end. avail mem=5.30 GB
  18717. 2025-07-20 16:19:35,237 - sglang - INFO - [2025-07-20 16:19:35 TP0] Capture cuda graph begin. This can take up to several minutes.
  18718. 2025-07-20 16:19:35,237 - __main__ - INFO - [2025-07-20 16:19:35 TP0] Capture cuda graph begin. This can take up to several minutes.
  18719. 2025-07-20 16:19:35,532 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  18720. 2025-07-20 16:19:36,582 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  18721. 2025-07-20 16:19:37,572 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.52s/it] 50%|█████ | 2/4 [00:01<00:01, 1.26it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.81it/s] 100%|██████████| 4/4 [00:02<00:00, 2.28it/s] 100%|██████████| 4/4 [00:02<00:00, 1.72it/s]
  18722. 2025-07-20 16:19:37,573 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.52s/it] 50%|█████ | 2/4 [00:01<00:01, 1.26it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.81it/s] 100%|██████████| 4/4 [00:02<00:00, 2.28it/s] 100%|██████████| 4/4 [00:02<00:00, 1.72it/s]
  18723. 2025-07-20 16:19:37,573 - sglang - INFO - [2025-07-20 16:19:37 TP0] Capture cuda graph end. Time elapsed: 2.34 s
  18724. 2025-07-20 16:19:37,573 - __main__ - INFO - [2025-07-20 16:19:37 TP0] Capture cuda graph end. Time elapsed: 2.34 s
  18725. 2025-07-20 16:19:37,658 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  18726. 2025-07-20 16:19:38,318 - sglang - INFO - [2025-07-20 16:19:38 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  18727. 2025-07-20 16:19:38,318 - __main__ - INFO - [2025-07-20 16:19:38 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  18728. 2025-07-20 16:19:38,758 - __main__ - INFO - sglang server is ready.
  18729. 2025-07-20 16:19:38,758 - __main__ - INFO - Queue remaining: 1
  18730. 2025-07-20 16:19:38,758 - __main__ - INFO -
  18731. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  18732. ----------------------------------------------------------------------------------
  18733. 2025-07-20 16:19:38,758 - __main__ - INFO -
  18734. Worker ID
  18735. ---------
  18736. 2025-07-20 16:19:38,758 - __main__ - INFO - Worker 0 processing work item 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
  18737. 2025-07-20 16:19:38,759 - __main__ - INFO - Created all tasks for 5ed24a7ae2761a17fb214db8f051d7b48316e4cf
  18738. 2025-07-20 16:19:38,772 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
  18739. 2025-07-20 16:19:38,774 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106004000.pdf in worker 0
  18740. 2025-07-20 16:19:38,777 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106011000.pdf in worker 0
  18741. 2025-07-20 16:19:38,779 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106008000.pdf in worker 0
  18742. 2025-07-20 16:19:38,781 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106013001.pdf in worker 0
  18743. 2025-07-20 16:19:38,783 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013002.pdf in worker 0
  18744. 2025-07-20 16:19:38,785 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
  18745. 2025-07-20 16:19:38,787 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
  18746. 2025-07-20 16:19:38,790 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
  18747. 2025-07-20 16:19:38,792 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
  18748. 2025-07-20 16:19:38,794 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
  18749. 2025-07-20 16:19:38,796 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900602.pdf in worker 0
  18750. 2025-07-20 16:19:38,798 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
  18751. 2025-07-20 16:19:38,801 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900702.pdf in worker 0
  18752. 2025-07-20 16:19:38,803 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029008.pdf in worker 0
  18753. 2025-07-20 16:19:38,805 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900701.pdf in worker 0
  18754. 2025-07-20 16:19:38,806 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900901.pdf in worker 0
  18755. 2025-07-20 16:19:38,808 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900902.pdf in worker 0
  18756. 2025-07-20 16:19:38,810 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901001.pdf in worker 0
  18757. 2025-07-20 16:19:38,812 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901002.pdf in worker 0
  18758. 2025-07-20 16:19:38,813 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010603501801.pdf in worker 0
  18759. 2025-07-20 16:19:38,815 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106041000.pdf in worker 0
  18760. 2025-07-20 16:19:38,817 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010604200101.pdf in worker 0
  18761. 2025-07-20 16:19:38,819 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG344010604300102.pdf in worker 0
  18762. 2025-07-20 16:19:38,821 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
  18763. 2025-07-20 16:19:38,823 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
  18764. 2025-07-20 16:19:38,825 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
  18765. 2025-07-20 16:19:38,827 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
  18766. 2025-07-20 16:19:38,936 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
  18767. 2025-07-20 16:19:38,940 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
  18768. 2025-07-20 16:19:39,442 - sglang - INFO - [2025-07-20 16:19:39 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  18769. 2025-07-20 16:19:39,442 - __main__ - INFO - [2025-07-20 16:19:39 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  18770. 2025-07-20 16:19:39,442 - __main__ - INFO - sglang running req: 0 queue req: 0
  18771. 2025-07-20 16:19:48,040 - sglang - INFO - [2025-07-20 16:19:48] The server is fired up and ready to roll!
  18772. 2025-07-20 16:19:48,041 - __main__ - INFO - [2025-07-20 16:19:48] The server is fired up and ready to roll!
  18773. 2025-07-20 16:19:48,833 - __main__ - INFO - Queue remaining: 0
  18774. 2025-07-20 16:19:48,833 - __main__ - INFO -
  18775. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  18776. ----------------------------------------------------------------------------------
  18777. 2025-07-20 16:19:48,834 - __main__ - INFO -
  18778. Worker ID | started
  18779. ----------+--------
  18780. 0 | 228
  18781. 2025-07-20 16:19:58,820 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
  18782. 2025-07-20 16:19:58,828 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
  18783. 2025-07-20 16:19:58,834 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
  18784. 2025-07-20 16:19:58,834 - __main__ - INFO - Queue remaining: 0
  18785. 2025-07-20 16:19:58,835 - __main__ - INFO -
  18786. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  18787. ----------------------------------------------------------------------------------
  18788. 2025-07-20 16:19:58,835 - __main__ - INFO -
  18789. Worker ID | started
  18790. ----------+--------
  18791. 0 | 228
  18792. 2025-07-20 16:19:58,848 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
  18793. 2025-07-20 16:19:58,857 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
  18794. 2025-07-20 16:19:58,859 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
  18795. 2025-07-20 16:19:58,859 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
  18796. 2025-07-20 16:19:58,878 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
  18797. 2025-07-20 16:19:58,883 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
  18798. 2025-07-20 16:19:58,884 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
  18799. 2025-07-20 16:19:58,939 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-4
  18800. 2025-07-20 16:19:58,939 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-6
  18801. 2025-07-20 16:19:58,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-3
  18802. 2025-07-20 16:19:58,950 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-7
  18803. 2025-07-20 16:19:58,952 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-2
  18804. 2025-07-20 16:19:58,954 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-5
  18805. 2025-07-20 16:19:58,955 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-4
  18806. 2025-07-20 16:19:58,963 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-5
  18807. 2025-07-20 16:19:58,964 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-6
  18808. 2025-07-20 16:19:58,965 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-1
  18809. 2025-07-20 16:19:58,970 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-5
  18810. 2025-07-20 16:19:58,974 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
  18811. 2025-07-20 16:19:58,979 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-5
  18812. 2025-07-20 16:19:59,035 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-1
  18813. 2025-07-20 16:19:59,049 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-2
  18814. 2025-07-20 16:19:59,049 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-3
  18815. 2025-07-20 16:19:59,053 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-2
  18816. 2025-07-20 16:19:59,053 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-3
  18817. 2025-07-20 16:19:59,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-1
  18818. 2025-07-20 16:19:59,056 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-3
  18819. 2025-07-20 16:19:59,057 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-2
  18820. 2025-07-20 16:19:59,058 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
  18821. 2025-07-20 16:19:59,059 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-4
  18822. 2025-07-20 16:19:59,060 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-2
  18823. 2025-07-20 16:19:59,061 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-4
  18824. 2025-07-20 16:19:59,063 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-4
  18825. 2025-07-20 16:19:59,064 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-6
  18826. 2025-07-20 16:19:59,144 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
  18827. 2025-07-20 16:19:59,145 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
  18828. 2025-07-20 16:19:59,145 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-3
  18829. 2025-07-20 16:19:59,233 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-1
  18830. 2025-07-20 16:19:59,234 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
  18831. 2025-07-20 16:19:59,235 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
  18832. 2025-07-20 16:19:59,236 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
  18833. 2025-07-20 16:19:59,244 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-5
  18834. 2025-07-20 16:19:59,245 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
  18835. 2025-07-20 16:19:59,246 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
  18836. 2025-07-20 16:19:59,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
  18837. 2025-07-20 16:19:59,251 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
  18838. 2025-07-20 16:19:59,252 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
  18839. 2025-07-20 16:19:59,257 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
  18840. 2025-07-20 16:19:59,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
  18841. 2025-07-20 16:19:59,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
  18842. 2025-07-20 16:19:59,343 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
  18843. 2025-07-20 16:19:59,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
  18844. 2025-07-20 16:19:59,348 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
  18845. 2025-07-20 16:19:59,349 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
  18846. 2025-07-20 16:19:59,349 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
  18847. 2025-07-20 16:19:59,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
  18848. 2025-07-20 16:19:59,360 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
  18849. 2025-07-20 16:19:59,435 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
  18850. 2025-07-20 16:19:59,436 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-6
  18851. 2025-07-20 16:19:59,437 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
  18852. 2025-07-20 16:19:59,439 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
  18853. 2025-07-20 16:19:59,440 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
  18854. 2025-07-20 16:19:59,441 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
  18855. 2025-07-20 16:19:59,442 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
  18856. 2025-07-20 16:19:59,443 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
  18857. 2025-07-20 16:19:59,444 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
  18858. 2025-07-20 16:19:59,448 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
  18859. 2025-07-20 16:19:59,533 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-3
  18860. 2025-07-20 16:19:59,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
  18861. 2025-07-20 16:19:59,537 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
  18862. 2025-07-20 16:19:59,636 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-5
  18863. 2025-07-20 16:19:59,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
  18864. 2025-07-20 16:19:59,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-3
  18865. 2025-07-20 16:19:59,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
  18866. 2025-07-20 16:19:59,641 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-2
  18867. 2025-07-20 16:19:59,642 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
  18868. 2025-07-20 16:19:59,643 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
  18869. 2025-07-20 16:19:59,645 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-2
  18870. 2025-07-20 16:19:59,648 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
  18871. 2025-07-20 16:19:59,650 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
  18872. 2025-07-20 16:19:59,652 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-4
  18873. 2025-07-20 16:19:59,737 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-6
  18874. 2025-07-20 16:19:59,737 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-7
  18875. 2025-07-20 16:19:59,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-1
  18876. 2025-07-20 16:19:59,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-5
  18877. 2025-07-20 16:19:59,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-6
  18878. 2025-07-20 16:19:59,746 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-2
  18879. 2025-07-20 16:19:59,747 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-2
  18880. 2025-07-20 16:19:59,750 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-4
  18881. 2025-07-20 16:19:59,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-2
  18882. 2025-07-20 16:19:59,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-3
  18883. 2025-07-20 16:19:59,754 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-6
  18884. 2025-07-20 16:19:59,756 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-6
  18885. 2025-07-20 16:19:59,756 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
  18886. 2025-07-20 16:19:59,758 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-5
  18887. 2025-07-20 16:19:59,835 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-4
  18888. 2025-07-20 16:19:59,837 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-4
  18889. 2025-07-20 16:19:59,852 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-6
  18890. 2025-07-20 16:19:59,853 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-5
  18891. 2025-07-20 16:19:59,856 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-2
  18892. 2025-07-20 16:19:59,935 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-6
  18893. 2025-07-20 16:20:00,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
  18894. 2025-07-20 16:20:00,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-3
  18895. 2025-07-20 16:20:00,042 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-1
  18896. 2025-07-20 16:20:00,044 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-2
  18897. 2025-07-20 16:20:00,051 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-3
  18898. 2025-07-20 16:20:00,051 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-6
  18899. 2025-07-20 16:20:00,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-6
  18900. 2025-07-20 16:20:00,053 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-3
  18901. 2025-07-20 16:20:00,055 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-3
  18902. 2025-07-20 16:20:00,135 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
  18903. 2025-07-20 16:20:00,136 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-1
  18904. 2025-07-20 16:20:00,138 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-1
  18905. 2025-07-20 16:20:00,140 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
  18906. 2025-07-20 16:20:00,141 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-3
  18907. 2025-07-20 16:20:00,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-1
  18908. 2025-07-20 16:20:00,143 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-5
  18909. 2025-07-20 16:20:00,333 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-6
  18910. 2025-07-20 16:20:00,334 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-5
  18911. 2025-07-20 16:20:00,334 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-3
  18912. 2025-07-20 16:20:00,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-4
  18913. 2025-07-20 16:20:00,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-5
  18914. 2025-07-20 16:20:00,338 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-4
  18915. 2025-07-20 16:20:00,339 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-2
  18916. 2025-07-20 16:20:00,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
  18917. 2025-07-20 16:20:00,349 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-6
  18918. 2025-07-20 16:20:00,350 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-4
  18919. 2025-07-20 16:20:00,352 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-3
  18920. 2025-07-20 16:20:00,353 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-4
  18921. 2025-07-20 16:20:00,550 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-5
  18922. 2025-07-20 16:20:00,552 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-1
  18923. 2025-07-20 16:20:00,635 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-3
  18924. 2025-07-20 16:20:00,738 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-8
  18925. 2025-07-20 16:20:00,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-4
  18926. 2025-07-20 16:20:00,743 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-7
  18927. 2025-07-20 16:20:00,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-2
  18928. 2025-07-20 16:20:00,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-8
  18929. 2025-07-20 16:20:00,789 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-5
  18930. 2025-07-20 16:20:00,789 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-5
  18931. 2025-07-20 16:20:00,790 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
  18932. 2025-07-20 16:20:00,834 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
  18933. 2025-07-20 16:20:00,836 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-10
  18934. 2025-07-20 16:20:00,837 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-7
  18935. 2025-07-20 16:20:00,840 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-5
  18936. 2025-07-20 16:20:00,842 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-7
  18937. 2025-07-20 16:20:00,843 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-2
  18938. 2025-07-20 16:20:00,944 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-8
  18939. 2025-07-20 16:20:00,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-9
  18940. 2025-07-20 16:20:00,946 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
  18941. 2025-07-20 16:20:00,948 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-6
  18942. 2025-07-20 16:20:00,950 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-1
  18943. 2025-07-20 16:20:00,952 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-4
  18944. 2025-07-20 16:20:00,954 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-1
  18945. 2025-07-20 16:20:00,958 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-2
  18946. 2025-07-20 16:20:00,961 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
  18947. 2025-07-20 16:20:00,964 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
  18948. 2025-07-20 16:20:00,966 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
  18949. 2025-07-20 16:20:00,966 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-1
  18950. 2025-07-20 16:20:00,968 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-1
  18951. 2025-07-20 16:20:01,034 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
  18952. 2025-07-20 16:20:01,035 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
  18953. 2025-07-20 16:20:01,036 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
  18954. 2025-07-20 16:20:01,036 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
  18955. 2025-07-20 16:20:01,039 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-4
  18956. 2025-07-20 16:20:01,047 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
  18957. 2025-07-20 16:20:01,048 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
  18958. 2025-07-20 16:20:01,050 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-5
  18959. 2025-07-20 16:20:01,051 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  18960. 2025-07-20 16:20:01,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
  18961. 2025-07-20 16:20:01,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
  18962. 2025-07-20 16:20:01,054 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
  18963. 2025-07-20 16:20:01,056 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
  18964. 2025-07-20 16:20:01,056 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
  18965. 2025-07-20 16:20:01,058 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-4
  18966. 2025-07-20 16:20:01,060 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
  18967. 2025-07-20 16:20:01,062 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
  18968. 2025-07-20 16:20:01,064 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
  18969. 2025-07-20 16:20:01,066 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-1
  18970. 2025-07-20 16:20:01,068 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
  18971. 2025-07-20 16:20:01,135 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
  18972. 2025-07-20 16:20:01,137 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
  18973. 2025-07-20 16:20:01,138 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
  18974. 2025-07-20 16:20:01,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
  18975. 2025-07-20 16:20:01,149 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
  18976. 2025-07-20 16:20:01,151 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-1
  18977. 2025-07-20 16:20:01,153 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-1
  18978. 2025-07-20 16:20:01,155 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
  18979. 2025-07-20 16:20:01,157 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
  18980. 2025-07-20 16:20:01,159 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
  18981. 2025-07-20 16:20:01,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
  18982. 2025-07-20 16:20:01,248 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
  18983. 2025-07-20 16:20:01,249 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
  18984. 2025-07-20 16:20:01,249 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
  18985. 2025-07-20 16:20:01,250 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
  18986. 2025-07-20 16:20:01,250 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
  18987. 2025-07-20 16:20:01,253 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
  18988. 2025-07-20 16:20:01,257 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
  18989. 2025-07-20 16:20:01,260 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
  18990. 2025-07-20 16:20:01,261 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
  18991. 2025-07-20 16:20:01,265 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
  18992. 2025-07-20 16:20:01,266 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
  18993. 2025-07-20 16:20:01,267 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
  18994. 2025-07-20 16:20:01,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
  18995. 2025-07-20 16:20:01,340 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
  18996. 2025-07-20 16:20:01,342 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
  18997. 2025-07-20 16:20:01,343 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
  18998. 2025-07-20 16:20:01,346 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
  18999. 2025-07-20 16:20:01,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
  19000. 2025-07-20 16:20:01,353 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
  19001. 2025-07-20 16:20:01,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-2
  19002. 2025-07-20 16:20:01,360 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
  19003. 2025-07-20 16:20:01,361 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
  19004. 2025-07-20 16:20:01,363 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
  19005. 2025-07-20 16:20:01,466 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-3
  19006. 2025-07-20 16:20:01,536 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
  19007. 2025-07-20 16:20:01,536 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
  19008. 2025-07-20 16:20:01,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
  19009. 2025-07-20 16:20:01,543 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
  19010. 2025-07-20 16:20:01,546 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
  19011. 2025-07-20 16:20:01,547 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
  19012. 2025-07-20 16:20:01,551 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
  19013. 2025-07-20 16:20:01,553 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
  19014. 2025-07-20 16:20:01,557 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
  19015. 2025-07-20 16:20:01,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
  19016. 2025-07-20 16:20:01,641 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
  19017. 2025-07-20 16:20:08,836 - __main__ - INFO - Queue remaining: 0
  19018. 2025-07-20 16:20:08,836 - __main__ - INFO -
  19019. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19020. ----------------------------------------------------------------------------------
  19021. 2025-07-20 16:20:08,838 - __main__ - INFO -
  19022. Worker ID | started
  19023. ----------+--------
  19024. 0 | 228
  19025. 2025-07-20 16:33:56,631 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  19026. 2025-07-20 16:33:56,632 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
  19027. 2025-07-20 16:33:56,632 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
  19028. 2025-07-20 16:33:56,633 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
  19029. 2025-07-20 16:33:56,633 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
  19030. 2025-07-20 16:33:56,633 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
  19031. 2025-07-20 16:33:56,634 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
  19032. 2025-07-20 16:33:56,634 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
  19033. 2025-07-20 16:33:56,635 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
  19034. 2025-07-20 16:33:56,635 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
  19035. 2025-07-20 16:33:56,635 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
  19036. 2025-07-20 16:33:56,636 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
  19037. 2025-07-20 16:33:56,636 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
  19038. 2025-07-20 16:33:56,636 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
  19039. 2025-07-20 16:33:56,637 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
  19040. 2025-07-20 16:33:56,637 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
  19041. 2025-07-20 16:33:56,637 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
  19042. 2025-07-20 16:33:56,638 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
  19043. 2025-07-20 16:33:56,638 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
  19044. 2025-07-20 16:33:56,638 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
  19045. 2025-07-20 16:33:56,639 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
  19046. 2025-07-20 16:33:56,639 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
  19047. 2025-07-20 16:33:56,639 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
  19048. 2025-07-20 16:33:56,640 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
  19049. 2025-07-20 16:33:56,640 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
  19050. 2025-07-20 16:33:56,640 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
  19051. 2025-07-20 16:33:56,641 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
  19052. 2025-07-20 16:33:56,641 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
  19053. 2025-07-20 16:33:56,641 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
  19054. 2025-07-20 16:33:56,641 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
  19055. 2025-07-20 16:33:56,642 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
  19056. 2025-07-20 16:33:56,642 - __main__ - INFO - Found 30 total pdf paths to add
  19057. 2025-07-20 16:33:56,712 - __main__ - INFO - Calculated items_per_group: 6 based on average pages per PDF: 7.60
  19058. 2025-07-20 16:33:56,920 - __main__ - INFO - Starting pipeline with PID 617074
  19059. 2025-07-20 16:33:56,920 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  19060. 2025-07-20 16:33:57,002 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  19061. 2025-07-20 16:33:58,034 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  19062. 2025-07-20 16:33:59,069 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  19063. 2025-07-20 16:34:00,132 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  19064. 2025-07-20 16:34:01,200 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  19065. 2025-07-20 16:34:02,267 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  19066. 2025-07-20 16:34:03,336 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  19067. 2025-07-20 16:34:04,405 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  19068. 2025-07-20 16:34:05,473 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  19069. 2025-07-20 16:34:05,746 - sglang - INFO - [2025-07-20 16:34:05] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=464142597, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  19070. 2025-07-20 16:34:05,746 - __main__ - INFO - [2025-07-20 16:34:05] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=464142597, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  19071. 2025-07-20 16:34:06,569 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  19072. 2025-07-20 16:34:07,018 - sglang - INFO - [2025-07-20 16:34:07] Use chat template for the OpenAI-compatible API server: qwen2-vl
  19073. 2025-07-20 16:34:07,018 - __main__ - INFO - [2025-07-20 16:34:07] Use chat template for the OpenAI-compatible API server: qwen2-vl
  19074. 2025-07-20 16:34:07,647 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  19075. 2025-07-20 16:34:08,718 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  19076. 2025-07-20 16:34:09,773 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  19077. 2025-07-20 16:34:10,849 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  19078. 2025-07-20 16:34:11,914 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  19079. 2025-07-20 16:34:12,987 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  19080. 2025-07-20 16:34:13,404 - sglang - INFO - [2025-07-20 16:34:13 TP0] Overlap scheduler is disabled for multimodal models.
  19081. 2025-07-20 16:34:13,404 - __main__ - INFO - [2025-07-20 16:34:13 TP0] Overlap scheduler is disabled for multimodal models.
  19082. 2025-07-20 16:34:13,406 - sglang - INFO - [2025-07-20 16:34:13 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  19083. 2025-07-20 16:34:13,406 - __main__ - INFO - [2025-07-20 16:34:13 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  19084. 2025-07-20 16:34:13,407 - sglang - INFO - [2025-07-20 16:34:13 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  19085. 2025-07-20 16:34:13,407 - __main__ - INFO - [2025-07-20 16:34:13 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  19086. 2025-07-20 16:34:13,407 - sglang - INFO - [2025-07-20 16:34:13 TP0] Init torch distributed begin.
  19087. 2025-07-20 16:34:13,407 - __main__ - INFO - [2025-07-20 16:34:13 TP0] Init torch distributed begin.
  19088. 2025-07-20 16:34:14,050 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  19089. 2025-07-20 16:34:15,103 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  19090. 2025-07-20 16:34:16,148 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  19091. 2025-07-20 16:34:17,212 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  19092. 2025-07-20 16:34:18,281 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  19093. 2025-07-20 16:34:19,042 - sglang - INFO - [2025-07-20 16:34:19 TP0] Load weight begin. avail mem=23.33 GB
  19094. 2025-07-20 16:34:19,042 - __main__ - INFO - [2025-07-20 16:34:19 TP0] Load weight begin. avail mem=23.33 GB
  19095. 2025-07-20 16:34:19,359 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  19096. 2025-07-20 16:34:19,831 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  19097. 2025-07-20 16:34:19,831 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  19098. 2025-07-20 16:34:20,438 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  19099. 2025-07-20 16:34:21,508 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  19100. 2025-07-20 16:34:22,578 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  19101. 2025-07-20 16:34:23,610 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  19102. 2025-07-20 16:34:24,659 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  19103. 2025-07-20 16:34:25,724 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  19104. 2025-07-20 16:34:26,790 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  19105. 2025-07-20 16:34:27,860 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  19106. 2025-07-20 16:34:28,930 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  19107. 2025-07-20 16:34:29,999 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  19108. 2025-07-20 16:34:31,065 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  19109. 2025-07-20 16:34:32,127 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  19110. 2025-07-20 16:34:32,771 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.94s/it]
  19111. 2025-07-20 16:34:32,771 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.94s/it]
  19112. 2025-07-20 16:34:33,192 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  19113. 2025-07-20 16:34:34,257 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  19114. 2025-07-20 16:34:35,330 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  19115. 2025-07-20 16:34:36,398 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  19116. 2025-07-20 16:34:37,467 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  19117. 2025-07-20 16:34:38,538 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  19118. 2025-07-20 16:34:39,570 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  19119. 2025-07-20 16:34:40,633 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  19120. 2025-07-20 16:34:41,699 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  19121. 2025-07-20 16:34:42,764 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  19122. 2025-07-20 16:34:43,819 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  19123. 2025-07-20 16:34:44,924 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  19124. 2025-07-20 16:34:45,782 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:25<00:25, 12.98s/it]
  19125. 2025-07-20 16:34:45,783 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:25<00:25, 12.98s/it]
  19126. 2025-07-20 16:34:45,974 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  19127. 2025-07-20 16:34:47,016 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  19128. 2025-07-20 16:34:48,071 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  19129. 2025-07-20 16:34:49,137 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  19130. 2025-07-20 16:34:50,199 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  19131. 2025-07-20 16:34:51,254 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  19132. 2025-07-20 16:34:52,309 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  19133. 2025-07-20 16:34:53,372 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  19134. 2025-07-20 16:34:54,437 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  19135. 2025-07-20 16:34:55,516 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  19136. 2025-07-20 16:34:56,582 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  19137. 2025-07-20 16:34:57,647 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  19138. 2025-07-20 16:34:58,522 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:38<00:12, 12.87s/it]
  19139. 2025-07-20 16:34:58,523 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:38<00:12, 12.87s/it]
  19140. 2025-07-20 16:34:58,725 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  19141. 2025-07-20 16:34:59,793 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  19142. 2025-07-20 16:35:00,858 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  19143. 2025-07-20 16:35:01,926 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  19144. 2025-07-20 16:35:02,990 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.55s/it]
  19145. 2025-07-20 16:35:02,990 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.55s/it]
  19146. 2025-07-20 16:35:02,990 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.78s/it]
  19147. 2025-07-20 16:35:02,990 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.78s/it]
  19148. 2025-07-20 16:35:02,990 - sglang - INFO -
  19149. 2025-07-20 16:35:02,990 - __main__ - INFO -
  19150. 2025-07-20 16:35:02,992 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  19151. 2025-07-20 16:35:03,051 - sglang - INFO - [2025-07-20 16:35:03 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  19152. 2025-07-20 16:35:03,051 - __main__ - INFO - [2025-07-20 16:35:03 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  19153. 2025-07-20 16:35:03,064 - sglang - INFO - [2025-07-20 16:35:03 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  19154. 2025-07-20 16:35:03,064 - __main__ - INFO - [2025-07-20 16:35:03 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  19155. 2025-07-20 16:35:03,064 - sglang - INFO - [2025-07-20 16:35:03 TP0] Memory pool end. avail mem=5.30 GB
  19156. 2025-07-20 16:35:03,064 - __main__ - INFO - [2025-07-20 16:35:03 TP0] Memory pool end. avail mem=5.30 GB
  19157. 2025-07-20 16:35:03,298 - sglang - INFO - [2025-07-20 16:35:03 TP0] Capture cuda graph begin. This can take up to several minutes.
  19158. 2025-07-20 16:35:03,298 - __main__ - INFO - [2025-07-20 16:35:03 TP0] Capture cuda graph begin. This can take up to several minutes.
  19159. 2025-07-20 16:35:04,070 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  19160. 2025-07-20 16:35:05,114 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  19161. 2025-07-20 16:35:05,737 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.51s/it] 50%|█████ | 2/4 [00:01<00:01, 1.23it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.74it/s] 100%|██████████| 4/4 [00:02<00:00, 2.13it/s] 100%|██████████| 4/4 [00:02<00:00, 1.64it/s]
  19162. 2025-07-20 16:35:05,737 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.51s/it] 50%|█████ | 2/4 [00:01<00:01, 1.23it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.74it/s] 100%|██████████| 4/4 [00:02<00:00, 2.13it/s] 100%|██████████| 4/4 [00:02<00:00, 1.64it/s]
  19163. 2025-07-20 16:35:05,737 - sglang - INFO - [2025-07-20 16:35:05 TP0] Capture cuda graph end. Time elapsed: 2.44 s
  19164. 2025-07-20 16:35:05,737 - __main__ - INFO - [2025-07-20 16:35:05 TP0] Capture cuda graph end. Time elapsed: 2.44 s
  19165. 2025-07-20 16:35:06,193 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  19166. 2025-07-20 16:35:06,546 - sglang - INFO - [2025-07-20 16:35:06 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  19167. 2025-07-20 16:35:06,546 - __main__ - INFO - [2025-07-20 16:35:06 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  19168. 2025-07-20 16:35:07,291 - __main__ - INFO - sglang server is ready.
  19169. 2025-07-20 16:35:07,291 - __main__ - INFO - Queue remaining: 5
  19170. 2025-07-20 16:35:07,291 - __main__ - INFO -
  19171. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19172. ----------------------------------------------------------------------------------
  19173. 2025-07-20 16:35:07,291 - __main__ - INFO -
  19174. Worker ID
  19175. ---------
  19176. 2025-07-20 16:35:07,292 - __main__ - INFO - Worker 0 processing work item edf9b7fc807863af5dce0a8b6f28c0cb86ca7661
  19177. 2025-07-20 16:35:07,292 - __main__ - INFO - Created all tasks for edf9b7fc807863af5dce0a8b6f28c0cb86ca7661
  19178. 2025-07-20 16:35:07,297 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
  19179. 2025-07-20 16:35:07,301 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
  19180. 2025-07-20 16:35:07,304 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
  19181. 2025-07-20 16:35:07,335 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
  19182. 2025-07-20 16:35:07,338 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
  19183. 2025-07-20 16:35:07,341 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
  19184. 2025-07-20 16:35:07,641 - sglang - INFO - [2025-07-20 16:35:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19185. 2025-07-20 16:35:07,642 - __main__ - INFO - [2025-07-20 16:35:07 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19186. 2025-07-20 16:35:07,642 - __main__ - INFO - sglang running req: 0 queue req: 0
  19187. 2025-07-20 16:35:13,233 - sglang - INFO - [2025-07-20 16:35:13] The server is fired up and ready to roll!
  19188. 2025-07-20 16:35:13,233 - __main__ - INFO - [2025-07-20 16:35:13] The server is fired up and ready to roll!
  19189. 2025-07-20 16:35:17,294 - __main__ - INFO - Queue remaining: 4
  19190. 2025-07-20 16:35:17,294 - __main__ - INFO -
  19191. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19192. ----------------------------------------------------------------------------------
  19193. 2025-07-20 16:35:17,294 - __main__ - INFO -
  19194. Worker ID | started
  19195. ----------+--------
  19196. 0 | 43
  19197. 2025-07-20 16:35:25,173 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
  19198. 2025-07-20 16:35:25,175 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
  19199. 2025-07-20 16:35:25,189 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
  19200. 2025-07-20 16:35:25,202 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
  19201. 2025-07-20 16:35:25,210 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
  19202. 2025-07-20 16:35:25,223 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
  19203. 2025-07-20 16:35:25,240 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
  19204. 2025-07-20 16:35:25,253 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
  19205. 2025-07-20 16:35:25,258 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
  19206. 2025-07-20 16:35:25,269 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
  19207. 2025-07-20 16:35:25,280 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
  19208. 2025-07-20 16:35:25,286 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
  19209. 2025-07-20 16:35:25,286 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
  19210. 2025-07-20 16:35:25,286 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
  19211. 2025-07-20 16:35:25,289 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
  19212. 2025-07-20 16:35:25,293 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
  19213. 2025-07-20 16:35:25,307 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
  19214. 2025-07-20 16:35:25,335 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
  19215. 2025-07-20 16:35:25,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
  19216. 2025-07-20 16:35:25,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
  19217. 2025-07-20 16:35:25,338 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
  19218. 2025-07-20 16:35:25,340 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
  19219. 2025-07-20 16:35:25,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
  19220. 2025-07-20 16:35:25,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
  19221. 2025-07-20 16:35:25,370 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
  19222. 2025-07-20 16:35:25,370 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
  19223. 2025-07-20 16:35:25,372 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
  19224. 2025-07-20 16:35:25,372 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
  19225. 2025-07-20 16:35:25,377 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
  19226. 2025-07-20 16:35:25,378 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
  19227. 2025-07-20 16:35:25,388 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
  19228. 2025-07-20 16:35:25,396 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
  19229. 2025-07-20 16:35:25,433 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
  19230. 2025-07-20 16:35:25,454 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
  19231. 2025-07-20 16:35:25,459 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
  19232. 2025-07-20 16:35:25,466 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
  19233. 2025-07-20 16:35:25,567 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
  19234. 2025-07-20 16:35:25,578 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
  19235. 2025-07-20 16:35:25,635 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
  19236. 2025-07-20 16:35:25,648 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
  19237. 2025-07-20 16:35:25,669 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
  19238. 2025-07-20 16:35:25,683 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
  19239. 2025-07-20 16:35:25,686 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
  19240. 2025-07-20 16:35:27,333 - __main__ - INFO - Queue remaining: 4
  19241. 2025-07-20 16:35:27,333 - __main__ - INFO -
  19242. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19243. ----------------------------------------------------------------------------------
  19244. 2025-07-20 16:35:27,335 - __main__ - INFO -
  19245. Worker ID | started
  19246. ----------+--------
  19247. 0 | 43
  19248. 2025-07-20 16:35:37,336 - __main__ - INFO - Queue remaining: 4
  19249. 2025-07-20 16:35:37,337 - __main__ - INFO -
  19250. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19251. ----------------------------------------------------------------------------------
  19252. 2025-07-20 16:35:37,337 - __main__ - INFO -
  19253. Worker ID | started
  19254. ----------+--------
  19255. 0 | 43
  19256. 2025-07-20 16:36:06,835 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  19257. 2025-07-20 16:36:06,836 - __main__ - INFO - Queue remaining: 4
  19258. 2025-07-20 16:36:06,836 - __main__ - INFO -
  19259. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19260. ----------------------------------------------------------------------------------
  19261. 2025-07-20 16:36:06,836 - __main__ - INFO -
  19262. Worker ID | started
  19263. ----------+--------
  19264. 0 | 43
  19265. 2025-07-20 16:36:06,838 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-4 cancelled
  19266. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-2 cancelled
  19267. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-5 cancelled
  19268. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-3 cancelled
  19269. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106028002.pdf-1 cancelled
  19270. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-7 cancelled
  19271. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-9 cancelled
  19272. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-2 cancelled
  19273. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-5 cancelled
  19274. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-8 cancelled
  19275. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-3 cancelled
  19276. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-6 cancelled
  19277. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-1 cancelled
  19278. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106018000.pdf-4 cancelled
  19279. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-4 cancelled
  19280. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-9 cancelled
  19281. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-5 cancelled
  19282. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-3 cancelled
  19283. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-6 cancelled
  19284. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-10 cancelled
  19285. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-2 cancelled
  19286. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-1 cancelled
  19287. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-7 cancelled
  19288. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106016000.pdf-8 cancelled
  19289. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-5 cancelled
  19290. 2025-07-20 16:36:06,839 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-6 cancelled
  19291. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-2 cancelled
  19292. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-1 cancelled
  19293. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-3 cancelled
  19294. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013003.pdf-4 cancelled
  19295. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-4 cancelled
  19296. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-5 cancelled
  19297. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-2 cancelled
  19298. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-1 cancelled
  19299. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-3 cancelled
  19300. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106013004.pdf-6 cancelled
  19301. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-6 cancelled
  19302. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-1 cancelled
  19303. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-4 cancelled
  19304. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-7 cancelled
  19305. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-2 cancelled
  19306. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-5 cancelled
  19307. 2025-07-20 16:36:06,840 - __main__ - INFO - Process page test_pdf/1144520000702630XG3440106029005.pdf-3 cancelled
  19308. 2025-07-20 16:36:06,842 - __main__ - INFO - Got cancellation request for SGLang server
  19309. 2025-07-20 16:36:44,402 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  19310. 2025-07-20 16:36:44,403 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
  19311. 2025-07-20 16:36:44,403 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
  19312. 2025-07-20 16:36:44,404 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
  19313. 2025-07-20 16:36:44,405 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
  19314. 2025-07-20 16:36:44,405 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
  19315. 2025-07-20 16:36:44,405 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
  19316. 2025-07-20 16:36:44,406 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
  19317. 2025-07-20 16:36:44,406 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
  19318. 2025-07-20 16:36:44,407 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
  19319. 2025-07-20 16:36:44,407 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
  19320. 2025-07-20 16:36:44,407 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
  19321. 2025-07-20 16:36:44,408 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
  19322. 2025-07-20 16:36:44,408 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
  19323. 2025-07-20 16:36:44,408 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
  19324. 2025-07-20 16:36:44,408 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
  19325. 2025-07-20 16:36:44,409 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
  19326. 2025-07-20 16:36:44,409 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
  19327. 2025-07-20 16:36:44,409 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
  19328. 2025-07-20 16:36:44,409 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
  19329. 2025-07-20 16:36:44,410 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
  19330. 2025-07-20 16:36:44,410 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
  19331. 2025-07-20 16:36:44,411 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
  19332. 2025-07-20 16:36:44,411 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
  19333. 2025-07-20 16:36:44,411 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
  19334. 2025-07-20 16:36:44,411 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
  19335. 2025-07-20 16:36:44,412 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
  19336. 2025-07-20 16:36:44,412 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
  19337. 2025-07-20 16:36:44,412 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
  19338. 2025-07-20 16:36:44,412 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
  19339. 2025-07-20 16:36:44,413 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
  19340. 2025-07-20 16:36:44,413 - __main__ - INFO - Found 30 total pdf paths to add
  19341. 2025-07-20 16:36:44,485 - __main__ - INFO - Calculated items_per_group: 1 based on average pages per PDF: 7.60
  19342. 2025-07-20 16:36:44,676 - __main__ - INFO - Starting pipeline with PID 620226
  19343. 2025-07-20 16:36:44,676 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  19344. 2025-07-20 16:36:44,756 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  19345. 2025-07-20 16:36:45,806 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  19346. 2025-07-20 16:36:46,849 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  19347. 2025-07-20 16:36:47,908 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  19348. 2025-07-20 16:36:48,971 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  19349. 2025-07-20 16:36:50,035 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  19350. 2025-07-20 16:36:51,102 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  19351. 2025-07-20 16:36:52,170 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  19352. 2025-07-20 16:36:53,236 - sglang - INFO - [2025-07-20 16:36:53] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=701089678, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  19353. 2025-07-20 16:36:53,236 - __main__ - INFO - [2025-07-20 16:36:53] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=701089678, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  19354. 2025-07-20 16:36:53,237 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  19355. 2025-07-20 16:36:54,297 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  19356. 2025-07-20 16:36:54,375 - sglang - INFO - [2025-07-20 16:36:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
  19357. 2025-07-20 16:36:54,375 - __main__ - INFO - [2025-07-20 16:36:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
  19358. 2025-07-20 16:36:55,373 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  19359. 2025-07-20 16:36:56,440 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  19360. 2025-07-20 16:36:57,508 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  19361. 2025-07-20 16:36:58,590 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  19362. 2025-07-20 16:36:59,670 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  19363. 2025-07-20 16:37:00,747 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  19364. 2025-07-20 16:37:00,886 - sglang - INFO - [2025-07-20 16:37:00 TP0] Overlap scheduler is disabled for multimodal models.
  19365. 2025-07-20 16:37:00,886 - __main__ - INFO - [2025-07-20 16:37:00 TP0] Overlap scheduler is disabled for multimodal models.
  19366. 2025-07-20 16:37:00,888 - sglang - INFO - [2025-07-20 16:37:00 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  19367. 2025-07-20 16:37:00,888 - __main__ - INFO - [2025-07-20 16:37:00 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  19368. 2025-07-20 16:37:00,888 - sglang - INFO - [2025-07-20 16:37:00 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  19369. 2025-07-20 16:37:00,888 - __main__ - INFO - [2025-07-20 16:37:00 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  19370. 2025-07-20 16:37:00,888 - sglang - INFO - [2025-07-20 16:37:00 TP0] Init torch distributed begin.
  19371. 2025-07-20 16:37:00,889 - __main__ - INFO - [2025-07-20 16:37:00 TP0] Init torch distributed begin.
  19372. 2025-07-20 16:37:01,778 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  19373. 2025-07-20 16:37:02,827 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  19374. 2025-07-20 16:37:03,889 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  19375. 2025-07-20 16:37:04,956 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  19376. 2025-07-20 16:37:06,023 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  19377. 2025-07-20 16:37:06,516 - sglang - INFO - [2025-07-20 16:37:06 TP0] Load weight begin. avail mem=23.33 GB
  19378. 2025-07-20 16:37:06,516 - __main__ - INFO - [2025-07-20 16:37:06 TP0] Load weight begin. avail mem=23.33 GB
  19379. 2025-07-20 16:37:07,100 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  19380. 2025-07-20 16:37:07,234 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  19381. 2025-07-20 16:37:07,234 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  19382. 2025-07-20 16:37:08,177 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  19383. 2025-07-20 16:37:09,244 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  19384. 2025-07-20 16:37:10,280 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  19385. 2025-07-20 16:37:11,344 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  19386. 2025-07-20 16:37:12,413 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  19387. 2025-07-20 16:37:13,481 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  19388. 2025-07-20 16:37:14,537 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  19389. 2025-07-20 16:37:15,605 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  19390. 2025-07-20 16:37:16,672 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  19391. 2025-07-20 16:37:17,740 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  19392. 2025-07-20 16:37:18,804 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  19393. 2025-07-20 16:37:19,865 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  19394. 2025-07-20 16:37:20,077 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.84s/it]
  19395. 2025-07-20 16:37:20,077 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:12<00:38, 12.84s/it]
  19396. 2025-07-20 16:37:20,941 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  19397. 2025-07-20 16:37:22,013 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  19398. 2025-07-20 16:37:23,081 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  19399. 2025-07-20 16:37:24,148 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  19400. 2025-07-20 16:37:25,221 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  19401. 2025-07-20 16:37:26,290 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  19402. 2025-07-20 16:37:27,371 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  19403. 2025-07-20 16:37:28,424 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  19404. 2025-07-20 16:37:29,485 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  19405. 2025-07-20 16:37:30,553 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  19406. 2025-07-20 16:37:31,625 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  19407. 2025-07-20 16:37:32,693 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  19408. 2025-07-20 16:37:33,113 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:25<00:25, 12.96s/it]
  19409. 2025-07-20 16:37:33,113 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:25<00:25, 12.96s/it]
  19410. 2025-07-20 16:37:33,770 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  19411. 2025-07-20 16:37:34,838 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  19412. 2025-07-20 16:37:35,891 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  19413. 2025-07-20 16:37:36,953 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  19414. 2025-07-20 16:37:38,021 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  19415. 2025-07-20 16:37:39,088 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  19416. 2025-07-20 16:37:40,148 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  19417. 2025-07-20 16:37:41,216 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  19418. 2025-07-20 16:37:42,284 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  19419. 2025-07-20 16:37:43,352 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  19420. 2025-07-20 16:37:44,420 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  19421. 2025-07-20 16:37:45,489 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  19422. 2025-07-20 16:37:46,557 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  19423. 2025-07-20 16:37:46,698 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.24s/it]
  19424. 2025-07-20 16:37:46,698 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.24s/it]
  19425. 2025-07-20 16:37:47,635 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  19426. 2025-07-20 16:37:48,701 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  19427. 2025-07-20 16:37:49,769 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  19428. 2025-07-20 16:37:50,837 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  19429. 2025-07-20 16:37:51,177 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.78s/it]
  19430. 2025-07-20 16:37:51,177 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.78s/it]
  19431. 2025-07-20 16:37:51,178 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.99s/it]
  19432. 2025-07-20 16:37:51,178 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.99s/it]
  19433. 2025-07-20 16:37:51,178 - sglang - INFO -
  19434. 2025-07-20 16:37:51,178 - __main__ - INFO -
  19435. 2025-07-20 16:37:51,259 - sglang - INFO - [2025-07-20 16:37:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  19436. 2025-07-20 16:37:51,259 - __main__ - INFO - [2025-07-20 16:37:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  19437. 2025-07-20 16:37:51,273 - sglang - INFO - [2025-07-20 16:37:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  19438. 2025-07-20 16:37:51,273 - __main__ - INFO - [2025-07-20 16:37:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  19439. 2025-07-20 16:37:51,273 - sglang - INFO - [2025-07-20 16:37:51 TP0] Memory pool end. avail mem=5.30 GB
  19440. 2025-07-20 16:37:51,273 - __main__ - INFO - [2025-07-20 16:37:51 TP0] Memory pool end. avail mem=5.30 GB
  19441. 2025-07-20 16:37:51,499 - sglang - INFO - [2025-07-20 16:37:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  19442. 2025-07-20 16:37:51,499 - __main__ - INFO - [2025-07-20 16:37:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  19443. 2025-07-20 16:37:51,913 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  19444. 2025-07-20 16:37:52,969 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  19445. 2025-07-20 16:37:54,044 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.47s/it] 50%|█████ | 2/4 [00:01<00:01, 1.23it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.66it/s] 100%|██████████| 4/4 [00:02<00:00, 1.98it/s] 100%|██████████| 4/4 [00:02<00:00, 1.58it/s]
  19446. 2025-07-20 16:37:54,044 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.47s/it] 50%|█████ | 2/4 [00:01<00:01, 1.23it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.66it/s] 100%|██████████| 4/4 [00:02<00:00, 1.98it/s] 100%|██████████| 4/4 [00:02<00:00, 1.58it/s]
  19447. 2025-07-20 16:37:54,044 - sglang - INFO - [2025-07-20 16:37:54 TP0] Capture cuda graph end. Time elapsed: 2.54 s
  19448. 2025-07-20 16:37:54,044 - __main__ - INFO - [2025-07-20 16:37:54 TP0] Capture cuda graph end. Time elapsed: 2.54 s
  19449. 2025-07-20 16:37:54,045 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  19450. 2025-07-20 16:37:54,823 - sglang - INFO - [2025-07-20 16:37:54 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  19451. 2025-07-20 16:37:54,823 - __main__ - INFO - [2025-07-20 16:37:54 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  19452. 2025-07-20 16:37:55,133 - __main__ - INFO - sglang server is ready.
  19453. 2025-07-20 16:37:55,134 - __main__ - INFO - Queue remaining: 30
  19454. 2025-07-20 16:37:55,134 - __main__ - INFO -
  19455. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19456. ----------------------------------------------------------------------------------
  19457. 2025-07-20 16:37:55,134 - __main__ - INFO -
  19458. Worker ID
  19459. ---------
  19460. 2025-07-20 16:37:55,134 - __main__ - INFO - Worker 0 processing work item 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
  19461. 2025-07-20 16:37:55,134 - __main__ - INFO - Created all tasks for 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
  19462. 2025-07-20 16:37:55,137 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
  19463. 2025-07-20 16:37:55,933 - sglang - INFO - [2025-07-20 16:37:55 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19464. 2025-07-20 16:37:55,933 - __main__ - INFO - [2025-07-20 16:37:55 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19465. 2025-07-20 16:37:55,933 - __main__ - INFO - sglang running req: 0 queue req: 0
  19466. 2025-07-20 16:37:57,761 - sglang - INFO - [2025-07-20 16:37:57] The server is fired up and ready to roll!
  19467. 2025-07-20 16:37:57,761 - __main__ - INFO - [2025-07-20 16:37:57] The server is fired up and ready to roll!
  19468. 2025-07-20 16:38:03,337 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
  19469. 2025-07-20 16:38:03,368 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
  19470. 2025-07-20 16:38:03,377 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
  19471. 2025-07-20 16:38:03,420 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
  19472. 2025-07-20 16:38:03,471 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
  19473. 2025-07-20 16:38:03,506 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
  19474. 2025-07-20 16:38:03,549 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
  19475. 2025-07-20 16:38:05,135 - __main__ - INFO - Queue remaining: 29
  19476. 2025-07-20 16:38:05,137 - __main__ - INFO -
  19477. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19478. ----------------------------------------------------------------------------------
  19479. 2025-07-20 16:38:05,137 - __main__ - INFO -
  19480. Worker ID | started
  19481. ----------+--------
  19482. 0 | 7
  19483. 2025-07-20 16:38:15,137 - __main__ - INFO - Queue remaining: 29
  19484. 2025-07-20 16:38:15,144 - __main__ - INFO -
  19485. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19486. ----------------------------------------------------------------------------------
  19487. 2025-07-20 16:38:15,144 - __main__ - INFO -
  19488. Worker ID | started
  19489. ----------+--------
  19490. 0 | 7
  19491. 2025-07-20 16:54:15,726 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  19492. 2025-07-20 16:54:15,727 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
  19493. 2025-07-20 16:54:15,727 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
  19494. 2025-07-20 16:54:15,727 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
  19495. 2025-07-20 16:54:15,728 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
  19496. 2025-07-20 16:54:15,728 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
  19497. 2025-07-20 16:54:15,728 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
  19498. 2025-07-20 16:54:15,729 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
  19499. 2025-07-20 16:54:15,729 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
  19500. 2025-07-20 16:54:15,730 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
  19501. 2025-07-20 16:54:15,731 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
  19502. 2025-07-20 16:54:15,731 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
  19503. 2025-07-20 16:54:15,731 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
  19504. 2025-07-20 16:54:15,732 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
  19505. 2025-07-20 16:54:15,732 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
  19506. 2025-07-20 16:54:15,732 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
  19507. 2025-07-20 16:54:15,733 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
  19508. 2025-07-20 16:54:15,733 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
  19509. 2025-07-20 16:54:15,733 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
  19510. 2025-07-20 16:54:15,734 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
  19511. 2025-07-20 16:54:15,734 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
  19512. 2025-07-20 16:54:15,734 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
  19513. 2025-07-20 16:54:15,735 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
  19514. 2025-07-20 16:54:15,735 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
  19515. 2025-07-20 16:54:15,735 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
  19516. 2025-07-20 16:54:15,736 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
  19517. 2025-07-20 16:54:15,736 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
  19518. 2025-07-20 16:54:15,737 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
  19519. 2025-07-20 16:54:15,737 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
  19520. 2025-07-20 16:54:15,737 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
  19521. 2025-07-20 16:54:15,738 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
  19522. 2025-07-20 16:54:15,738 - __main__ - INFO - Found 30 total pdf paths to add
  19523. 2025-07-20 16:54:15,824 - __main__ - INFO - Calculated items_per_group: 6 based on average pages per PDF: 7.60
  19524. 2025-07-20 16:54:16,011 - __main__ - INFO - Starting pipeline with PID 623290
  19525. 2025-07-20 16:54:16,011 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  19526. 2025-07-20 16:54:16,255 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  19527. 2025-07-20 16:54:17,316 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  19528. 2025-07-20 16:54:18,370 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  19529. 2025-07-20 16:54:19,432 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  19530. 2025-07-20 16:54:20,498 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  19531. 2025-07-20 16:54:21,562 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  19532. 2025-07-20 16:54:22,650 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  19533. 2025-07-20 16:54:23,691 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  19534. 2025-07-20 16:54:24,420 - sglang - INFO - [2025-07-20 16:54:24] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=105231769, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  19535. 2025-07-20 16:54:24,420 - __main__ - INFO - [2025-07-20 16:54:24] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=105231769, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  19536. 2025-07-20 16:54:24,768 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  19537. 2025-07-20 16:54:25,447 - sglang - INFO - [2025-07-20 16:54:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
  19538. 2025-07-20 16:54:25,447 - __main__ - INFO - [2025-07-20 16:54:25] Use chat template for the OpenAI-compatible API server: qwen2-vl
  19539. 2025-07-20 16:54:25,842 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  19540. 2025-07-20 16:54:26,898 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  19541. 2025-07-20 16:54:27,944 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  19542. 2025-07-20 16:54:29,010 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  19543. 2025-07-20 16:54:30,085 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  19544. 2025-07-20 16:54:31,166 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  19545. 2025-07-20 16:54:31,851 - sglang - INFO - [2025-07-20 16:54:31 TP0] Overlap scheduler is disabled for multimodal models.
  19546. 2025-07-20 16:54:31,851 - __main__ - INFO - [2025-07-20 16:54:31 TP0] Overlap scheduler is disabled for multimodal models.
  19547. 2025-07-20 16:54:31,853 - sglang - INFO - [2025-07-20 16:54:31 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  19548. 2025-07-20 16:54:31,853 - __main__ - INFO - [2025-07-20 16:54:31 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  19549. 2025-07-20 16:54:31,853 - sglang - INFO - [2025-07-20 16:54:31 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  19550. 2025-07-20 16:54:31,853 - __main__ - INFO - [2025-07-20 16:54:31 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  19551. 2025-07-20 16:54:31,853 - sglang - INFO - [2025-07-20 16:54:31 TP0] Init torch distributed begin.
  19552. 2025-07-20 16:54:31,853 - __main__ - INFO - [2025-07-20 16:54:31 TP0] Init torch distributed begin.
  19553. 2025-07-20 16:54:32,212 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  19554. 2025-07-20 16:54:33,258 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  19555. 2025-07-20 16:54:34,292 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  19556. 2025-07-20 16:54:35,331 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  19557. 2025-07-20 16:54:36,388 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  19558. 2025-07-20 16:54:37,433 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  19559. 2025-07-20 16:54:37,450 - sglang - INFO - [2025-07-20 16:54:37 TP0] Load weight begin. avail mem=23.33 GB
  19560. 2025-07-20 16:54:37,450 - __main__ - INFO - [2025-07-20 16:54:37 TP0] Load weight begin. avail mem=23.33 GB
  19561. 2025-07-20 16:54:38,145 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  19562. 2025-07-20 16:54:38,145 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  19563. 2025-07-20 16:54:38,488 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  19564. 2025-07-20 16:54:39,553 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  19565. 2025-07-20 16:54:40,600 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  19566. 2025-07-20 16:54:41,642 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  19567. 2025-07-20 16:54:42,701 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  19568. 2025-07-20 16:54:43,795 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  19569. 2025-07-20 16:54:44,873 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  19570. 2025-07-20 16:54:45,917 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  19571. 2025-07-20 16:54:46,975 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  19572. 2025-07-20 16:54:48,022 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  19573. 2025-07-20 16:54:49,063 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  19574. 2025-07-20 16:54:50,121 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  19575. 2025-07-20 16:54:51,187 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  19576. 2025-07-20 16:54:51,489 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:40, 13.34s/it]
  19577. 2025-07-20 16:54:51,489 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:40, 13.34s/it]
  19578. 2025-07-20 16:54:52,236 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  19579. 2025-07-20 16:54:53,274 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  19580. 2025-07-20 16:54:54,326 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  19581. 2025-07-20 16:54:55,374 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  19582. 2025-07-20 16:54:56,416 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  19583. 2025-07-20 16:54:57,468 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  19584. 2025-07-20 16:54:58,533 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  19585. 2025-07-20 16:54:59,607 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  19586. 2025-07-20 16:55:00,675 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  19587. 2025-07-20 16:55:01,742 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  19588. 2025-07-20 16:55:02,805 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  19589. 2025-07-20 16:55:03,872 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  19590. 2025-07-20 16:55:04,483 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.14s/it]
  19591. 2025-07-20 16:55:04,483 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.14s/it]
  19592. 2025-07-20 16:55:04,952 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  19593. 2025-07-20 16:55:06,023 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  19594. 2025-07-20 16:55:07,095 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  19595. 2025-07-20 16:55:08,161 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  19596. 2025-07-20 16:55:09,237 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  19597. 2025-07-20 16:55:10,312 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  19598. 2025-07-20 16:55:11,379 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  19599. 2025-07-20 16:55:12,435 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  19600. 2025-07-20 16:55:13,501 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  19601. 2025-07-20 16:55:14,567 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  19602. 2025-07-20 16:55:15,633 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  19603. 2025-07-20 16:55:16,700 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  19604. 2025-07-20 16:55:17,389 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.03s/it]
  19605. 2025-07-20 16:55:17,390 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.03s/it]
  19606. 2025-07-20 16:55:17,780 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  19607. 2025-07-20 16:55:18,853 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  19608. 2025-07-20 16:55:19,924 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  19609. 2025-07-20 16:55:20,992 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  19610. 2025-07-20 16:55:21,963 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.69s/it]
  19611. 2025-07-20 16:55:21,963 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.69s/it]
  19612. 2025-07-20 16:55:21,963 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
  19613. 2025-07-20 16:55:21,963 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.95s/it]
  19614. 2025-07-20 16:55:21,963 - sglang - INFO -
  19615. 2025-07-20 16:55:21,963 - __main__ - INFO -
  19616. 2025-07-20 16:55:22,070 - sglang - INFO - [2025-07-20 16:55:22 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  19617. 2025-07-20 16:55:22,070 - __main__ - INFO - [2025-07-20 16:55:22 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  19618. 2025-07-20 16:55:22,070 - sglang - INFO - [2025-07-20 16:55:22 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  19619. 2025-07-20 16:55:22,070 - __main__ - INFO - [2025-07-20 16:55:22 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  19620. 2025-07-20 16:55:22,070 - sglang - INFO - [2025-07-20 16:55:22 TP0] Memory pool end. avail mem=5.30 GB
  19621. 2025-07-20 16:55:22,070 - __main__ - INFO - [2025-07-20 16:55:22 TP0] Memory pool end. avail mem=5.30 GB
  19622. 2025-07-20 16:55:22,071 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  19623. 2025-07-20 16:55:22,295 - sglang - INFO - [2025-07-20 16:55:22 TP0] Capture cuda graph begin. This can take up to several minutes.
  19624. 2025-07-20 16:55:22,295 - __main__ - INFO - [2025-07-20 16:55:22 TP0] Capture cuda graph begin. This can take up to several minutes.
  19625. 2025-07-20 16:55:23,152 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  19626. 2025-07-20 16:55:24,233 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  19627. 2025-07-20 16:55:24,722 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.53s/it] 50%|█████ | 2/4 [00:01<00:01, 1.23it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.73it/s] 100%|██████████| 4/4 [00:02<00:00, 2.16it/s] 100%|██████████| 4/4 [00:02<00:00, 1.65it/s]
  19628. 2025-07-20 16:55:24,723 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.53s/it] 50%|█████ | 2/4 [00:01<00:01, 1.23it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.73it/s] 100%|██████████| 4/4 [00:02<00:00, 2.16it/s] 100%|██████████| 4/4 [00:02<00:00, 1.65it/s]
  19629. 2025-07-20 16:55:24,723 - sglang - INFO - [2025-07-20 16:55:24 TP0] Capture cuda graph end. Time elapsed: 2.43 s
  19630. 2025-07-20 16:55:24,723 - __main__ - INFO - [2025-07-20 16:55:24 TP0] Capture cuda graph end. Time elapsed: 2.43 s
  19631. 2025-07-20 16:55:25,313 - __main__ - WARNING - Attempt 66: Please wait for sglang server to become ready...
  19632. 2025-07-20 16:55:25,475 - sglang - INFO - [2025-07-20 16:55:25 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  19633. 2025-07-20 16:55:25,476 - __main__ - INFO - [2025-07-20 16:55:25 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  19634. 2025-07-20 16:55:26,406 - __main__ - INFO - sglang server is ready.
  19635. 2025-07-20 16:55:26,406 - __main__ - INFO - Queue remaining: 5
  19636. 2025-07-20 16:55:26,406 - __main__ - INFO -
  19637. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19638. ----------------------------------------------------------------------------------
  19639. 2025-07-20 16:55:26,407 - __main__ - INFO -
  19640. Worker ID
  19641. ---------
  19642. 2025-07-20 16:55:26,407 - __main__ - INFO - Worker 0 processing work item 550d87ac0b148afaac52196ab4f139412015292f
  19643. 2025-07-20 16:55:26,407 - __main__ - INFO - Created all tasks for 550d87ac0b148afaac52196ab4f139412015292f
  19644. 2025-07-20 16:55:26,415 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
  19645. 2025-07-20 16:55:26,418 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
  19646. 2025-07-20 16:55:26,422 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
  19647. 2025-07-20 16:55:26,469 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
  19648. 2025-07-20 16:55:26,474 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
  19649. 2025-07-20 16:55:26,478 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
  19650. 2025-07-20 16:55:26,643 - sglang - INFO - [2025-07-20 16:55:26 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19651. 2025-07-20 16:55:26,643 - __main__ - INFO - [2025-07-20 16:55:26 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19652. 2025-07-20 16:55:26,643 - __main__ - INFO - sglang running req: 0 queue req: 0
  19653. 2025-07-20 16:55:30,036 - sglang - INFO - [2025-07-20 16:55:30] The server is fired up and ready to roll!
  19654. 2025-07-20 16:55:30,036 - __main__ - INFO - [2025-07-20 16:55:30] The server is fired up and ready to roll!
  19655. 2025-07-20 16:55:36,408 - __main__ - INFO - Queue remaining: 4
  19656. 2025-07-20 16:55:36,408 - __main__ - INFO -
  19657. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19658. ----------------------------------------------------------------------------------
  19659. 2025-07-20 16:55:36,408 - __main__ - INFO -
  19660. Worker ID | started
  19661. ----------+--------
  19662. 0 | 64
  19663. 2025-07-20 16:55:38,901 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
  19664. 2025-07-20 16:55:38,933 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
  19665. 2025-07-20 16:55:38,965 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
  19666. 2025-07-20 16:55:39,006 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
  19667. 2025-07-20 16:55:39,040 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
  19668. 2025-07-20 16:55:39,076 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
  19669. 2025-07-20 16:55:39,088 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
  19670. 2025-07-20 16:55:39,093 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
  19671. 2025-07-20 16:55:39,095 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
  19672. 2025-07-20 16:55:39,119 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
  19673. 2025-07-20 16:55:39,140 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
  19674. 2025-07-20 16:55:39,145 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
  19675. 2025-07-20 16:55:39,180 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
  19676. 2025-07-20 16:55:39,183 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
  19677. 2025-07-20 16:55:39,239 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
  19678. 2025-07-20 16:55:39,242 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
  19679. 2025-07-20 16:55:39,279 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
  19680. 2025-07-20 16:55:39,288 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
  19681. 2025-07-20 16:55:39,351 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
  19682. 2025-07-20 16:55:39,352 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
  19683. 2025-07-20 16:55:39,382 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
  19684. 2025-07-20 16:55:39,437 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
  19685. 2025-07-20 16:55:39,450 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
  19686. 2025-07-20 16:55:39,469 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
  19687. 2025-07-20 16:55:39,475 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
  19688. 2025-07-20 16:55:39,535 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
  19689. 2025-07-20 16:55:39,535 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
  19690. 2025-07-20 16:55:39,561 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
  19691. 2025-07-20 16:55:39,563 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
  19692. 2025-07-20 16:55:39,637 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
  19693. 2025-07-20 16:55:39,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  19694. 2025-07-20 16:55:39,647 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
  19695. 2025-07-20 16:55:39,651 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
  19696. 2025-07-20 16:55:39,655 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
  19697. 2025-07-20 16:55:39,663 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
  19698. 2025-07-20 16:55:39,739 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
  19699. 2025-07-20 16:55:39,740 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
  19700. 2025-07-20 16:55:39,745 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
  19701. 2025-07-20 16:55:39,749 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
  19702. 2025-07-20 16:55:39,750 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
  19703. 2025-07-20 16:55:39,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
  19704. 2025-07-20 16:55:39,836 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
  19705. 2025-07-20 16:55:39,845 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
  19706. 2025-07-20 16:55:39,848 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
  19707. 2025-07-20 16:55:39,849 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
  19708. 2025-07-20 16:55:39,850 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
  19709. 2025-07-20 16:55:39,856 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
  19710. 2025-07-20 16:55:39,934 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
  19711. 2025-07-20 16:55:39,935 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
  19712. 2025-07-20 16:55:39,938 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
  19713. 2025-07-20 16:55:39,940 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
  19714. 2025-07-20 16:55:39,947 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
  19715. 2025-07-20 16:55:39,949 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
  19716. 2025-07-20 16:55:40,033 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
  19717. 2025-07-20 16:55:40,048 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
  19718. 2025-07-20 16:55:40,050 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
  19719. 2025-07-20 16:55:40,055 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
  19720. 2025-07-20 16:55:40,059 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
  19721. 2025-07-20 16:55:40,141 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
  19722. 2025-07-20 16:55:40,241 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
  19723. 2025-07-20 16:55:40,242 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
  19724. 2025-07-20 16:55:40,336 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
  19725. 2025-07-20 16:55:40,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
  19726. 2025-07-20 16:55:40,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
  19727. 2025-07-20 16:55:46,433 - __main__ - INFO - Queue remaining: 4
  19728. 2025-07-20 16:55:46,544 - __main__ - INFO -
  19729. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19730. ----------------------------------------------------------------------------------
  19731. 2025-07-20 16:55:46,544 - __main__ - INFO -
  19732. Worker ID | started
  19733. ----------+--------
  19734. 0 | 64
  19735. 2025-07-20 16:55:56,545 - __main__ - INFO - Queue remaining: 4
  19736. 2025-07-20 16:55:56,550 - __main__ - INFO -
  19737. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19738. ----------------------------------------------------------------------------------
  19739. 2025-07-20 16:55:56,552 - __main__ - INFO -
  19740. Worker ID | started
  19741. ----------+--------
  19742. 0 | 64
  19743. 2025-07-20 16:56:00,641 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  19744. 2025-07-20 16:56:03,561 - sglang - INFO - [2025-07-20 16:56:03 TP0] Prefill batch. #new-seq: 1, #new-token: 2606, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19745. 2025-07-20 16:56:03,561 - __main__ - INFO - sglang running req: 0 queue req: 0
  19746. 2025-07-20 16:56:06,553 - __main__ - INFO - Queue remaining: 4
  19747. 2025-07-20 16:56:06,615 - __main__ - INFO -
  19748. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19749. ----------------------------------------------------------------------------------
  19750. 2025-07-20 16:56:06,615 - __main__ - INFO -
  19751. Worker ID | started
  19752. ----------+--------
  19753. 0 | 64
  19754. 2025-07-20 17:03:31,482 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  19755. 2025-07-20 17:03:31,483 - __main__ - INFO - Loading file at olmocr_workspace/job_1753002204/input.pdf as PDF document
  19756. 2025-07-20 17:03:31,483 - __main__ - INFO - Found 1 total pdf paths to add
  19757. 2025-07-20 17:03:31,488 - __main__ - INFO - Calculated items_per_group: 3 based on average pages per PDF: 14.00
  19758. 2025-07-20 17:03:31,716 - __main__ - INFO - Starting pipeline with PID 626896
  19759. 2025-07-20 17:03:31,717 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  19760. 2025-07-20 17:03:31,822 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  19761. 2025-07-20 17:03:32,865 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  19762. 2025-07-20 17:03:33,924 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  19763. 2025-07-20 17:03:34,989 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  19764. 2025-07-20 17:03:36,055 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  19765. 2025-07-20 17:03:37,123 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  19766. 2025-07-20 17:03:38,194 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  19767. 2025-07-20 17:03:39,253 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  19768. 2025-07-20 17:03:39,365 - sglang - INFO - [2025-07-20 17:03:39] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=378607899, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  19769. 2025-07-20 17:03:39,365 - __main__ - INFO - [2025-07-20 17:03:39] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=378607899, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  19770. 2025-07-20 17:03:40,332 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  19771. 2025-07-20 17:03:40,392 - sglang - INFO - [2025-07-20 17:03:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
  19772. 2025-07-20 17:03:40,392 - __main__ - INFO - [2025-07-20 17:03:40] Use chat template for the OpenAI-compatible API server: qwen2-vl
  19773. 2025-07-20 17:03:41,411 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  19774. 2025-07-20 17:03:42,482 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  19775. 2025-07-20 17:03:43,556 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  19776. 2025-07-20 17:03:44,619 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  19777. 2025-07-20 17:03:45,689 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  19778. 2025-07-20 17:03:46,665 - sglang - INFO - [2025-07-20 17:03:46 TP0] Overlap scheduler is disabled for multimodal models.
  19779. 2025-07-20 17:03:46,665 - __main__ - INFO - [2025-07-20 17:03:46 TP0] Overlap scheduler is disabled for multimodal models.
  19780. 2025-07-20 17:03:46,667 - sglang - INFO - [2025-07-20 17:03:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  19781. 2025-07-20 17:03:46,668 - __main__ - INFO - [2025-07-20 17:03:46 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  19782. 2025-07-20 17:03:46,668 - sglang - INFO - [2025-07-20 17:03:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  19783. 2025-07-20 17:03:46,668 - __main__ - INFO - [2025-07-20 17:03:46 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  19784. 2025-07-20 17:03:46,668 - sglang - INFO - [2025-07-20 17:03:46 TP0] Init torch distributed begin.
  19785. 2025-07-20 17:03:46,668 - __main__ - INFO - [2025-07-20 17:03:46 TP0] Init torch distributed begin.
  19786. 2025-07-20 17:03:46,767 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  19787. 2025-07-20 17:03:47,838 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  19788. 2025-07-20 17:03:48,901 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  19789. 2025-07-20 17:03:49,955 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  19790. 2025-07-20 17:03:51,022 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  19791. 2025-07-20 17:03:52,087 - sglang - INFO - [2025-07-20 17:03:52 TP0] Load weight begin. avail mem=23.33 GB
  19792. 2025-07-20 17:03:52,087 - __main__ - INFO - [2025-07-20 17:03:52 TP0] Load weight begin. avail mem=23.33 GB
  19793. 2025-07-20 17:03:52,088 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  19794. 2025-07-20 17:03:52,787 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  19795. 2025-07-20 17:03:52,787 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  19796. 2025-07-20 17:03:53,167 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  19797. 2025-07-20 17:03:54,238 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  19798. 2025-07-20 17:03:55,308 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  19799. 2025-07-20 17:03:56,379 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  19800. 2025-07-20 17:03:57,450 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  19801. 2025-07-20 17:03:58,520 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  19802. 2025-07-20 17:03:59,590 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  19803. 2025-07-20 17:04:00,660 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  19804. 2025-07-20 17:04:01,730 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  19805. 2025-07-20 17:04:02,796 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  19806. 2025-07-20 17:04:03,863 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  19807. 2025-07-20 17:04:04,930 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  19808. 2025-07-20 17:04:05,832 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.04s/it]
  19809. 2025-07-20 17:04:05,832 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:39, 13.04s/it]
  19810. 2025-07-20 17:04:05,996 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  19811. 2025-07-20 17:04:07,061 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  19812. 2025-07-20 17:04:08,126 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  19813. 2025-07-20 17:04:09,188 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  19814. 2025-07-20 17:04:10,254 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  19815. 2025-07-20 17:04:11,322 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  19816. 2025-07-20 17:04:12,387 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  19817. 2025-07-20 17:04:13,455 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  19818. 2025-07-20 17:04:14,527 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  19819. 2025-07-20 17:04:15,594 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  19820. 2025-07-20 17:04:16,660 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  19821. 2025-07-20 17:04:17,726 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  19822. 2025-07-20 17:04:18,793 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  19823. 2025-07-20 17:04:19,022 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.13s/it]
  19824. 2025-07-20 17:04:19,022 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:26<00:26, 13.13s/it]
  19825. 2025-07-20 17:04:19,872 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  19826. 2025-07-20 17:04:20,938 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  19827. 2025-07-20 17:04:22,001 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  19828. 2025-07-20 17:04:23,053 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  19829. 2025-07-20 17:04:24,123 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  19830. 2025-07-20 17:04:25,188 - __main__ - WARNING - Attempt 51: Please wait for sglang server to become ready...
  19831. 2025-07-20 17:04:26,254 - __main__ - WARNING - Attempt 52: Please wait for sglang server to become ready...
  19832. 2025-07-20 17:04:27,319 - __main__ - WARNING - Attempt 53: Please wait for sglang server to become ready...
  19833. 2025-07-20 17:04:28,390 - __main__ - WARNING - Attempt 54: Please wait for sglang server to become ready...
  19834. 2025-07-20 17:04:29,460 - __main__ - WARNING - Attempt 55: Please wait for sglang server to become ready...
  19835. 2025-07-20 17:04:30,531 - __main__ - WARNING - Attempt 56: Please wait for sglang server to become ready...
  19836. 2025-07-20 17:04:31,602 - __main__ - WARNING - Attempt 57: Please wait for sglang server to become ready...
  19837. 2025-07-20 17:04:32,016 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.07s/it]
  19838. 2025-07-20 17:04:32,017 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:39<00:13, 13.07s/it]
  19839. 2025-07-20 17:04:32,672 - __main__ - WARNING - Attempt 58: Please wait for sglang server to become ready...
  19840. 2025-07-20 17:04:33,740 - __main__ - WARNING - Attempt 59: Please wait for sglang server to become ready...
  19841. 2025-07-20 17:04:34,808 - __main__ - WARNING - Attempt 60: Please wait for sglang server to become ready...
  19842. 2025-07-20 17:04:35,875 - __main__ - WARNING - Attempt 61: Please wait for sglang server to become ready...
  19843. 2025-07-20 17:04:36,607 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.72s/it]
  19844. 2025-07-20 17:04:36,607 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 9.72s/it]
  19845. 2025-07-20 17:04:36,607 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.96s/it]
  19846. 2025-07-20 17:04:36,607 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:43<00:00, 10.96s/it]
  19847. 2025-07-20 17:04:36,607 - sglang - INFO -
  19848. 2025-07-20 17:04:36,608 - __main__ - INFO -
  19849. 2025-07-20 17:04:36,695 - sglang - INFO - [2025-07-20 17:04:36 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  19850. 2025-07-20 17:04:36,695 - __main__ - INFO - [2025-07-20 17:04:36 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  19851. 2025-07-20 17:04:36,708 - sglang - INFO - [2025-07-20 17:04:36 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  19852. 2025-07-20 17:04:36,708 - __main__ - INFO - [2025-07-20 17:04:36 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  19853. 2025-07-20 17:04:36,709 - sglang - INFO - [2025-07-20 17:04:36 TP0] Memory pool end. avail mem=5.30 GB
  19854. 2025-07-20 17:04:36,709 - __main__ - INFO - [2025-07-20 17:04:36 TP0] Memory pool end. avail mem=5.30 GB
  19855. 2025-07-20 17:04:36,953 - sglang - INFO - [2025-07-20 17:04:36 TP0] Capture cuda graph begin. This can take up to several minutes.
  19856. 2025-07-20 17:04:36,953 - __main__ - INFO - [2025-07-20 17:04:36 TP0] Capture cuda graph begin. This can take up to several minutes.
  19857. 2025-07-20 17:04:36,955 - __main__ - WARNING - Attempt 62: Please wait for sglang server to become ready...
  19858. 2025-07-20 17:04:38,030 - __main__ - WARNING - Attempt 63: Please wait for sglang server to become ready...
  19859. 2025-07-20 17:04:39,108 - __main__ - WARNING - Attempt 64: Please wait for sglang server to become ready...
  19860. 2025-07-20 17:04:39,303 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.46s/it] 50%|█████ | 2/4 [00:01<00:01, 1.28it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.78it/s] 100%|██████████| 4/4 [00:02<00:00, 2.17it/s] 100%|██████████| 4/4 [00:02<00:00, 1.69it/s]
  19861. 2025-07-20 17:04:39,303 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:04, 1.46s/it] 50%|█████ | 2/4 [00:01<00:01, 1.28it/s] 75%|███████▌ | 3/4 [00:02<00:00, 1.78it/s] 100%|██████████| 4/4 [00:02<00:00, 2.17it/s] 100%|██████████| 4/4 [00:02<00:00, 1.69it/s]
  19862. 2025-07-20 17:04:39,303 - sglang - INFO - [2025-07-20 17:04:39 TP0] Capture cuda graph end. Time elapsed: 2.38 s
  19863. 2025-07-20 17:04:39,303 - __main__ - INFO - [2025-07-20 17:04:39 TP0] Capture cuda graph end. Time elapsed: 2.38 s
  19864. 2025-07-20 17:04:40,096 - sglang - INFO - [2025-07-20 17:04:40 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  19865. 2025-07-20 17:04:40,096 - __main__ - INFO - [2025-07-20 17:04:40 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  19866. 2025-07-20 17:04:40,187 - __main__ - WARNING - Attempt 65: Please wait for sglang server to become ready...
  19867. 2025-07-20 17:04:41,255 - sglang - INFO - [2025-07-20 17:04:41 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19868. 2025-07-20 17:04:41,255 - __main__ - INFO - [2025-07-20 17:04:41 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19869. 2025-07-20 17:04:41,255 - __main__ - INFO - sglang running req: 0 queue req: 0
  19870. 2025-07-20 17:04:41,327 - __main__ - INFO - sglang server is ready.
  19871. 2025-07-20 17:04:41,328 - __main__ - INFO - Queue remaining: 1
  19872. 2025-07-20 17:04:41,328 - __main__ - INFO -
  19873. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19874. ----------------------------------------------------------------------------------
  19875. 2025-07-20 17:04:41,328 - __main__ - INFO -
  19876. Worker ID
  19877. ---------
  19878. 2025-07-20 17:04:41,329 - __main__ - INFO - Worker 0 processing work item b3e78c0b6a8de664e1cb6a52a3489482f2f557b8
  19879. 2025-07-20 17:04:41,329 - __main__ - INFO - Created all tasks for b3e78c0b6a8de664e1cb6a52a3489482f2f557b8
  19880. 2025-07-20 17:04:41,335 - __main__ - INFO - Got 14 pages to do for olmocr_workspace/job_1753002204/input.pdf in worker 0
  19881. 2025-07-20 17:04:42,536 - sglang - INFO - [2025-07-20 17:04:42] The server is fired up and ready to roll!
  19882. 2025-07-20 17:04:42,537 - __main__ - INFO - [2025-07-20 17:04:42] The server is fired up and ready to roll!
  19883. 2025-07-20 17:04:48,506 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-1
  19884. 2025-07-20 17:04:48,532 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-3
  19885. 2025-07-20 17:04:48,545 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-2
  19886. 2025-07-20 17:04:48,574 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-4
  19887. 2025-07-20 17:04:48,587 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-5
  19888. 2025-07-20 17:04:48,616 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-8
  19889. 2025-07-20 17:04:48,628 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-7
  19890. 2025-07-20 17:04:48,667 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-9
  19891. 2025-07-20 17:04:48,701 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-14
  19892. 2025-07-20 17:04:48,719 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-6
  19893. 2025-07-20 17:04:48,724 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-13
  19894. 2025-07-20 17:04:48,726 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-12
  19895. 2025-07-20 17:04:48,727 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-11
  19896. 2025-07-20 17:04:48,766 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002204/input.pdf-10
  19897. 2025-07-20 17:04:51,330 - __main__ - INFO - Queue remaining: 0
  19898. 2025-07-20 17:04:51,331 - __main__ - INFO -
  19899. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19900. ----------------------------------------------------------------------------------
  19901. 2025-07-20 17:04:51,331 - __main__ - INFO -
  19902. Worker ID | started
  19903. ----------+--------
  19904. 0 | 14
  19905. 2025-07-20 17:05:01,333 - __main__ - INFO - Queue remaining: 0
  19906. 2025-07-20 17:05:01,339 - __main__ - INFO -
  19907. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19908. ----------------------------------------------------------------------------------
  19909. 2025-07-20 17:05:01,339 - __main__ - INFO -
  19910. Worker ID | started
  19911. ----------+--------
  19912. 0 | 14
  19913. 2025-07-20 17:05:09,504 - sglang - INFO - [2025-07-20 17:05:09 TP0] Prefill batch. #new-seq: 1, #new-token: 1821, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  19914. 2025-07-20 17:05:09,504 - __main__ - INFO - sglang running req: 0 queue req: 0
  19915. 2025-07-20 17:05:10,800 - sglang - INFO - [2025-07-20 17:05:10 TP0] Prefill batch. #new-seq: 6, #new-token: 13951, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 7
  19916. 2025-07-20 17:05:10,800 - __main__ - INFO - sglang running req: 1 queue req: 7
  19917. 2025-07-20 17:05:11,340 - __main__ - INFO - Queue remaining: 0
  19918. 2025-07-20 17:05:11,341 - __main__ - INFO -
  19919. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19920. ----------------------------------------------------------------------------------
  19921. 2025-07-20 17:05:11,341 - __main__ - INFO -
  19922. Worker ID | started
  19923. ----------+--------
  19924. 0 | 14
  19925. 2025-07-20 17:05:15,773 - sglang - INFO - [2025-07-20 17:05:15 TP0] Decode batch. #running-req: 7, #token: 16003, token usage: 0.42, gen throughput (token/s): 6.67, #queue-req: 7
  19926. 2025-07-20 17:05:15,773 - __main__ - INFO - sglang running req: 7 queue req: 7
  19927. 2025-07-20 17:05:16,649 - sglang - INFO - [2025-07-20 17:05:16 TP0] Decode batch. #running-req: 7, #token: 16283, token usage: 0.43, gen throughput (token/s): 319.87, #queue-req: 7
  19928. 2025-07-20 17:05:16,649 - __main__ - INFO - sglang running req: 7 queue req: 7
  19929. 2025-07-20 17:05:17,526 - sglang - INFO - [2025-07-20 17:05:17 TP0] Decode batch. #running-req: 7, #token: 16563, token usage: 0.44, gen throughput (token/s): 319.22, #queue-req: 7
  19930. 2025-07-20 17:05:17,526 - __main__ - INFO - sglang running req: 7 queue req: 7
  19931. 2025-07-20 17:05:18,250 - sglang - INFO - [2025-07-20 17:05:18 TP0] Prefill batch. #new-seq: 2, #new-token: 5220, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.40, #running-req: 6, #queue-req: 5
  19932. 2025-07-20 17:05:18,250 - __main__ - INFO - sglang running req: 6 queue req: 5
  19933. 2025-07-20 17:05:19,869 - sglang - INFO - [2025-07-20 17:05:19 TP0] Decode batch. #running-req: 8, #token: 20559, token usage: 0.54, gen throughput (token/s): 122.04, #queue-req: 5
  19934. 2025-07-20 17:05:19,870 - __main__ - INFO - sglang running req: 8 queue req: 5
  19935. 2025-07-20 17:05:20,753 - sglang - INFO - [2025-07-20 17:05:20 TP0] Decode batch. #running-req: 8, #token: 20879, token usage: 0.55, gen throughput (token/s): 362.21, #queue-req: 5
  19936. 2025-07-20 17:05:20,753 - __main__ - INFO - sglang running req: 8 queue req: 5
  19937. 2025-07-20 17:05:21,343 - __main__ - INFO - Queue remaining: 0
  19938. 2025-07-20 17:05:21,344 - __main__ - INFO -
  19939. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19940. ----------------------------------------------------------------------------------
  19941. sglang_input_tokens 12.42 12.42
  19942. sglang_output_tokens 1.33 1.33
  19943. 2025-07-20 17:05:21,344 - __main__ - INFO -
  19944. Worker ID | finished | started
  19945. ----------+----------+--------
  19946. 0 | 1 | 14
  19947. 2025-07-20 17:05:21,638 - sglang - INFO - [2025-07-20 17:05:21 TP0] Decode batch. #running-req: 8, #token: 21199, token usage: 0.56, gen throughput (token/s): 361.55, #queue-req: 5
  19948. 2025-07-20 17:05:21,638 - __main__ - INFO - sglang running req: 8 queue req: 5
  19949. 2025-07-20 17:05:22,530 - sglang - INFO - [2025-07-20 17:05:22 TP0] Decode batch. #running-req: 8, #token: 21519, token usage: 0.57, gen throughput (token/s): 358.67, #queue-req: 5
  19950. 2025-07-20 17:05:22,530 - __main__ - INFO - sglang running req: 8 queue req: 5
  19951. 2025-07-20 17:05:23,418 - sglang - INFO - [2025-07-20 17:05:23 TP0] Decode batch. #running-req: 8, #token: 21839, token usage: 0.57, gen throughput (token/s): 360.39, #queue-req: 5
  19952. 2025-07-20 17:05:23,418 - __main__ - INFO - sglang running req: 8 queue req: 5
  19953. 2025-07-20 17:05:24,306 - sglang - INFO - [2025-07-20 17:05:24 TP0] Decode batch. #running-req: 8, #token: 22159, token usage: 0.58, gen throughput (token/s): 360.31, #queue-req: 5
  19954. 2025-07-20 17:05:24,306 - __main__ - INFO - sglang running req: 8 queue req: 5
  19955. 2025-07-20 17:05:25,195 - sglang - INFO - [2025-07-20 17:05:25 TP0] Decode batch. #running-req: 8, #token: 22479, token usage: 0.59, gen throughput (token/s): 359.83, #queue-req: 5
  19956. 2025-07-20 17:05:25,196 - __main__ - INFO - sglang running req: 8 queue req: 5
  19957. 2025-07-20 17:05:26,089 - sglang - INFO - [2025-07-20 17:05:26 TP0] Decode batch. #running-req: 8, #token: 22799, token usage: 0.60, gen throughput (token/s): 358.14, #queue-req: 5
  19958. 2025-07-20 17:05:26,089 - __main__ - INFO - sglang running req: 8 queue req: 5
  19959. 2025-07-20 17:05:26,982 - sglang - INFO - [2025-07-20 17:05:26 TP0] Decode batch. #running-req: 8, #token: 23119, token usage: 0.61, gen throughput (token/s): 358.06, #queue-req: 5
  19960. 2025-07-20 17:05:26,983 - __main__ - INFO - sglang running req: 8 queue req: 5
  19961. 2025-07-20 17:05:27,407 - sglang - INFO - [2025-07-20 17:05:27 TP0] Prefill batch. #new-seq: 2, #new-token: 4561, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.55, #running-req: 7, #queue-req: 3
  19962. 2025-07-20 17:05:27,407 - __main__ - INFO - sglang running req: 7 queue req: 3
  19963. 2025-07-20 17:05:28,829 - sglang - INFO - [2025-07-20 17:05:28 TP0] Prefill batch. #new-seq: 2, #new-token: 4178, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.60, #running-req: 8, #queue-req: 1
  19964. 2025-07-20 17:05:28,829 - __main__ - INFO - sglang running req: 8 queue req: 1
  19965. 2025-07-20 17:05:30,568 - sglang - INFO - [2025-07-20 17:05:30 TP0] Decode batch. #running-req: 10, #token: 27341, token usage: 0.72, gen throughput (token/s): 99.85, #queue-req: 1
  19966. 2025-07-20 17:05:30,568 - __main__ - INFO - sglang running req: 10 queue req: 1
  19967. 2025-07-20 17:05:31,345 - __main__ - INFO - Queue remaining: 0
  19968. 2025-07-20 17:05:31,345 - __main__ - INFO -
  19969. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  19970. ----------------------------------------------------------------------------------
  19971. sglang_input_tokens 43.84 43.84
  19972. sglang_output_tokens 9.44 9.44
  19973. 2025-07-20 17:05:31,345 - __main__ - INFO -
  19974. Worker ID | finished | started
  19975. ----------+----------+--------
  19976. 0 | 3 | 14
  19977. 2025-07-20 17:05:31,527 - sglang - INFO - [2025-07-20 17:05:31 TP0] Decode batch. #running-req: 10, #token: 27741, token usage: 0.73, gen throughput (token/s): 417.09, #queue-req: 1
  19978. 2025-07-20 17:05:31,527 - __main__ - INFO - sglang running req: 10 queue req: 1
  19979. 2025-07-20 17:05:32,487 - sglang - INFO - [2025-07-20 17:05:32 TP0] Decode batch. #running-req: 10, #token: 28141, token usage: 0.74, gen throughput (token/s): 416.94, #queue-req: 1
  19980. 2025-07-20 17:05:32,487 - __main__ - INFO - sglang running req: 10 queue req: 1
  19981. 2025-07-20 17:05:33,449 - sglang - INFO - [2025-07-20 17:05:33 TP0] Decode batch. #running-req: 10, #token: 28541, token usage: 0.75, gen throughput (token/s): 415.43, #queue-req: 1
  19982. 2025-07-20 17:05:33,450 - __main__ - INFO - sglang running req: 10 queue req: 1
  19983. 2025-07-20 17:05:34,125 - sglang - INFO - [2025-07-20 17:05:34 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.68, #running-req: 9, #queue-req: 0
  19984. 2025-07-20 17:05:34,125 - __main__ - INFO - sglang running req: 9 queue req: 0
  19985. 2025-07-20 17:05:34,181 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  19986. 2025-07-20 17:05:35,148 - sglang - INFO - [2025-07-20 17:05:35 TP0] Decode batch. #running-req: 10, #token: 28504, token usage: 0.75, gen throughput (token/s): 234.85, #queue-req: 0
  19987. 2025-07-20 17:05:35,149 - __main__ - INFO - sglang running req: 10 queue req: 0
  19988. 2025-07-20 17:05:36,111 - sglang - INFO - [2025-07-20 17:05:36 TP0] Decode batch. #running-req: 10, #token: 28904, token usage: 0.76, gen throughput (token/s): 415.65, #queue-req: 0
  19989. 2025-07-20 17:05:36,111 - __main__ - INFO - sglang running req: 10 queue req: 0
  19990. 2025-07-20 17:05:37,078 - sglang - INFO - [2025-07-20 17:05:37 TP0] Decode batch. #running-req: 10, #token: 29304, token usage: 0.77, gen throughput (token/s): 413.52, #queue-req: 0
  19991. 2025-07-20 17:05:37,078 - __main__ - INFO - sglang running req: 10 queue req: 0
  19992. 2025-07-20 17:05:38,042 - sglang - INFO - [2025-07-20 17:05:38 TP0] Decode batch. #running-req: 10, #token: 29704, token usage: 0.78, gen throughput (token/s): 414.79, #queue-req: 0
  19993. 2025-07-20 17:05:38,042 - __main__ - INFO - sglang running req: 10 queue req: 0
  19994. 2025-07-20 17:05:39,003 - sglang - INFO - [2025-07-20 17:05:39 TP0] Decode batch. #running-req: 9, #token: 27890, token usage: 0.73, gen throughput (token/s): 403.74, #queue-req: 0
  19995. 2025-07-20 17:05:39,003 - __main__ - INFO - sglang running req: 9 queue req: 0
  19996. 2025-07-20 17:05:39,917 - sglang - INFO - [2025-07-20 17:05:39 TP0] Decode batch. #running-req: 7, #token: 20877, token usage: 0.55, gen throughput (token/s): 342.44, #queue-req: 0
  19997. 2025-07-20 17:05:39,918 - __main__ - INFO - sglang running req: 7 queue req: 0
  19998. 2025-07-20 17:05:40,809 - sglang - INFO - [2025-07-20 17:05:40 TP0] Decode batch. #running-req: 6, #token: 17525, token usage: 0.46, gen throughput (token/s): 306.18, #queue-req: 0
  19999. 2025-07-20 17:05:40,809 - __main__ - INFO - sglang running req: 6 queue req: 0
  20000. 2025-07-20 17:05:41,346 - __main__ - INFO - Queue remaining: 0
  20001. 2025-07-20 17:05:41,347 - __main__ - INFO -
  20002. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20003. ----------------------------------------------------------------------------------
  20004. sglang_input_tokens 154.67 154.67
  20005. sglang_output_tokens 42.48 42.48
  20006. 2025-07-20 17:05:41,347 - __main__ - INFO -
  20007. Worker ID | finished | started
  20008. ----------+----------+--------
  20009. 0 | 9 | 14
  20010. 2025-07-20 17:05:41,691 - sglang - INFO - [2025-07-20 17:05:41 TP0] Decode batch. #running-req: 5, #token: 14516, token usage: 0.38, gen throughput (token/s): 250.44, #queue-req: 0
  20011. 2025-07-20 17:05:41,692 - __main__ - INFO - sglang running req: 5 queue req: 0
  20012. 2025-07-20 17:05:42,567 - sglang - INFO - [2025-07-20 17:05:42 TP0] Decode batch. #running-req: 5, #token: 14716, token usage: 0.39, gen throughput (token/s): 228.46, #queue-req: 0
  20013. 2025-07-20 17:05:42,567 - __main__ - INFO - sglang running req: 5 queue req: 0
  20014. 2025-07-20 17:05:43,441 - sglang - INFO - [2025-07-20 17:05:43 TP0] Decode batch. #running-req: 5, #token: 14916, token usage: 0.39, gen throughput (token/s): 228.75, #queue-req: 0
  20015. 2025-07-20 17:05:43,441 - __main__ - INFO - sglang running req: 5 queue req: 0
  20016. 2025-07-20 17:05:44,294 - sglang - INFO - [2025-07-20 17:05:44 TP0] Decode batch. #running-req: 1, #token: 2806, token usage: 0.07, gen throughput (token/s): 144.16, #queue-req: 0
  20017. 2025-07-20 17:05:44,295 - __main__ - INFO - sglang running req: 1 queue req: 0
  20018. 2025-07-20 17:05:45,123 - sglang - INFO - [2025-07-20 17:05:45 TP0] Decode batch. #running-req: 1, #token: 2846, token usage: 0.07, gen throughput (token/s): 48.30, #queue-req: 0
  20019. 2025-07-20 17:05:45,123 - __main__ - INFO - sglang running req: 1 queue req: 0
  20020. 2025-07-20 17:05:45,950 - sglang - INFO - [2025-07-20 17:05:45 TP0] Decode batch. #running-req: 1, #token: 2886, token usage: 0.08, gen throughput (token/s): 48.34, #queue-req: 0
  20021. 2025-07-20 17:05:45,950 - __main__ - INFO - sglang running req: 1 queue req: 0
  20022. 2025-07-20 17:05:46,776 - sglang - INFO - [2025-07-20 17:05:46 TP0] Decode batch. #running-req: 1, #token: 2926, token usage: 0.08, gen throughput (token/s): 48.45, #queue-req: 0
  20023. 2025-07-20 17:05:46,776 - __main__ - INFO - sglang running req: 1 queue req: 0
  20024. 2025-07-20 17:05:47,603 - sglang - INFO - [2025-07-20 17:05:47 TP0] Decode batch. #running-req: 1, #token: 2966, token usage: 0.08, gen throughput (token/s): 48.37, #queue-req: 0
  20025. 2025-07-20 17:05:47,603 - __main__ - INFO - sglang running req: 1 queue req: 0
  20026. 2025-07-20 17:05:48,432 - sglang - INFO - [2025-07-20 17:05:48 TP0] Decode batch. #running-req: 1, #token: 3006, token usage: 0.08, gen throughput (token/s): 48.25, #queue-req: 0
  20027. 2025-07-20 17:05:48,432 - __main__ - INFO - sglang running req: 1 queue req: 0
  20028. 2025-07-20 17:05:49,267 - sglang - INFO - [2025-07-20 17:05:49 TP0] Decode batch. #running-req: 1, #token: 3046, token usage: 0.08, gen throughput (token/s): 47.86, #queue-req: 0
  20029. 2025-07-20 17:05:49,268 - __main__ - INFO - sglang running req: 1 queue req: 0
  20030. 2025-07-20 17:05:50,100 - sglang - INFO - [2025-07-20 17:05:50 TP0] Decode batch. #running-req: 1, #token: 3086, token usage: 0.08, gen throughput (token/s): 48.01, #queue-req: 0
  20031. 2025-07-20 17:05:50,101 - __main__ - INFO - sglang running req: 1 queue req: 0
  20032. 2025-07-20 17:05:50,935 - sglang - INFO - [2025-07-20 17:05:50 TP0] Decode batch. #running-req: 1, #token: 3126, token usage: 0.08, gen throughput (token/s): 47.96, #queue-req: 0
  20033. 2025-07-20 17:05:50,935 - __main__ - INFO - sglang running req: 1 queue req: 0
  20034. 2025-07-20 17:05:51,348 - __main__ - INFO - Queue remaining: 0
  20035. 2025-07-20 17:05:51,348 - __main__ - INFO -
  20036. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20037. ----------------------------------------------------------------------------------
  20038. sglang_input_tokens 212.53 212.53
  20039. sglang_output_tokens 58.00 58.00
  20040. 2025-07-20 17:05:51,348 - __main__ - INFO -
  20041. Worker ID | finished | started
  20042. ----------+----------+--------
  20043. 0 | 13 | 14
  20044. 2025-07-20 17:05:51,770 - sglang - INFO - [2025-07-20 17:05:51 TP0] Decode batch. #running-req: 1, #token: 3166, token usage: 0.08, gen throughput (token/s): 47.91, #queue-req: 0
  20045. 2025-07-20 17:05:51,770 - __main__ - INFO - sglang running req: 1 queue req: 0
  20046. 2025-07-20 17:05:52,601 - sglang - INFO - [2025-07-20 17:05:52 TP0] Decode batch. #running-req: 1, #token: 3206, token usage: 0.08, gen throughput (token/s): 48.09, #queue-req: 0
  20047. 2025-07-20 17:05:52,601 - __main__ - INFO - sglang running req: 1 queue req: 0
  20048. 2025-07-20 17:05:53,431 - sglang - INFO - [2025-07-20 17:05:53 TP0] Decode batch. #running-req: 1, #token: 3246, token usage: 0.09, gen throughput (token/s): 48.23, #queue-req: 0
  20049. 2025-07-20 17:05:53,431 - __main__ - INFO - sglang running req: 1 queue req: 0
  20050. 2025-07-20 17:05:53,706 - __main__ - INFO - Finished TaskGroup for worker on b3e78c0b6a8de664e1cb6a52a3489482f2f557b8
  20051. 2025-07-20 17:05:53,707 - __main__ - INFO - Got 1 docs for b3e78c0b6a8de664e1cb6a52a3489482f2f557b8
  20052. 2025-07-20 17:05:53,709 - __main__ - INFO - Worker 0 exiting due to empty queue
  20053. 2025-07-20 17:05:53,710 - __main__ - INFO - Work done
  20054. 2025-07-20 17:05:53,710 - __main__ - INFO - Got cancellation request for SGLang server
  20055. 2025-07-20 17:08:01,104 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  20056. 2025-07-20 17:08:01,104 - __main__ - INFO - Loading file at olmocr_workspace/job_1753002474/input.pdf as PDF document
  20057. 2025-07-20 17:08:01,105 - __main__ - INFO - Found 1 total pdf paths to add
  20058. 2025-07-20 17:08:01,109 - __main__ - INFO - Calculated items_per_group: 35 based on average pages per PDF: 14.00
  20059. 2025-07-20 17:08:01,338 - __main__ - INFO - Starting pipeline with PID 628948
  20060. 2025-07-20 17:08:01,339 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  20061. 2025-07-20 17:08:01,425 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  20062. 2025-07-20 17:08:02,457 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  20063. 2025-07-20 17:08:03,505 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  20064. 2025-07-20 17:08:04,570 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  20065. 2025-07-20 17:08:05,639 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  20066. 2025-07-20 17:08:06,710 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  20067. 2025-07-20 17:08:07,779 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  20068. 2025-07-20 17:08:08,853 - sglang - INFO - [2025-07-20 17:08:08] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=436041262, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  20069. 2025-07-20 17:08:08,854 - __main__ - INFO - [2025-07-20 17:08:08] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=436041262, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  20070. 2025-07-20 17:08:08,934 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  20071. 2025-07-20 17:08:09,809 - sglang - INFO - [2025-07-20 17:08:09] Use chat template for the OpenAI-compatible API server: qwen2-vl
  20072. 2025-07-20 17:08:09,809 - __main__ - INFO - [2025-07-20 17:08:09] Use chat template for the OpenAI-compatible API server: qwen2-vl
  20073. 2025-07-20 17:08:09,995 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  20074. 2025-07-20 17:08:11,065 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  20075. 2025-07-20 17:08:12,133 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  20076. 2025-07-20 17:08:13,208 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  20077. 2025-07-20 17:08:14,345 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  20078. 2025-07-20 17:08:15,409 - sglang - INFO - [2025-07-20 17:08:15 TP0] Overlap scheduler is disabled for multimodal models.
  20079. 2025-07-20 17:08:15,409 - __main__ - INFO - [2025-07-20 17:08:15 TP0] Overlap scheduler is disabled for multimodal models.
  20080. 2025-07-20 17:08:15,410 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  20081. 2025-07-20 17:08:15,411 - sglang - INFO - [2025-07-20 17:08:15 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  20082. 2025-07-20 17:08:15,411 - __main__ - INFO - [2025-07-20 17:08:15 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  20083. 2025-07-20 17:08:15,411 - sglang - INFO - [2025-07-20 17:08:15 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  20084. 2025-07-20 17:08:15,411 - __main__ - INFO - [2025-07-20 17:08:15 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  20085. 2025-07-20 17:08:15,411 - sglang - INFO - [2025-07-20 17:08:15 TP0] Init torch distributed begin.
  20086. 2025-07-20 17:08:15,411 - __main__ - INFO - [2025-07-20 17:08:15 TP0] Init torch distributed begin.
  20087. 2025-07-20 17:08:16,473 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  20088. 2025-07-20 17:08:17,528 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  20089. 2025-07-20 17:08:18,594 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  20090. 2025-07-20 17:08:19,659 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  20091. 2025-07-20 17:08:20,732 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  20092. 2025-07-20 17:08:20,765 - sglang - INFO - [2025-07-20 17:08:20 TP0] Load weight begin. avail mem=23.33 GB
  20093. 2025-07-20 17:08:20,765 - __main__ - INFO - [2025-07-20 17:08:20 TP0] Load weight begin. avail mem=23.33 GB
  20094. 2025-07-20 17:08:21,425 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  20095. 2025-07-20 17:08:21,425 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  20096. 2025-07-20 17:08:21,792 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  20097. 2025-07-20 17:08:22,859 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  20098. 2025-07-20 17:08:23,926 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  20099. 2025-07-20 17:08:24,992 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  20100. 2025-07-20 17:08:26,049 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  20101. 2025-07-20 17:08:27,119 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  20102. 2025-07-20 17:08:28,189 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  20103. 2025-07-20 17:08:29,252 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  20104. 2025-07-20 17:08:30,318 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  20105. 2025-07-20 17:08:31,386 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  20106. 2025-07-20 17:08:32,452 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  20107. 2025-07-20 17:08:33,520 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  20108. 2025-07-20 17:08:34,574 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  20109. 2025-07-20 17:08:35,096 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:41, 13.67s/it]
  20110. 2025-07-20 17:08:35,097 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:13<00:41, 13.67s/it]
  20111. 2025-07-20 17:08:35,652 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  20112. 2025-07-20 17:08:36,722 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  20113. 2025-07-20 17:08:37,782 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  20114. 2025-07-20 17:08:38,836 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  20115. 2025-07-20 17:08:39,673 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:18<00:16, 8.32s/it]
  20116. 2025-07-20 17:08:39,673 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:18<00:16, 8.32s/it]
  20117. 2025-07-20 17:08:39,882 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  20118. 2025-07-20 17:08:40,934 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  20119. 2025-07-20 17:08:42,000 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  20120. 2025-07-20 17:08:43,066 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  20121. 2025-07-20 17:08:44,138 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  20122. 2025-07-20 17:08:45,208 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  20123. 2025-07-20 17:08:46,278 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  20124. 2025-07-20 17:08:47,348 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  20125. 2025-07-20 17:08:48,415 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  20126. 2025-07-20 17:08:49,482 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  20127. 2025-07-20 17:08:49,575 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:28<00:09, 9.04s/it]
  20128. 2025-07-20 17:08:49,576 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:28<00:09, 9.04s/it]
  20129. 2025-07-20 17:08:50,001 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 5.64s/it]
  20130. 2025-07-20 17:08:50,001 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 5.64s/it]
  20131. 2025-07-20 17:08:50,001 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 7.14s/it]
  20132. 2025-07-20 17:08:50,001 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 7.14s/it]
  20133. 2025-07-20 17:08:50,001 - sglang - INFO -
  20134. 2025-07-20 17:08:50,001 - __main__ - INFO -
  20135. 2025-07-20 17:08:50,058 - sglang - INFO - [2025-07-20 17:08:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  20136. 2025-07-20 17:08:50,058 - __main__ - INFO - [2025-07-20 17:08:50 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  20137. 2025-07-20 17:08:50,065 - sglang - INFO - [2025-07-20 17:08:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  20138. 2025-07-20 17:08:50,065 - __main__ - INFO - [2025-07-20 17:08:50 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  20139. 2025-07-20 17:08:50,066 - sglang - INFO - [2025-07-20 17:08:50 TP0] Memory pool end. avail mem=5.30 GB
  20140. 2025-07-20 17:08:50,066 - __main__ - INFO - [2025-07-20 17:08:50 TP0] Memory pool end. avail mem=5.30 GB
  20141. 2025-07-20 17:08:50,240 - sglang - INFO - [2025-07-20 17:08:50 TP0] Capture cuda graph begin. This can take up to several minutes.
  20142. 2025-07-20 17:08:50,240 - __main__ - INFO - [2025-07-20 17:08:50 TP0] Capture cuda graph begin. This can take up to several minutes.
  20143. 2025-07-20 17:08:50,562 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  20144. 2025-07-20 17:08:51,621 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  20145. 2025-07-20 17:08:52,344 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.20s/it] 50%|█████ | 2/4 [00:01<00:01, 1.49it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.00it/s] 100%|██████████| 4/4 [00:02<00:00, 2.37it/s] 100%|██████████| 4/4 [00:02<00:00, 1.90it/s]
  20146. 2025-07-20 17:08:52,345 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.20s/it] 50%|█████ | 2/4 [00:01<00:01, 1.49it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.00it/s] 100%|██████████| 4/4 [00:02<00:00, 2.37it/s] 100%|██████████| 4/4 [00:02<00:00, 1.90it/s]
  20147. 2025-07-20 17:08:52,345 - sglang - INFO - [2025-07-20 17:08:52 TP0] Capture cuda graph end. Time elapsed: 2.10 s
  20148. 2025-07-20 17:08:52,345 - __main__ - INFO - [2025-07-20 17:08:52 TP0] Capture cuda graph end. Time elapsed: 2.10 s
  20149. 2025-07-20 17:08:52,700 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  20150. 2025-07-20 17:08:53,118 - sglang - INFO - [2025-07-20 17:08:53 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  20151. 2025-07-20 17:08:53,118 - __main__ - INFO - [2025-07-20 17:08:53 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  20152. 2025-07-20 17:08:53,800 - __main__ - INFO - sglang server is ready.
  20153. 2025-07-20 17:08:53,800 - __main__ - INFO - Queue remaining: 1
  20154. 2025-07-20 17:08:53,800 - __main__ - INFO -
  20155. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20156. ----------------------------------------------------------------------------------
  20157. 2025-07-20 17:08:53,800 - __main__ - INFO -
  20158. Worker ID
  20159. ---------
  20160. 2025-07-20 17:08:53,801 - __main__ - INFO - Worker 0 processing work item dbf48ba2e0ba653560d78d753cde2080c6a38613
  20161. 2025-07-20 17:08:53,801 - __main__ - INFO - Created all tasks for dbf48ba2e0ba653560d78d753cde2080c6a38613
  20162. 2025-07-20 17:08:53,805 - __main__ - INFO - Got 14 pages to do for olmocr_workspace/job_1753002474/input.pdf in worker 0
  20163. 2025-07-20 17:08:54,242 - sglang - INFO - [2025-07-20 17:08:54 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  20164. 2025-07-20 17:08:54,242 - __main__ - INFO - [2025-07-20 17:08:54 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  20165. 2025-07-20 17:08:54,242 - __main__ - INFO - sglang running req: 0 queue req: 0
  20166. 2025-07-20 17:08:55,157 - sglang - INFO - [2025-07-20 17:08:55] The server is fired up and ready to roll!
  20167. 2025-07-20 17:08:55,157 - __main__ - INFO - [2025-07-20 17:08:55] The server is fired up and ready to roll!
  20168. 2025-07-20 17:09:00,720 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-2
  20169. 2025-07-20 17:09:00,726 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-1
  20170. 2025-07-20 17:09:00,745 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-3
  20171. 2025-07-20 17:09:00,778 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-4
  20172. 2025-07-20 17:09:00,785 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-5
  20173. 2025-07-20 17:09:00,812 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-6
  20174. 2025-07-20 17:09:00,826 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-8
  20175. 2025-07-20 17:09:00,908 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-10
  20176. 2025-07-20 17:09:00,940 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-7
  20177. 2025-07-20 17:09:00,957 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-11
  20178. 2025-07-20 17:09:00,963 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-14
  20179. 2025-07-20 17:09:00,977 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-12
  20180. 2025-07-20 17:09:00,994 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-13
  20181. 2025-07-20 17:09:01,003 - __main__ - INFO - Built page query for olmocr_workspace/job_1753002474/input.pdf-9
  20182. 2025-07-20 17:09:03,833 - __main__ - INFO - Queue remaining: 0
  20183. 2025-07-20 17:09:03,833 - __main__ - INFO -
  20184. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20185. ----------------------------------------------------------------------------------
  20186. 2025-07-20 17:09:03,833 - __main__ - INFO -
  20187. Worker ID | started
  20188. ----------+--------
  20189. 0 | 14
  20190. 2025-07-20 17:09:13,835 - __main__ - INFO - Queue remaining: 0
  20191. 2025-07-20 17:09:13,835 - __main__ - INFO -
  20192. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20193. ----------------------------------------------------------------------------------
  20194. 2025-07-20 17:09:13,836 - __main__ - INFO -
  20195. Worker ID | started
  20196. ----------+--------
  20197. 0 | 14
  20198. 2025-07-20 17:09:21,600 - sglang - INFO - [2025-07-20 17:09:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2170, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  20199. 2025-07-20 17:09:21,600 - __main__ - INFO - sglang running req: 0 queue req: 0
  20200. 2025-07-20 17:09:23,294 - sglang - INFO - [2025-07-20 17:09:23 TP0] Prefill batch. #new-seq: 6, #new-token: 13163, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.06, #running-req: 1, #queue-req: 7
  20201. 2025-07-20 17:09:23,294 - __main__ - INFO - sglang running req: 1 queue req: 7
  20202. 2025-07-20 17:09:23,837 - __main__ - INFO - Queue remaining: 0
  20203. 2025-07-20 17:09:23,837 - __main__ - INFO -
  20204. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20205. ----------------------------------------------------------------------------------
  20206. 2025-07-20 17:09:23,838 - __main__ - INFO -
  20207. Worker ID | started
  20208. ----------+--------
  20209. 0 | 14
  20210. 2025-07-20 17:09:28,111 - sglang - INFO - [2025-07-20 17:09:28 TP0] Decode batch. #running-req: 7, #token: 15564, token usage: 0.41, gen throughput (token/s): 6.80, #queue-req: 7
  20211. 2025-07-20 17:09:28,111 - __main__ - INFO - sglang running req: 7 queue req: 7
  20212. 2025-07-20 17:09:28,986 - sglang - INFO - [2025-07-20 17:09:28 TP0] Decode batch. #running-req: 7, #token: 15844, token usage: 0.42, gen throughput (token/s): 319.66, #queue-req: 7
  20213. 2025-07-20 17:09:28,987 - __main__ - INFO - sglang running req: 7 queue req: 7
  20214. 2025-07-20 17:09:29,863 - sglang - INFO - [2025-07-20 17:09:29 TP0] Decode batch. #running-req: 7, #token: 16124, token usage: 0.42, gen throughput (token/s): 319.40, #queue-req: 7
  20215. 2025-07-20 17:09:29,863 - __main__ - INFO - sglang running req: 7 queue req: 7
  20216. 2025-07-20 17:09:30,586 - sglang - INFO - [2025-07-20 17:09:30 TP0] Prefill batch. #new-seq: 2, #new-token: 5633, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 5
  20217. 2025-07-20 17:09:30,586 - __main__ - INFO - sglang running req: 6 queue req: 5
  20218. 2025-07-20 17:09:32,328 - sglang - INFO - [2025-07-20 17:09:32 TP0] Decode batch. #running-req: 8, #token: 20533, token usage: 0.54, gen throughput (token/s): 116.05, #queue-req: 5
  20219. 2025-07-20 17:09:32,328 - __main__ - INFO - sglang running req: 8 queue req: 5
  20220. 2025-07-20 17:09:33,216 - sglang - INFO - [2025-07-20 17:09:33 TP0] Decode batch. #running-req: 8, #token: 20853, token usage: 0.55, gen throughput (token/s): 360.13, #queue-req: 5
  20221. 2025-07-20 17:09:33,216 - __main__ - INFO - sglang running req: 8 queue req: 5
  20222. 2025-07-20 17:09:33,839 - __main__ - INFO - Queue remaining: 0
  20223. 2025-07-20 17:09:33,839 - __main__ - INFO -
  20224. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20225. ----------------------------------------------------------------------------------
  20226. sglang_input_tokens 14.72 14.72
  20227. sglang_output_tokens 1.57 1.57
  20228. 2025-07-20 17:09:33,839 - __main__ - INFO -
  20229. Worker ID | finished | started
  20230. ----------+----------+--------
  20231. 0 | 1 | 14
  20232. 2025-07-20 17:09:34,107 - sglang - INFO - [2025-07-20 17:09:34 TP0] Decode batch. #running-req: 8, #token: 21173, token usage: 0.56, gen throughput (token/s): 359.20, #queue-req: 5
  20233. 2025-07-20 17:09:34,107 - __main__ - INFO - sglang running req: 8 queue req: 5
  20234. 2025-07-20 17:09:34,997 - sglang - INFO - [2025-07-20 17:09:34 TP0] Decode batch. #running-req: 8, #token: 21493, token usage: 0.57, gen throughput (token/s): 359.48, #queue-req: 5
  20235. 2025-07-20 17:09:34,997 - __main__ - INFO - sglang running req: 8 queue req: 5
  20236. 2025-07-20 17:09:35,889 - sglang - INFO - [2025-07-20 17:09:35 TP0] Decode batch. #running-req: 8, #token: 21813, token usage: 0.57, gen throughput (token/s): 358.90, #queue-req: 5
  20237. 2025-07-20 17:09:35,889 - __main__ - INFO - sglang running req: 8 queue req: 5
  20238. 2025-07-20 17:09:36,784 - sglang - INFO - [2025-07-20 17:09:36 TP0] Decode batch. #running-req: 8, #token: 22133, token usage: 0.58, gen throughput (token/s): 357.66, #queue-req: 5
  20239. 2025-07-20 17:09:36,784 - __main__ - INFO - sglang running req: 8 queue req: 5
  20240. 2025-07-20 17:09:37,674 - sglang - INFO - [2025-07-20 17:09:37 TP0] Decode batch. #running-req: 8, #token: 22453, token usage: 0.59, gen throughput (token/s): 359.28, #queue-req: 5
  20241. 2025-07-20 17:09:37,674 - __main__ - INFO - sglang running req: 8 queue req: 5
  20242. 2025-07-20 17:09:38,564 - sglang - INFO - [2025-07-20 17:09:38 TP0] Decode batch. #running-req: 8, #token: 22773, token usage: 0.60, gen throughput (token/s): 359.64, #queue-req: 5
  20243. 2025-07-20 17:09:38,564 - __main__ - INFO - sglang running req: 8 queue req: 5
  20244. 2025-07-20 17:09:39,454 - sglang - INFO - [2025-07-20 17:09:39 TP0] Decode batch. #running-req: 8, #token: 23093, token usage: 0.61, gen throughput (token/s): 359.65, #queue-req: 5
  20245. 2025-07-20 17:09:39,454 - __main__ - INFO - sglang running req: 8 queue req: 5
  20246. 2025-07-20 17:09:39,835 - sglang - INFO - [2025-07-20 17:09:39 TP0] Prefill batch. #new-seq: 2, #new-token: 4020, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.55, #running-req: 7, #queue-req: 3
  20247. 2025-07-20 17:09:39,835 - __main__ - INFO - sglang running req: 7 queue req: 3
  20248. 2025-07-20 17:09:41,635 - sglang - INFO - [2025-07-20 17:09:41 TP0] Decode batch. #running-req: 9, #token: 25145, token usage: 0.66, gen throughput (token/s): 156.81, #queue-req: 3
  20249. 2025-07-20 17:09:41,635 - __main__ - INFO - sglang running req: 9 queue req: 3
  20250. 2025-07-20 17:09:41,752 - sglang - INFO - [2025-07-20 17:09:41 TP0] Prefill batch. #new-seq: 2, #new-token: 4745, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.59, #running-req: 8, #queue-req: 1
  20251. 2025-07-20 17:09:41,752 - __main__ - INFO - sglang running req: 8 queue req: 1
  20252. 2025-07-20 17:09:43,603 - sglang - INFO - [2025-07-20 17:09:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2394, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 9, #queue-req: 0
  20253. 2025-07-20 17:09:43,603 - __main__ - INFO - sglang running req: 9 queue req: 0
  20254. 2025-07-20 17:09:43,680 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  20255. 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 1 exiting due to empty queue
  20256. 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 2 exiting due to empty queue
  20257. 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 3 exiting due to empty queue
  20258. 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 4 exiting due to empty queue
  20259. 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 5 exiting due to empty queue
  20260. 2025-07-20 17:09:43,681 - __main__ - INFO - Worker 6 exiting due to empty queue
  20261. 2025-07-20 17:09:43,682 - __main__ - INFO - Worker 7 exiting due to empty queue
  20262. 2025-07-20 17:09:43,840 - __main__ - INFO - Queue remaining: 0
  20263. 2025-07-20 17:09:43,841 - __main__ - INFO -
  20264. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20265. ----------------------------------------------------------------------------------
  20266. sglang_input_tokens 74.67 74.67
  20267. sglang_output_tokens 16.46 16.46
  20268. 2025-07-20 17:09:43,841 - __main__ - INFO -
  20269. Worker ID | finished | started
  20270. ----------+----------+--------
  20271. 0 | 4 | 14
  20272. 2025-07-20 17:09:44,715 - sglang - INFO - [2025-07-20 17:09:44 TP0] Decode batch. #running-req: 10, #token: 27138, token usage: 0.71, gen throughput (token/s): 127.57, #queue-req: 0
  20273. 2025-07-20 17:09:44,716 - __main__ - INFO - sglang running req: 10 queue req: 0
  20274. 2025-07-20 17:09:45,670 - sglang - INFO - [2025-07-20 17:09:45 TP0] Decode batch. #running-req: 10, #token: 27538, token usage: 0.72, gen throughput (token/s): 419.16, #queue-req: 0
  20275. 2025-07-20 17:09:45,670 - __main__ - INFO - sglang running req: 10 queue req: 0
  20276. 2025-07-20 17:09:46,624 - sglang - INFO - [2025-07-20 17:09:46 TP0] Decode batch. #running-req: 10, #token: 27938, token usage: 0.74, gen throughput (token/s): 419.29, #queue-req: 0
  20277. 2025-07-20 17:09:46,624 - __main__ - INFO - sglang running req: 10 queue req: 0
  20278. 2025-07-20 17:09:47,578 - sglang - INFO - [2025-07-20 17:09:47 TP0] Decode batch. #running-req: 9, #token: 25495, token usage: 0.67, gen throughput (token/s): 405.55, #queue-req: 0
  20279. 2025-07-20 17:09:47,578 - __main__ - INFO - sglang running req: 9 queue req: 0
  20280. 2025-07-20 17:09:48,532 - sglang - INFO - [2025-07-20 17:09:48 TP0] Decode batch. #running-req: 9, #token: 25855, token usage: 0.68, gen throughput (token/s): 377.30, #queue-req: 0
  20281. 2025-07-20 17:09:48,532 - __main__ - INFO - sglang running req: 9 queue req: 0
  20282. 2025-07-20 17:09:49,488 - sglang - INFO - [2025-07-20 17:09:49 TP0] Decode batch. #running-req: 9, #token: 26215, token usage: 0.69, gen throughput (token/s): 376.66, #queue-req: 0
  20283. 2025-07-20 17:09:49,488 - __main__ - INFO - sglang running req: 9 queue req: 0
  20284. 2025-07-20 17:09:50,444 - sglang - INFO - [2025-07-20 17:09:50 TP0] Decode batch. #running-req: 9, #token: 26575, token usage: 0.70, gen throughput (token/s): 376.72, #queue-req: 0
  20285. 2025-07-20 17:09:50,444 - __main__ - INFO - sglang running req: 9 queue req: 0
  20286. 2025-07-20 17:09:51,383 - sglang - INFO - [2025-07-20 17:09:51 TP0] Decode batch. #running-req: 8, #token: 24717, token usage: 0.65, gen throughput (token/s): 366.30, #queue-req: 0
  20287. 2025-07-20 17:09:51,383 - __main__ - INFO - sglang running req: 8 queue req: 0
  20288. 2025-07-20 17:09:52,278 - sglang - INFO - [2025-07-20 17:09:52 TP0] Decode batch. #running-req: 7, #token: 21392, token usage: 0.56, gen throughput (token/s): 328.44, #queue-req: 0
  20289. 2025-07-20 17:09:52,278 - __main__ - INFO - sglang running req: 7 queue req: 0
  20290. 2025-07-20 17:09:53,165 - sglang - INFO - [2025-07-20 17:09:53 TP0] Decode batch. #running-req: 6, #token: 18040, token usage: 0.47, gen throughput (token/s): 303.26, #queue-req: 0
  20291. 2025-07-20 17:09:53,165 - __main__ - INFO - sglang running req: 6 queue req: 0
  20292. 2025-07-20 17:09:53,841 - __main__ - INFO - Queue remaining: 0
  20293. 2025-07-20 17:09:53,842 - __main__ - INFO -
  20294. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20295. ----------------------------------------------------------------------------------
  20296. sglang_input_tokens 152.62 152.62
  20297. sglang_output_tokens 39.30 39.30
  20298. 2025-07-20 17:09:53,842 - __main__ - INFO -
  20299. Worker ID | finished | started
  20300. ----------+----------+--------
  20301. 0 | 8 | 14
  20302. 2025-07-20 17:09:54,043 - sglang - INFO - [2025-07-20 17:09:54 TP0] Decode batch. #running-req: 6, #token: 18280, token usage: 0.48, gen throughput (token/s): 273.18, #queue-req: 0
  20303. 2025-07-20 17:09:54,044 - __main__ - INFO - sglang running req: 6 queue req: 0
  20304. 2025-07-20 17:09:54,927 - sglang - INFO - [2025-07-20 17:09:54 TP0] Decode batch. #running-req: 6, #token: 18520, token usage: 0.49, gen throughput (token/s): 271.55, #queue-req: 0
  20305. 2025-07-20 17:09:54,928 - __main__ - INFO - sglang running req: 6 queue req: 0
  20306. 2025-07-20 17:09:55,805 - sglang - INFO - [2025-07-20 17:09:55 TP0] Decode batch. #running-req: 5, #token: 15018, token usage: 0.40, gen throughput (token/s): 233.55, #queue-req: 0
  20307. 2025-07-20 17:09:55,805 - __main__ - INFO - sglang running req: 5 queue req: 0
  20308. 2025-07-20 17:09:56,671 - sglang - INFO - [2025-07-20 17:09:56 TP0] Decode batch. #running-req: 3, #token: 8785, token usage: 0.23, gen throughput (token/s): 190.57, #queue-req: 0
  20309. 2025-07-20 17:09:56,671 - __main__ - INFO - sglang running req: 3 queue req: 0
  20310. 2025-07-20 17:09:57,510 - sglang - INFO - [2025-07-20 17:09:57 TP0] Decode batch. #running-req: 2, #token: 6007, token usage: 0.16, gen throughput (token/s): 109.58, #queue-req: 0
  20311. 2025-07-20 17:09:57,511 - __main__ - INFO - sglang running req: 2 queue req: 0
  20312. 2025-07-20 17:09:58,346 - sglang - INFO - [2025-07-20 17:09:58 TP0] Decode batch. #running-req: 2, #token: 6087, token usage: 0.16, gen throughput (token/s): 95.80, #queue-req: 0
  20313. 2025-07-20 17:09:58,346 - __main__ - INFO - sglang running req: 2 queue req: 0
  20314. 2025-07-20 17:09:59,183 - sglang - INFO - [2025-07-20 17:09:59 TP0] Decode batch. #running-req: 2, #token: 6167, token usage: 0.16, gen throughput (token/s): 95.57, #queue-req: 0
  20315. 2025-07-20 17:09:59,183 - __main__ - INFO - sglang running req: 2 queue req: 0
  20316. 2025-07-20 17:10:00,018 - sglang - INFO - [2025-07-20 17:10:00 TP0] Decode batch. #running-req: 2, #token: 6247, token usage: 0.16, gen throughput (token/s): 95.80, #queue-req: 0
  20317. 2025-07-20 17:10:00,018 - __main__ - INFO - sglang running req: 2 queue req: 0
  20318. 2025-07-20 17:10:00,853 - sglang - INFO - [2025-07-20 17:10:00 TP0] Decode batch. #running-req: 2, #token: 6327, token usage: 0.17, gen throughput (token/s): 95.81, #queue-req: 0
  20319. 2025-07-20 17:10:00,853 - __main__ - INFO - sglang running req: 2 queue req: 0
  20320. 2025-07-20 17:10:01,686 - sglang - INFO - [2025-07-20 17:10:01 TP0] Decode batch. #running-req: 1, #token: 3170, token usage: 0.08, gen throughput (token/s): 87.57, #queue-req: 0
  20321. 2025-07-20 17:10:01,686 - __main__ - INFO - sglang running req: 1 queue req: 0
  20322. 2025-07-20 17:10:02,514 - sglang - INFO - [2025-07-20 17:10:02 TP0] Decode batch. #running-req: 1, #token: 3210, token usage: 0.08, gen throughput (token/s): 48.32, #queue-req: 0
  20323. 2025-07-20 17:10:02,514 - __main__ - INFO - sglang running req: 1 queue req: 0
  20324. 2025-07-20 17:10:03,348 - sglang - INFO - [2025-07-20 17:10:03 TP0] Decode batch. #running-req: 1, #token: 3250, token usage: 0.09, gen throughput (token/s): 47.96, #queue-req: 0
  20325. 2025-07-20 17:10:03,348 - __main__ - INFO - sglang running req: 1 queue req: 0
  20326. 2025-07-20 17:10:03,543 - __main__ - INFO - Finished TaskGroup for worker on dbf48ba2e0ba653560d78d753cde2080c6a38613
  20327. 2025-07-20 17:10:03,543 - __main__ - INFO - Got 1 docs for dbf48ba2e0ba653560d78d753cde2080c6a38613
  20328. 2025-07-20 17:10:03,545 - __main__ - INFO - Worker 0 exiting due to empty queue
  20329. 2025-07-20 17:10:03,545 - __main__ - INFO - Work done
  20330. 2025-07-20 17:10:03,546 - __main__ - INFO - Got cancellation request for SGLang server
  20331. 2025-07-20 17:17:48,853 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  20332. 2025-07-20 17:17:48,853 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
  20333. 2025-07-20 17:17:48,853 - __main__ - INFO - Found 1 total pdf paths to add
  20334. 2025-07-20 17:17:48,858 - __main__ - INFO - Calculated items_per_group: 1 based on average pages per PDF: 11.00
  20335. 2025-07-20 17:17:49,044 - __main__ - INFO - Starting pipeline with PID 631182
  20336. 2025-07-20 17:17:49,045 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  20337. 2025-07-20 17:17:49,128 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  20338. 2025-07-20 17:17:50,158 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  20339. 2025-07-20 17:17:51,204 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  20340. 2025-07-20 17:17:52,268 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  20341. 2025-07-20 17:17:53,336 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  20342. 2025-07-20 17:17:54,390 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  20343. 2025-07-20 17:17:55,474 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  20344. 2025-07-20 17:17:56,517 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  20345. 2025-07-20 17:17:56,945 - sglang - INFO - [2025-07-20 17:17:56] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=531941470, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  20346. 2025-07-20 17:17:56,946 - __main__ - INFO - [2025-07-20 17:17:56] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=531941470, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  20347. 2025-07-20 17:17:57,567 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  20348. 2025-07-20 17:17:57,998 - sglang - INFO - [2025-07-20 17:17:57] Use chat template for the OpenAI-compatible API server: qwen2-vl
  20349. 2025-07-20 17:17:57,998 - __main__ - INFO - [2025-07-20 17:17:57] Use chat template for the OpenAI-compatible API server: qwen2-vl
  20350. 2025-07-20 17:17:58,642 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  20351. 2025-07-20 17:17:59,710 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  20352. 2025-07-20 17:18:00,777 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  20353. 2025-07-20 17:18:01,845 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  20354. 2025-07-20 17:18:02,913 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  20355. 2025-07-20 17:18:03,703 - sglang - INFO - [2025-07-20 17:18:03 TP0] Overlap scheduler is disabled for multimodal models.
  20356. 2025-07-20 17:18:03,703 - __main__ - INFO - [2025-07-20 17:18:03 TP0] Overlap scheduler is disabled for multimodal models.
  20357. 2025-07-20 17:18:03,705 - sglang - INFO - [2025-07-20 17:18:03 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  20358. 2025-07-20 17:18:03,706 - __main__ - INFO - [2025-07-20 17:18:03 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  20359. 2025-07-20 17:18:03,706 - sglang - INFO - [2025-07-20 17:18:03 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  20360. 2025-07-20 17:18:03,706 - __main__ - INFO - [2025-07-20 17:18:03 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  20361. 2025-07-20 17:18:03,706 - sglang - INFO - [2025-07-20 17:18:03 TP0] Init torch distributed begin.
  20362. 2025-07-20 17:18:03,706 - __main__ - INFO - [2025-07-20 17:18:03 TP0] Init torch distributed begin.
  20363. 2025-07-20 17:18:03,990 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  20364. 2025-07-20 17:18:05,057 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  20365. 2025-07-20 17:18:06,130 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  20366. 2025-07-20 17:18:07,198 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  20367. 2025-07-20 17:18:08,262 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  20368. 2025-07-20 17:18:09,051 - sglang - INFO - [2025-07-20 17:18:09 TP0] Load weight begin. avail mem=23.33 GB
  20369. 2025-07-20 17:18:09,052 - __main__ - INFO - [2025-07-20 17:18:09 TP0] Load weight begin. avail mem=23.33 GB
  20370. 2025-07-20 17:18:09,329 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  20371. 2025-07-20 17:18:09,727 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  20372. 2025-07-20 17:18:09,727 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  20373. 2025-07-20 17:18:10,385 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  20374. 2025-07-20 17:18:11,452 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  20375. 2025-07-20 17:18:12,507 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  20376. 2025-07-20 17:18:13,574 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  20377. 2025-07-20 17:18:14,642 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  20378. 2025-07-20 17:18:15,710 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  20379. 2025-07-20 17:18:16,777 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  20380. 2025-07-20 17:18:17,845 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  20381. 2025-07-20 17:18:18,919 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  20382. 2025-07-20 17:18:19,634 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:29, 9.91s/it]
  20383. 2025-07-20 17:18:19,635 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:29, 9.91s/it]
  20384. 2025-07-20 17:18:19,997 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  20385. 2025-07-20 17:18:21,070 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  20386. 2025-07-20 17:18:22,138 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  20387. 2025-07-20 17:18:23,206 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  20388. 2025-07-20 17:18:24,269 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  20389. 2025-07-20 17:18:25,333 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  20390. 2025-07-20 17:18:26,389 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  20391. 2025-07-20 17:18:27,456 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  20392. 2025-07-20 17:18:28,524 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  20393. 2025-07-20 17:18:29,591 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  20394. 2025-07-20 17:18:30,660 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  20395. 2025-07-20 17:18:30,854 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:21<00:21, 10.68s/it]
  20396. 2025-07-20 17:18:30,854 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:21<00:21, 10.68s/it]
  20397. 2025-07-20 17:18:31,736 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  20398. 2025-07-20 17:18:32,804 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  20399. 2025-07-20 17:18:33,873 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  20400. 2025-07-20 17:18:34,128 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:24<00:07, 7.30s/it]
  20401. 2025-07-20 17:18:34,129 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:24<00:07, 7.30s/it]
  20402. 2025-07-20 17:18:34,949 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  20403. 2025-07-20 17:18:36,017 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  20404. 2025-07-20 17:18:37,090 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  20405. 2025-07-20 17:18:38,158 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  20406. 2025-07-20 17:18:38,648 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 6.20s/it]
  20407. 2025-07-20 17:18:38,648 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 6.20s/it]
  20408. 2025-07-20 17:18:38,648 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 7.23s/it]
  20409. 2025-07-20 17:18:38,648 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:28<00:00, 7.23s/it]
  20410. 2025-07-20 17:18:38,648 - sglang - INFO -
  20411. 2025-07-20 17:18:38,648 - __main__ - INFO -
  20412. 2025-07-20 17:18:38,734 - sglang - INFO - [2025-07-20 17:18:38 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  20413. 2025-07-20 17:18:38,734 - __main__ - INFO - [2025-07-20 17:18:38 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  20414. 2025-07-20 17:18:38,741 - sglang - INFO - [2025-07-20 17:18:38 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  20415. 2025-07-20 17:18:38,741 - __main__ - INFO - [2025-07-20 17:18:38 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  20416. 2025-07-20 17:18:38,742 - sglang - INFO - [2025-07-20 17:18:38 TP0] Memory pool end. avail mem=5.30 GB
  20417. 2025-07-20 17:18:38,742 - __main__ - INFO - [2025-07-20 17:18:38 TP0] Memory pool end. avail mem=5.30 GB
  20418. 2025-07-20 17:18:38,923 - sglang - INFO - [2025-07-20 17:18:38 TP0] Capture cuda graph begin. This can take up to several minutes.
  20419. 2025-07-20 17:18:38,923 - __main__ - INFO - [2025-07-20 17:18:38 TP0] Capture cuda graph begin. This can take up to several minutes.
  20420. 2025-07-20 17:18:39,215 - __main__ - WARNING - Attempt 48: Please wait for sglang server to become ready...
  20421. 2025-07-20 17:18:40,272 - __main__ - WARNING - Attempt 49: Please wait for sglang server to become ready...
  20422. 2025-07-20 17:18:41,062 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.23s/it] 50%|█████ | 2/4 [00:01<00:01, 1.46it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.97it/s] 100%|██████████| 4/4 [00:02<00:00, 2.34it/s] 100%|██████████| 4/4 [00:02<00:00, 1.87it/s]
  20423. 2025-07-20 17:18:41,062 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.23s/it] 50%|█████ | 2/4 [00:01<00:01, 1.46it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.97it/s] 100%|██████████| 4/4 [00:02<00:00, 2.34it/s] 100%|██████████| 4/4 [00:02<00:00, 1.87it/s]
  20424. 2025-07-20 17:18:41,062 - sglang - INFO - [2025-07-20 17:18:41 TP0] Capture cuda graph end. Time elapsed: 2.14 s
  20425. 2025-07-20 17:18:41,062 - __main__ - INFO - [2025-07-20 17:18:41 TP0] Capture cuda graph end. Time elapsed: 2.14 s
  20426. 2025-07-20 17:18:41,327 - __main__ - WARNING - Attempt 50: Please wait for sglang server to become ready...
  20427. 2025-07-20 17:18:41,963 - sglang - INFO - [2025-07-20 17:18:41 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  20428. 2025-07-20 17:18:41,963 - __main__ - INFO - [2025-07-20 17:18:41 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  20429. 2025-07-20 17:18:42,396 - __main__ - INFO - sglang server is ready.
  20430. 2025-07-20 17:18:42,397 - __main__ - INFO - Queue remaining: 1
  20431. 2025-07-20 17:18:42,397 - __main__ - INFO -
  20432. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20433. ----------------------------------------------------------------------------------
  20434. 2025-07-20 17:18:42,397 - __main__ - INFO -
  20435. Worker ID
  20436. ---------
  20437. 2025-07-20 17:18:42,397 - __main__ - INFO - Worker 0 processing work item 9face5eb793573e747789b627bf1cc4b334b5b93
  20438. 2025-07-20 17:18:42,397 - __main__ - INFO - Created all tasks for 9face5eb793573e747789b627bf1cc4b334b5b93
  20439. 2025-07-20 17:18:42,401 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
  20440. 2025-07-20 17:18:43,071 - sglang - INFO - [2025-07-20 17:18:43 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  20441. 2025-07-20 17:18:43,071 - __main__ - INFO - [2025-07-20 17:18:43 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  20442. 2025-07-20 17:18:43,072 - __main__ - INFO - sglang running req: 0 queue req: 0
  20443. 2025-07-20 17:18:44,432 - sglang - INFO - [2025-07-20 17:18:44] The server is fired up and ready to roll!
  20444. 2025-07-20 17:18:44,432 - __main__ - INFO - [2025-07-20 17:18:44] The server is fired up and ready to roll!
  20445. 2025-07-20 17:18:48,931 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
  20446. 2025-07-20 17:18:48,965 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
  20447. 2025-07-20 17:18:48,989 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
  20448. 2025-07-20 17:18:48,994 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
  20449. 2025-07-20 17:18:49,017 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
  20450. 2025-07-20 17:18:49,038 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
  20451. 2025-07-20 17:18:49,062 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
  20452. 2025-07-20 17:18:49,083 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
  20453. 2025-07-20 17:18:49,121 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
  20454. 2025-07-20 17:18:49,142 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
  20455. 2025-07-20 17:18:49,233 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
  20456. 2025-07-20 17:18:52,399 - __main__ - INFO - Queue remaining: 0
  20457. 2025-07-20 17:18:52,400 - __main__ - INFO -
  20458. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20459. ----------------------------------------------------------------------------------
  20460. 2025-07-20 17:18:52,400 - __main__ - INFO -
  20461. Worker ID | started
  20462. ----------+--------
  20463. 0 | 11
  20464. 2025-07-20 17:19:02,402 - __main__ - INFO - Queue remaining: 0
  20465. 2025-07-20 17:19:02,402 - __main__ - INFO -
  20466. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20467. ----------------------------------------------------------------------------------
  20468. 2025-07-20 17:19:02,402 - __main__ - INFO -
  20469. Worker ID | started
  20470. ----------+--------
  20471. 0 | 11
  20472. 2025-07-20 17:19:10,063 - sglang - INFO - [2025-07-20 17:19:10 TP0] Prefill batch. #new-seq: 1, #new-token: 2223, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  20473. 2025-07-20 17:19:10,063 - __main__ - INFO - sglang running req: 0 queue req: 0
  20474. 2025-07-20 17:19:11,350 - sglang - INFO - [2025-07-20 17:19:11 TP0] Prefill batch. #new-seq: 6, #new-token: 12840, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.06, #running-req: 1, #queue-req: 4
  20475. 2025-07-20 17:19:11,350 - __main__ - INFO - sglang running req: 1 queue req: 4
  20476. 2025-07-20 17:19:12,404 - __main__ - INFO - Queue remaining: 0
  20477. 2025-07-20 17:19:12,404 - __main__ - INFO -
  20478. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20479. ----------------------------------------------------------------------------------
  20480. 2025-07-20 17:19:12,404 - __main__ - INFO -
  20481. Worker ID | started
  20482. ----------+--------
  20483. 0 | 11
  20484. 2025-07-20 17:19:16,136 - sglang - INFO - [2025-07-20 17:19:16 TP0] Decode batch. #running-req: 7, #token: 15294, token usage: 0.40, gen throughput (token/s): 6.96, #queue-req: 4
  20485. 2025-07-20 17:19:16,137 - __main__ - INFO - sglang running req: 7 queue req: 4
  20486. 2025-07-20 17:19:17,018 - sglang - INFO - [2025-07-20 17:19:17 TP0] Decode batch. #running-req: 7, #token: 15574, token usage: 0.41, gen throughput (token/s): 317.62, #queue-req: 4
  20487. 2025-07-20 17:19:17,018 - __main__ - INFO - sglang running req: 7 queue req: 4
  20488. 2025-07-20 17:19:17,900 - sglang - INFO - [2025-07-20 17:19:17 TP0] Decode batch. #running-req: 7, #token: 15854, token usage: 0.42, gen throughput (token/s): 317.28, #queue-req: 4
  20489. 2025-07-20 17:19:17,901 - __main__ - INFO - sglang running req: 7 queue req: 4
  20490. 2025-07-20 17:19:18,786 - sglang - INFO - [2025-07-20 17:19:18 TP0] Decode batch. #running-req: 7, #token: 16134, token usage: 0.42, gen throughput (token/s): 316.18, #queue-req: 4
  20491. 2025-07-20 17:19:18,786 - __main__ - INFO - sglang running req: 7 queue req: 4
  20492. 2025-07-20 17:19:19,674 - sglang - INFO - [2025-07-20 17:19:19 TP0] Decode batch. #running-req: 7, #token: 16414, token usage: 0.43, gen throughput (token/s): 315.41, #queue-req: 4
  20493. 2025-07-20 17:19:19,674 - __main__ - INFO - sglang running req: 7 queue req: 4
  20494. 2025-07-20 17:19:20,561 - sglang - INFO - [2025-07-20 17:19:20 TP0] Decode batch. #running-req: 7, #token: 16694, token usage: 0.44, gen throughput (token/s): 315.71, #queue-req: 4
  20495. 2025-07-20 17:19:20,561 - __main__ - INFO - sglang running req: 7 queue req: 4
  20496. 2025-07-20 17:19:21,450 - sglang - INFO - [2025-07-20 17:19:21 TP0] Decode batch. #running-req: 7, #token: 16974, token usage: 0.45, gen throughput (token/s): 314.80, #queue-req: 4
  20497. 2025-07-20 17:19:21,450 - __main__ - INFO - sglang running req: 7 queue req: 4
  20498. 2025-07-20 17:19:22,007 - sglang - INFO - [2025-07-20 17:19:22 TP0] Prefill batch. #new-seq: 3, #new-token: 6254, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 1
  20499. 2025-07-20 17:19:22,007 - __main__ - INFO - sglang running req: 6 queue req: 1
  20500. 2025-07-20 17:19:22,406 - __main__ - INFO - Queue remaining: 0
  20501. 2025-07-20 17:19:22,406 - __main__ - INFO -
  20502. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20503. ----------------------------------------------------------------------------------
  20504. sglang_input_tokens 21.16 21.16
  20505. sglang_output_tokens 3.18 3.18
  20506. 2025-07-20 17:19:22,406 - __main__ - INFO -
  20507. Worker ID | finished | started
  20508. ----------+----------+--------
  20509. 0 | 1 | 11
  20510. 2025-07-20 17:19:24,268 - sglang - INFO - [2025-07-20 17:19:24 TP0] Decode batch. #running-req: 9, #token: 21260, token usage: 0.56, gen throughput (token/s): 109.65, #queue-req: 1
  20511. 2025-07-20 17:19:24,268 - __main__ - INFO - sglang running req: 9 queue req: 1
  20512. 2025-07-20 17:19:25,068 - sglang - INFO - [2025-07-20 17:19:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2051, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.50, #running-req: 8, #queue-req: 0
  20513. 2025-07-20 17:19:25,068 - __main__ - INFO - sglang running req: 8 queue req: 0
  20514. 2025-07-20 17:19:25,566 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  20515. 2025-07-20 17:19:25,873 - sglang - INFO - [2025-07-20 17:19:25 TP0] Decode batch. #running-req: 9, #token: 21065, token usage: 0.55, gen throughput (token/s): 223.64, #queue-req: 0
  20516. 2025-07-20 17:19:25,873 - __main__ - INFO - sglang running req: 9 queue req: 0
  20517. 2025-07-20 17:19:26,803 - sglang - INFO - [2025-07-20 17:19:26 TP0] Decode batch. #running-req: 8, #token: 18964, token usage: 0.50, gen throughput (token/s): 374.26, #queue-req: 0
  20518. 2025-07-20 17:19:26,803 - __main__ - INFO - sglang running req: 8 queue req: 0
  20519. 2025-07-20 17:19:27,694 - sglang - INFO - [2025-07-20 17:19:27 TP0] Decode batch. #running-req: 8, #token: 19284, token usage: 0.51, gen throughput (token/s): 359.18, #queue-req: 0
  20520. 2025-07-20 17:19:27,694 - __main__ - INFO - sglang running req: 8 queue req: 0
  20521. 2025-07-20 17:19:28,588 - sglang - INFO - [2025-07-20 17:19:28 TP0] Decode batch. #running-req: 8, #token: 19604, token usage: 0.52, gen throughput (token/s): 358.10, #queue-req: 0
  20522. 2025-07-20 17:19:28,588 - __main__ - INFO - sglang running req: 8 queue req: 0
  20523. 2025-07-20 17:19:29,477 - sglang - INFO - [2025-07-20 17:19:29 TP0] Decode batch. #running-req: 7, #token: 17500, token usage: 0.46, gen throughput (token/s): 315.90, #queue-req: 0
  20524. 2025-07-20 17:19:29,477 - __main__ - INFO - sglang running req: 7 queue req: 0
  20525. 2025-07-20 17:19:30,366 - sglang - INFO - [2025-07-20 17:19:30 TP0] Decode batch. #running-req: 7, #token: 17780, token usage: 0.47, gen throughput (token/s): 315.02, #queue-req: 0
  20526. 2025-07-20 17:19:30,366 - __main__ - INFO - sglang running req: 7 queue req: 0
  20527. 2025-07-20 17:19:31,252 - sglang - INFO - [2025-07-20 17:19:31 TP0] Decode batch. #running-req: 7, #token: 18060, token usage: 0.48, gen throughput (token/s): 316.13, #queue-req: 0
  20528. 2025-07-20 17:19:31,252 - __main__ - INFO - sglang running req: 7 queue req: 0
  20529. 2025-07-20 17:19:32,136 - sglang - INFO - [2025-07-20 17:19:32 TP0] Decode batch. #running-req: 7, #token: 18340, token usage: 0.48, gen throughput (token/s): 316.81, #queue-req: 0
  20530. 2025-07-20 17:19:32,136 - __main__ - INFO - sglang running req: 7 queue req: 0
  20531. 2025-07-20 17:19:32,407 - __main__ - INFO - Queue remaining: 0
  20532. 2025-07-20 17:19:32,408 - __main__ - INFO -
  20533. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20534. ----------------------------------------------------------------------------------
  20535. sglang_input_tokens 79.35 79.35
  20536. sglang_output_tokens 14.50 14.50
  20537. 2025-07-20 17:19:32,408 - __main__ - INFO -
  20538. Worker ID | finished | started
  20539. ----------+----------+--------
  20540. 0 | 4 | 11
  20541. 2025-07-20 17:19:33,015 - sglang - INFO - [2025-07-20 17:19:33 TP0] Decode batch. #running-req: 5, #token: 13130, token usage: 0.35, gen throughput (token/s): 261.41, #queue-req: 0
  20542. 2025-07-20 17:19:33,016 - __main__ - INFO - sglang running req: 5 queue req: 0
  20543. 2025-07-20 17:19:33,890 - sglang - INFO - [2025-07-20 17:19:33 TP0] Decode batch. #running-req: 4, #token: 10913, token usage: 0.29, gen throughput (token/s): 219.53, #queue-req: 0
  20544. 2025-07-20 17:19:33,890 - __main__ - INFO - sglang running req: 4 queue req: 0
  20545. 2025-07-20 17:19:34,753 - sglang - INFO - [2025-07-20 17:19:34 TP0] Decode batch. #running-req: 4, #token: 11073, token usage: 0.29, gen throughput (token/s): 185.29, #queue-req: 0
  20546. 2025-07-20 17:19:34,753 - __main__ - INFO - sglang running req: 4 queue req: 0
  20547. 2025-07-20 17:19:35,611 - sglang - INFO - [2025-07-20 17:19:35 TP0] Decode batch. #running-req: 3, #token: 8965, token usage: 0.24, gen throughput (token/s): 145.72, #queue-req: 0
  20548. 2025-07-20 17:19:35,611 - __main__ - INFO - sglang running req: 3 queue req: 0
  20549. 2025-07-20 17:19:36,468 - sglang - INFO - [2025-07-20 17:19:36 TP0] Decode batch. #running-req: 3, #token: 9085, token usage: 0.24, gen throughput (token/s): 140.04, #queue-req: 0
  20550. 2025-07-20 17:19:36,468 - __main__ - INFO - sglang running req: 3 queue req: 0
  20551. 2025-07-20 17:19:37,318 - sglang - INFO - [2025-07-20 17:19:37 TP0] Decode batch. #running-req: 2, #token: 6109, token usage: 0.16, gen throughput (token/s): 114.14, #queue-req: 0
  20552. 2025-07-20 17:19:37,318 - __main__ - INFO - sglang running req: 2 queue req: 0
  20553. 2025-07-20 17:19:38,161 - sglang - INFO - [2025-07-20 17:19:38 TP0] Decode batch. #running-req: 2, #token: 6189, token usage: 0.16, gen throughput (token/s): 94.91, #queue-req: 0
  20554. 2025-07-20 17:19:38,161 - __main__ - INFO - sglang running req: 2 queue req: 0
  20555. 2025-07-20 17:19:39,002 - sglang - INFO - [2025-07-20 17:19:39 TP0] Decode batch. #running-req: 2, #token: 6269, token usage: 0.17, gen throughput (token/s): 95.10, #queue-req: 0
  20556. 2025-07-20 17:19:39,003 - __main__ - INFO - sglang running req: 2 queue req: 0
  20557. 2025-07-20 17:19:39,843 - sglang - INFO - [2025-07-20 17:19:39 TP0] Decode batch. #running-req: 2, #token: 6349, token usage: 0.17, gen throughput (token/s): 95.09, #queue-req: 0
  20558. 2025-07-20 17:19:39,844 - __main__ - INFO - sglang running req: 2 queue req: 0
  20559. 2025-07-20 17:19:40,687 - sglang - INFO - [2025-07-20 17:19:40 TP0] Decode batch. #running-req: 2, #token: 6429, token usage: 0.17, gen throughput (token/s): 94.83, #queue-req: 0
  20560. 2025-07-20 17:19:40,687 - __main__ - INFO - sglang running req: 2 queue req: 0
  20561. 2025-07-20 17:19:41,533 - sglang - INFO - [2025-07-20 17:19:41 TP0] Decode batch. #running-req: 2, #token: 6509, token usage: 0.17, gen throughput (token/s): 94.62, #queue-req: 0
  20562. 2025-07-20 17:19:41,533 - __main__ - INFO - sglang running req: 2 queue req: 0
  20563. 2025-07-20 17:19:42,377 - sglang - INFO - [2025-07-20 17:19:42 TP0] Decode batch. #running-req: 2, #token: 6589, token usage: 0.17, gen throughput (token/s): 94.71, #queue-req: 0
  20564. 2025-07-20 17:19:42,378 - __main__ - INFO - sglang running req: 2 queue req: 0
  20565. 2025-07-20 17:19:42,409 - __main__ - INFO - Queue remaining: 0
  20566. 2025-07-20 17:19:42,409 - __main__ - INFO -
  20567. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20568. ----------------------------------------------------------------------------------
  20569. sglang_input_tokens 164.72 164.72
  20570. sglang_output_tokens 36.74 36.74
  20571. 2025-07-20 17:19:42,410 - __main__ - INFO -
  20572. Worker ID | finished | started
  20573. ----------+----------+--------
  20574. 0 | 9 | 11
  20575. 2025-07-20 17:19:43,221 - sglang - INFO - [2025-07-20 17:19:43 TP0] Decode batch. #running-req: 2, #token: 6669, token usage: 0.18, gen throughput (token/s): 94.78, #queue-req: 0
  20576. 2025-07-20 17:19:43,221 - __main__ - INFO - sglang running req: 2 queue req: 0
  20577. 2025-07-20 17:19:44,067 - sglang - INFO - [2025-07-20 17:19:44 TP0] Decode batch. #running-req: 2, #token: 6749, token usage: 0.18, gen throughput (token/s): 94.54, #queue-req: 0
  20578. 2025-07-20 17:19:44,068 - __main__ - INFO - sglang running req: 2 queue req: 0
  20579. 2025-07-20 17:19:44,915 - sglang - INFO - [2025-07-20 17:19:44 TP0] Decode batch. #running-req: 2, #token: 6829, token usage: 0.18, gen throughput (token/s): 94.43, #queue-req: 0
  20580. 2025-07-20 17:19:44,915 - __main__ - INFO - sglang running req: 2 queue req: 0
  20581. 2025-07-20 17:19:45,761 - sglang - INFO - [2025-07-20 17:19:45 TP0] Decode batch. #running-req: 1, #token: 3355, token usage: 0.09, gen throughput (token/s): 85.05, #queue-req: 0
  20582. 2025-07-20 17:19:45,762 - __main__ - INFO - sglang running req: 1 queue req: 0
  20583. 2025-07-20 17:19:46,591 - sglang - INFO - [2025-07-20 17:19:46 TP0] Decode batch. #running-req: 1, #token: 3395, token usage: 0.09, gen throughput (token/s): 48.19, #queue-req: 0
  20584. 2025-07-20 17:19:46,592 - __main__ - INFO - sglang running req: 1 queue req: 0
  20585. 2025-07-20 17:19:47,421 - sglang - INFO - [2025-07-20 17:19:47 TP0] Decode batch. #running-req: 1, #token: 3435, token usage: 0.09, gen throughput (token/s): 48.20, #queue-req: 0
  20586. 2025-07-20 17:19:47,421 - __main__ - INFO - sglang running req: 1 queue req: 0
  20587. 2025-07-20 17:19:48,258 - sglang - INFO - [2025-07-20 17:19:48 TP0] Decode batch. #running-req: 1, #token: 3475, token usage: 0.09, gen throughput (token/s): 47.79, #queue-req: 0
  20588. 2025-07-20 17:19:48,258 - __main__ - INFO - sglang running req: 1 queue req: 0
  20589. 2025-07-20 17:19:49,097 - sglang - INFO - [2025-07-20 17:19:49 TP0] Decode batch. #running-req: 1, #token: 3515, token usage: 0.09, gen throughput (token/s): 47.68, #queue-req: 0
  20590. 2025-07-20 17:19:49,098 - __main__ - INFO - sglang running req: 1 queue req: 0
  20591. 2025-07-20 17:19:49,935 - sglang - INFO - [2025-07-20 17:19:49 TP0] Decode batch. #running-req: 1, #token: 3555, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
  20592. 2025-07-20 17:19:49,936 - __main__ - INFO - sglang running req: 1 queue req: 0
  20593. 2025-07-20 17:19:50,774 - sglang - INFO - [2025-07-20 17:19:50 TP0] Decode batch. #running-req: 1, #token: 3595, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
  20594. 2025-07-20 17:19:50,774 - __main__ - INFO - sglang running req: 1 queue req: 0
  20595. 2025-07-20 17:19:51,613 - sglang - INFO - [2025-07-20 17:19:51 TP0] Decode batch. #running-req: 1, #token: 3635, token usage: 0.10, gen throughput (token/s): 47.63, #queue-req: 0
  20596. 2025-07-20 17:19:51,614 - __main__ - INFO - sglang running req: 1 queue req: 0
  20597. 2025-07-20 17:19:52,412 - __main__ - INFO - Queue remaining: 0
  20598. 2025-07-20 17:19:52,412 - __main__ - INFO -
  20599. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20600. ----------------------------------------------------------------------------------
  20601. sglang_input_tokens 172.26 172.26
  20602. sglang_output_tokens 41.60 41.60
  20603. 2025-07-20 17:19:52,412 - __main__ - INFO -
  20604. Worker ID | finished | started
  20605. ----------+----------+--------
  20606. 0 | 10 | 11
  20607. 2025-07-20 17:19:52,453 - sglang - INFO - [2025-07-20 17:19:52 TP0] Decode batch. #running-req: 1, #token: 3675, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
  20608. 2025-07-20 17:19:52,454 - __main__ - INFO - sglang running req: 1 queue req: 0
  20609. 2025-07-20 17:19:53,292 - sglang - INFO - [2025-07-20 17:19:53 TP0] Decode batch. #running-req: 1, #token: 3715, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
  20610. 2025-07-20 17:19:53,292 - __main__ - INFO - sglang running req: 1 queue req: 0
  20611. 2025-07-20 17:19:54,126 - sglang - INFO - [2025-07-20 17:19:54 TP0] Decode batch. #running-req: 1, #token: 3755, token usage: 0.10, gen throughput (token/s): 47.96, #queue-req: 0
  20612. 2025-07-20 17:19:54,126 - __main__ - INFO - sglang running req: 1 queue req: 0
  20613. 2025-07-20 17:19:54,962 - sglang - INFO - [2025-07-20 17:19:54 TP0] Decode batch. #running-req: 1, #token: 3795, token usage: 0.10, gen throughput (token/s): 47.83, #queue-req: 0
  20614. 2025-07-20 17:19:54,962 - __main__ - INFO - sglang running req: 1 queue req: 0
  20615. 2025-07-20 17:19:55,802 - sglang - INFO - [2025-07-20 17:19:55 TP0] Decode batch. #running-req: 1, #token: 3835, token usage: 0.10, gen throughput (token/s): 47.59, #queue-req: 0
  20616. 2025-07-20 17:19:55,803 - __main__ - INFO - sglang running req: 1 queue req: 0
  20617. 2025-07-20 17:19:56,644 - sglang - INFO - [2025-07-20 17:19:56 TP0] Decode batch. #running-req: 1, #token: 3875, token usage: 0.10, gen throughput (token/s): 47.56, #queue-req: 0
  20618. 2025-07-20 17:19:56,644 - __main__ - INFO - sglang running req: 1 queue req: 0
  20619. 2025-07-20 17:19:57,484 - sglang - INFO - [2025-07-20 17:19:57 TP0] Decode batch. #running-req: 1, #token: 3915, token usage: 0.10, gen throughput (token/s): 47.63, #queue-req: 0
  20620. 2025-07-20 17:19:57,484 - __main__ - INFO - sglang running req: 1 queue req: 0
  20621. 2025-07-20 17:19:58,324 - sglang - INFO - [2025-07-20 17:19:58 TP0] Decode batch. #running-req: 1, #token: 3955, token usage: 0.10, gen throughput (token/s): 47.60, #queue-req: 0
  20622. 2025-07-20 17:19:58,324 - __main__ - INFO - sglang running req: 1 queue req: 0
  20623. 2025-07-20 17:19:59,165 - sglang - INFO - [2025-07-20 17:19:59 TP0] Decode batch. #running-req: 1, #token: 3995, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
  20624. 2025-07-20 17:19:59,165 - __main__ - INFO - sglang running req: 1 queue req: 0
  20625. 2025-07-20 17:20:00,006 - sglang - INFO - [2025-07-20 17:20:00 TP0] Decode batch. #running-req: 1, #token: 4035, token usage: 0.11, gen throughput (token/s): 47.54, #queue-req: 0
  20626. 2025-07-20 17:20:00,007 - __main__ - INFO - sglang running req: 1 queue req: 0
  20627. 2025-07-20 17:20:00,844 - sglang - INFO - [2025-07-20 17:20:00 TP0] Decode batch. #running-req: 1, #token: 4075, token usage: 0.11, gen throughput (token/s): 47.74, #queue-req: 0
  20628. 2025-07-20 17:20:00,844 - __main__ - INFO - sglang running req: 1 queue req: 0
  20629. 2025-07-20 17:20:01,680 - sglang - INFO - [2025-07-20 17:20:01 TP0] Decode batch. #running-req: 1, #token: 4115, token usage: 0.11, gen throughput (token/s): 47.84, #queue-req: 0
  20630. 2025-07-20 17:20:01,680 - __main__ - INFO - sglang running req: 1 queue req: 0
  20631. 2025-07-20 17:20:02,414 - __main__ - INFO - Queue remaining: 0
  20632. 2025-07-20 17:20:02,414 - __main__ - INFO -
  20633. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20634. ----------------------------------------------------------------------------------
  20635. sglang_input_tokens 159.36 159.36
  20636. sglang_output_tokens 38.49 38.49
  20637. 2025-07-20 17:20:02,414 - __main__ - INFO -
  20638. Worker ID | finished | started
  20639. ----------+----------+--------
  20640. 0 | 10 | 11
  20641. 2025-07-20 17:20:02,520 - sglang - INFO - [2025-07-20 17:20:02 TP0] Decode batch. #running-req: 1, #token: 4155, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
  20642. 2025-07-20 17:20:02,520 - __main__ - INFO - sglang running req: 1 queue req: 0
  20643. 2025-07-20 17:20:03,362 - sglang - INFO - [2025-07-20 17:20:03 TP0] Decode batch. #running-req: 1, #token: 4195, token usage: 0.11, gen throughput (token/s): 47.53, #queue-req: 0
  20644. 2025-07-20 17:20:03,362 - __main__ - INFO - sglang running req: 1 queue req: 0
  20645. 2025-07-20 17:20:04,203 - sglang - INFO - [2025-07-20 17:20:04 TP0] Decode batch. #running-req: 1, #token: 4235, token usage: 0.11, gen throughput (token/s): 47.51, #queue-req: 0
  20646. 2025-07-20 17:20:04,204 - __main__ - INFO - sglang running req: 1 queue req: 0
  20647. 2025-07-20 17:20:05,045 - sglang - INFO - [2025-07-20 17:20:05 TP0] Decode batch. #running-req: 1, #token: 4275, token usage: 0.11, gen throughput (token/s): 47.52, #queue-req: 0
  20648. 2025-07-20 17:20:05,045 - __main__ - INFO - sglang running req: 1 queue req: 0
  20649. 2025-07-20 17:20:05,887 - sglang - INFO - [2025-07-20 17:20:05 TP0] Decode batch. #running-req: 1, #token: 4315, token usage: 0.11, gen throughput (token/s): 47.50, #queue-req: 0
  20650. 2025-07-20 17:20:05,887 - __main__ - INFO - sglang running req: 1 queue req: 0
  20651. 2025-07-20 17:20:06,730 - sglang - INFO - [2025-07-20 17:20:06 TP0] Decode batch. #running-req: 1, #token: 4355, token usage: 0.11, gen throughput (token/s): 47.44, #queue-req: 0
  20652. 2025-07-20 17:20:06,730 - __main__ - INFO - sglang running req: 1 queue req: 0
  20653. 2025-07-20 17:20:07,573 - sglang - INFO - [2025-07-20 17:20:07 TP0] Decode batch. #running-req: 1, #token: 4395, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
  20654. 2025-07-20 17:20:07,573 - __main__ - INFO - sglang running req: 1 queue req: 0
  20655. 2025-07-20 17:20:08,413 - sglang - INFO - [2025-07-20 17:20:08 TP0] Decode batch. #running-req: 1, #token: 4435, token usage: 0.12, gen throughput (token/s): 47.60, #queue-req: 0
  20656. 2025-07-20 17:20:08,414 - __main__ - INFO - sglang running req: 1 queue req: 0
  20657. 2025-07-20 17:20:09,255 - sglang - INFO - [2025-07-20 17:20:09 TP0] Decode batch. #running-req: 1, #token: 4475, token usage: 0.12, gen throughput (token/s): 47.53, #queue-req: 0
  20658. 2025-07-20 17:20:09,255 - __main__ - INFO - sglang running req: 1 queue req: 0
  20659. 2025-07-20 17:20:10,097 - sglang - INFO - [2025-07-20 17:20:10 TP0] Decode batch. #running-req: 1, #token: 4515, token usage: 0.12, gen throughput (token/s): 47.50, #queue-req: 0
  20660. 2025-07-20 17:20:10,097 - __main__ - INFO - sglang running req: 1 queue req: 0
  20661. 2025-07-20 17:20:10,940 - sglang - INFO - [2025-07-20 17:20:10 TP0] Decode batch. #running-req: 1, #token: 4555, token usage: 0.12, gen throughput (token/s): 47.43, #queue-req: 0
  20662. 2025-07-20 17:20:10,940 - __main__ - INFO - sglang running req: 1 queue req: 0
  20663. 2025-07-20 17:20:11,783 - sglang - INFO - [2025-07-20 17:20:11 TP0] Decode batch. #running-req: 1, #token: 4595, token usage: 0.12, gen throughput (token/s): 47.45, #queue-req: 0
  20664. 2025-07-20 17:20:11,783 - __main__ - INFO - sglang running req: 1 queue req: 0
  20665. 2025-07-20 17:20:12,415 - __main__ - INFO - Queue remaining: 0
  20666. 2025-07-20 17:20:12,416 - __main__ - INFO -
  20667. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20668. ----------------------------------------------------------------------------------
  20669. sglang_input_tokens 148.26 148.26
  20670. sglang_output_tokens 35.81 35.81
  20671. 2025-07-20 17:20:12,416 - __main__ - INFO -
  20672. Worker ID | finished | started
  20673. ----------+----------+--------
  20674. 0 | 10 | 11
  20675. 2025-07-20 17:20:12,625 - sglang - INFO - [2025-07-20 17:20:12 TP0] Decode batch. #running-req: 1, #token: 4635, token usage: 0.12, gen throughput (token/s): 47.49, #queue-req: 0
  20676. 2025-07-20 17:20:12,626 - __main__ - INFO - sglang running req: 1 queue req: 0
  20677. 2025-07-20 17:20:13,470 - sglang - INFO - [2025-07-20 17:20:13 TP0] Decode batch. #running-req: 1, #token: 4675, token usage: 0.12, gen throughput (token/s): 47.38, #queue-req: 0
  20678. 2025-07-20 17:20:13,470 - __main__ - INFO - sglang running req: 1 queue req: 0
  20679. 2025-07-20 17:20:14,314 - sglang - INFO - [2025-07-20 17:20:14 TP0] Decode batch. #running-req: 1, #token: 4715, token usage: 0.12, gen throughput (token/s): 47.39, #queue-req: 0
  20680. 2025-07-20 17:20:14,314 - __main__ - INFO - sglang running req: 1 queue req: 0
  20681. 2025-07-20 17:20:15,156 - sglang - INFO - [2025-07-20 17:20:15 TP0] Decode batch. #running-req: 1, #token: 4755, token usage: 0.13, gen throughput (token/s): 47.49, #queue-req: 0
  20682. 2025-07-20 17:20:15,156 - __main__ - INFO - sglang running req: 1 queue req: 0
  20683. 2025-07-20 17:20:15,998 - sglang - INFO - [2025-07-20 17:20:15 TP0] Decode batch. #running-req: 1, #token: 4795, token usage: 0.13, gen throughput (token/s): 47.49, #queue-req: 0
  20684. 2025-07-20 17:20:15,999 - __main__ - INFO - sglang running req: 1 queue req: 0
  20685. 2025-07-20 17:20:16,839 - sglang - INFO - [2025-07-20 17:20:16 TP0] Decode batch. #running-req: 1, #token: 4835, token usage: 0.13, gen throughput (token/s): 47.60, #queue-req: 0
  20686. 2025-07-20 17:20:16,839 - __main__ - INFO - sglang running req: 1 queue req: 0
  20687. 2025-07-20 17:20:17,680 - sglang - INFO - [2025-07-20 17:20:17 TP0] Decode batch. #running-req: 1, #token: 4875, token usage: 0.13, gen throughput (token/s): 47.54, #queue-req: 0
  20688. 2025-07-20 17:20:17,680 - __main__ - INFO - sglang running req: 1 queue req: 0
  20689. 2025-07-20 17:20:18,524 - sglang - INFO - [2025-07-20 17:20:18 TP0] Decode batch. #running-req: 1, #token: 4915, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
  20690. 2025-07-20 17:20:18,524 - __main__ - INFO - sglang running req: 1 queue req: 0
  20691. 2025-07-20 17:20:19,367 - sglang - INFO - [2025-07-20 17:20:19 TP0] Decode batch. #running-req: 1, #token: 4955, token usage: 0.13, gen throughput (token/s): 47.46, #queue-req: 0
  20692. 2025-07-20 17:20:19,367 - __main__ - INFO - sglang running req: 1 queue req: 0
  20693. 2025-07-20 17:20:20,211 - sglang - INFO - [2025-07-20 17:20:20 TP0] Decode batch. #running-req: 1, #token: 4995, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
  20694. 2025-07-20 17:20:20,211 - __main__ - INFO - sglang running req: 1 queue req: 0
  20695. 2025-07-20 17:20:21,057 - sglang - INFO - [2025-07-20 17:20:21 TP0] Decode batch. #running-req: 1, #token: 5035, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
  20696. 2025-07-20 17:20:21,057 - __main__ - INFO - sglang running req: 1 queue req: 0
  20697. 2025-07-20 17:20:21,903 - sglang - INFO - [2025-07-20 17:20:21 TP0] Decode batch. #running-req: 1, #token: 5075, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
  20698. 2025-07-20 17:20:21,903 - __main__ - INFO - sglang running req: 1 queue req: 0
  20699. 2025-07-20 17:20:22,036 - __main__ - WARNING - JSON decode error on attempt 0 for test_pdf/1144520000702630XG3440106001004.pdf-8: Unterminated string starting at: line 1 column 125 (char 124)
  20700. 2025-07-20 17:20:22,177 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
  20701. 2025-07-20 17:20:22,326 - sglang - INFO - [2025-07-20 17:20:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2082, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  20702. 2025-07-20 17:20:22,326 - __main__ - INFO - sglang running req: 0 queue req: 0
  20703. 2025-07-20 17:20:22,417 - __main__ - INFO - Queue remaining: 0
  20704. 2025-07-20 17:20:22,417 - __main__ - INFO -
  20705. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20706. ----------------------------------------------------------------------------------
  20707. sglang_input_tokens 152.16 152.16
  20708. sglang_output_tokens 53.01 53.01
  20709. 2025-07-20 17:20:22,417 - __main__ - INFO -
  20710. Worker ID | finished | started
  20711. ----------+----------+--------
  20712. 0 | 10 | 11
  20713. 2025-07-20 17:20:23,706 - sglang - INFO - [2025-07-20 17:20:23 TP0] Decode batch. #running-req: 1, #token: 2116, token usage: 0.06, gen throughput (token/s): 22.18, #queue-req: 0
  20714. 2025-07-20 17:20:23,706 - __main__ - INFO - sglang running req: 1 queue req: 0
  20715. 2025-07-20 17:20:24,540 - sglang - INFO - [2025-07-20 17:20:24 TP0] Decode batch. #running-req: 1, #token: 2156, token usage: 0.06, gen throughput (token/s): 47.95, #queue-req: 0
  20716. 2025-07-20 17:20:24,540 - __main__ - INFO - sglang running req: 1 queue req: 0
  20717. 2025-07-20 17:20:25,376 - sglang - INFO - [2025-07-20 17:20:25 TP0] Decode batch. #running-req: 1, #token: 2196, token usage: 0.06, gen throughput (token/s): 47.83, #queue-req: 0
  20718. 2025-07-20 17:20:25,376 - __main__ - INFO - sglang running req: 1 queue req: 0
  20719. 2025-07-20 17:20:26,212 - sglang - INFO - [2025-07-20 17:20:26 TP0] Decode batch. #running-req: 1, #token: 2236, token usage: 0.06, gen throughput (token/s): 47.84, #queue-req: 0
  20720. 2025-07-20 17:20:26,213 - __main__ - INFO - sglang running req: 1 queue req: 0
  20721. 2025-07-20 17:20:27,048 - sglang - INFO - [2025-07-20 17:20:27 TP0] Decode batch. #running-req: 1, #token: 2276, token usage: 0.06, gen throughput (token/s): 47.84, #queue-req: 0
  20722. 2025-07-20 17:20:27,049 - __main__ - INFO - sglang running req: 1 queue req: 0
  20723. 2025-07-20 17:20:27,886 - sglang - INFO - [2025-07-20 17:20:27 TP0] Decode batch. #running-req: 1, #token: 2316, token usage: 0.06, gen throughput (token/s): 47.75, #queue-req: 0
  20724. 2025-07-20 17:20:27,886 - __main__ - INFO - sglang running req: 1 queue req: 0
  20725. 2025-07-20 17:20:28,724 - sglang - INFO - [2025-07-20 17:20:28 TP0] Decode batch. #running-req: 1, #token: 2356, token usage: 0.06, gen throughput (token/s): 47.75, #queue-req: 0
  20726. 2025-07-20 17:20:28,724 - __main__ - INFO - sglang running req: 1 queue req: 0
  20727. 2025-07-20 17:20:29,560 - sglang - INFO - [2025-07-20 17:20:29 TP0] Decode batch. #running-req: 1, #token: 2396, token usage: 0.06, gen throughput (token/s): 47.83, #queue-req: 0
  20728. 2025-07-20 17:20:29,560 - __main__ - INFO - sglang running req: 1 queue req: 0
  20729. 2025-07-20 17:20:30,396 - sglang - INFO - [2025-07-20 17:20:30 TP0] Decode batch. #running-req: 1, #token: 2436, token usage: 0.06, gen throughput (token/s): 47.86, #queue-req: 0
  20730. 2025-07-20 17:20:30,396 - __main__ - INFO - sglang running req: 1 queue req: 0
  20731. 2025-07-20 17:20:31,226 - sglang - INFO - [2025-07-20 17:20:31 TP0] Decode batch. #running-req: 1, #token: 2476, token usage: 0.07, gen throughput (token/s): 48.19, #queue-req: 0
  20732. 2025-07-20 17:20:31,226 - __main__ - INFO - sglang running req: 1 queue req: 0
  20733. 2025-07-20 17:20:32,060 - sglang - INFO - [2025-07-20 17:20:32 TP0] Decode batch. #running-req: 1, #token: 2516, token usage: 0.07, gen throughput (token/s): 47.96, #queue-req: 0
  20734. 2025-07-20 17:20:32,060 - __main__ - INFO - sglang running req: 1 queue req: 0
  20735. 2025-07-20 17:20:32,418 - __main__ - INFO - Queue remaining: 0
  20736. 2025-07-20 17:20:32,418 - __main__ - INFO -
  20737. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20738. ----------------------------------------------------------------------------------
  20739. sglang_input_tokens 142.86 142.86
  20740. sglang_output_tokens 49.77 49.77
  20741. 2025-07-20 17:20:32,418 - __main__ - INFO -
  20742. Worker ID | finished | started
  20743. ----------+----------+--------
  20744. 0 | 10 | 11
  20745. 2025-07-20 17:20:32,896 - sglang - INFO - [2025-07-20 17:20:32 TP0] Decode batch. #running-req: 1, #token: 2556, token usage: 0.07, gen throughput (token/s): 47.83, #queue-req: 0
  20746. 2025-07-20 17:20:32,896 - __main__ - INFO - sglang running req: 1 queue req: 0
  20747. 2025-07-20 17:20:33,735 - sglang - INFO - [2025-07-20 17:20:33 TP0] Decode batch. #running-req: 1, #token: 2596, token usage: 0.07, gen throughput (token/s): 47.70, #queue-req: 0
  20748. 2025-07-20 17:20:33,735 - __main__ - INFO - sglang running req: 1 queue req: 0
  20749. 2025-07-20 17:20:34,573 - sglang - INFO - [2025-07-20 17:20:34 TP0] Decode batch. #running-req: 1, #token: 2636, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
  20750. 2025-07-20 17:20:34,573 - __main__ - INFO - sglang running req: 1 queue req: 0
  20751. 2025-07-20 17:20:35,411 - sglang - INFO - [2025-07-20 17:20:35 TP0] Decode batch. #running-req: 1, #token: 2676, token usage: 0.07, gen throughput (token/s): 47.74, #queue-req: 0
  20752. 2025-07-20 17:20:35,411 - __main__ - INFO - sglang running req: 1 queue req: 0
  20753. 2025-07-20 17:20:36,249 - sglang - INFO - [2025-07-20 17:20:36 TP0] Decode batch. #running-req: 1, #token: 2716, token usage: 0.07, gen throughput (token/s): 47.72, #queue-req: 0
  20754. 2025-07-20 17:20:36,249 - __main__ - INFO - sglang running req: 1 queue req: 0
  20755. 2025-07-20 17:20:37,089 - sglang - INFO - [2025-07-20 17:20:37 TP0] Decode batch. #running-req: 1, #token: 2756, token usage: 0.07, gen throughput (token/s): 47.66, #queue-req: 0
  20756. 2025-07-20 17:20:37,089 - __main__ - INFO - sglang running req: 1 queue req: 0
  20757. 2025-07-20 17:20:37,926 - sglang - INFO - [2025-07-20 17:20:37 TP0] Decode batch. #running-req: 1, #token: 2796, token usage: 0.07, gen throughput (token/s): 47.78, #queue-req: 0
  20758. 2025-07-20 17:20:37,926 - __main__ - INFO - sglang running req: 1 queue req: 0
  20759. 2025-07-20 17:20:38,762 - sglang - INFO - [2025-07-20 17:20:38 TP0] Decode batch. #running-req: 1, #token: 2836, token usage: 0.07, gen throughput (token/s): 47.81, #queue-req: 0
  20760. 2025-07-20 17:20:38,763 - __main__ - INFO - sglang running req: 1 queue req: 0
  20761. 2025-07-20 17:20:39,600 - sglang - INFO - [2025-07-20 17:20:39 TP0] Decode batch. #running-req: 1, #token: 2876, token usage: 0.08, gen throughput (token/s): 47.76, #queue-req: 0
  20762. 2025-07-20 17:20:39,600 - __main__ - INFO - sglang running req: 1 queue req: 0
  20763. 2025-07-20 17:20:40,439 - sglang - INFO - [2025-07-20 17:20:40 TP0] Decode batch. #running-req: 1, #token: 2916, token usage: 0.08, gen throughput (token/s): 47.67, #queue-req: 0
  20764. 2025-07-20 17:20:40,439 - __main__ - INFO - sglang running req: 1 queue req: 0
  20765. 2025-07-20 17:20:41,279 - sglang - INFO - [2025-07-20 17:20:41 TP0] Decode batch. #running-req: 1, #token: 2956, token usage: 0.08, gen throughput (token/s): 47.60, #queue-req: 0
  20766. 2025-07-20 17:20:41,279 - __main__ - INFO - sglang running req: 1 queue req: 0
  20767. 2025-07-20 17:20:42,117 - sglang - INFO - [2025-07-20 17:20:42 TP0] Decode batch. #running-req: 1, #token: 2996, token usage: 0.08, gen throughput (token/s): 47.73, #queue-req: 0
  20768. 2025-07-20 17:20:42,117 - __main__ - INFO - sglang running req: 1 queue req: 0
  20769. 2025-07-20 17:20:42,419 - __main__ - INFO - Queue remaining: 0
  20770. 2025-07-20 17:20:42,419 - __main__ - INFO -
  20771. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20772. ----------------------------------------------------------------------------------
  20773. sglang_input_tokens 134.63 134.63
  20774. sglang_output_tokens 46.90 46.90
  20775. 2025-07-20 17:20:42,420 - __main__ - INFO -
  20776. Worker ID | finished | started
  20777. ----------+----------+--------
  20778. 0 | 10 | 11
  20779. 2025-07-20 17:20:42,956 - sglang - INFO - [2025-07-20 17:20:42 TP0] Decode batch. #running-req: 1, #token: 3036, token usage: 0.08, gen throughput (token/s): 47.68, #queue-req: 0
  20780. 2025-07-20 17:20:42,956 - __main__ - INFO - sglang running req: 1 queue req: 0
  20781. 2025-07-20 17:20:43,796 - sglang - INFO - [2025-07-20 17:20:43 TP0] Decode batch. #running-req: 1, #token: 3076, token usage: 0.08, gen throughput (token/s): 47.64, #queue-req: 0
  20782. 2025-07-20 17:20:43,796 - __main__ - INFO - sglang running req: 1 queue req: 0
  20783. 2025-07-20 17:20:44,636 - sglang - INFO - [2025-07-20 17:20:44 TP0] Decode batch. #running-req: 1, #token: 3116, token usage: 0.08, gen throughput (token/s): 47.60, #queue-req: 0
  20784. 2025-07-20 17:20:44,636 - __main__ - INFO - sglang running req: 1 queue req: 0
  20785. 2025-07-20 17:20:45,475 - sglang - INFO - [2025-07-20 17:20:45 TP0] Decode batch. #running-req: 1, #token: 3156, token usage: 0.08, gen throughput (token/s): 47.67, #queue-req: 0
  20786. 2025-07-20 17:20:45,475 - __main__ - INFO - sglang running req: 1 queue req: 0
  20787. 2025-07-20 17:20:46,306 - sglang - INFO - [2025-07-20 17:20:46 TP0] Decode batch. #running-req: 1, #token: 3196, token usage: 0.08, gen throughput (token/s): 48.12, #queue-req: 0
  20788. 2025-07-20 17:20:46,307 - __main__ - INFO - sglang running req: 1 queue req: 0
  20789. 2025-07-20 17:20:47,138 - sglang - INFO - [2025-07-20 17:20:47 TP0] Decode batch. #running-req: 1, #token: 3236, token usage: 0.09, gen throughput (token/s): 48.10, #queue-req: 0
  20790. 2025-07-20 17:20:47,138 - __main__ - INFO - sglang running req: 1 queue req: 0
  20791. 2025-07-20 17:20:47,977 - sglang - INFO - [2025-07-20 17:20:47 TP0] Decode batch. #running-req: 1, #token: 3276, token usage: 0.09, gen throughput (token/s): 47.68, #queue-req: 0
  20792. 2025-07-20 17:20:47,977 - __main__ - INFO - sglang running req: 1 queue req: 0
  20793. 2025-07-20 17:20:48,816 - sglang - INFO - [2025-07-20 17:20:48 TP0] Decode batch. #running-req: 1, #token: 3316, token usage: 0.09, gen throughput (token/s): 47.64, #queue-req: 0
  20794. 2025-07-20 17:20:48,817 - __main__ - INFO - sglang running req: 1 queue req: 0
  20795. 2025-07-20 17:20:49,655 - sglang - INFO - [2025-07-20 17:20:49 TP0] Decode batch. #running-req: 1, #token: 3356, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
  20796. 2025-07-20 17:20:49,655 - __main__ - INFO - sglang running req: 1 queue req: 0
  20797. 2025-07-20 17:20:50,495 - sglang - INFO - [2025-07-20 17:20:50 TP0] Decode batch. #running-req: 1, #token: 3396, token usage: 0.09, gen throughput (token/s): 47.60, #queue-req: 0
  20798. 2025-07-20 17:20:50,495 - __main__ - INFO - sglang running req: 1 queue req: 0
  20799. 2025-07-20 17:20:51,336 - sglang - INFO - [2025-07-20 17:20:51 TP0] Decode batch. #running-req: 1, #token: 3436, token usage: 0.09, gen throughput (token/s): 47.53, #queue-req: 0
  20800. 2025-07-20 17:20:51,337 - __main__ - INFO - sglang running req: 1 queue req: 0
  20801. 2025-07-20 17:20:52,177 - sglang - INFO - [2025-07-20 17:20:52 TP0] Decode batch. #running-req: 1, #token: 3476, token usage: 0.09, gen throughput (token/s): 47.61, #queue-req: 0
  20802. 2025-07-20 17:20:52,177 - __main__ - INFO - sglang running req: 1 queue req: 0
  20803. 2025-07-20 17:20:52,420 - __main__ - INFO - Queue remaining: 0
  20804. 2025-07-20 17:20:52,420 - __main__ - INFO -
  20805. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20806. ----------------------------------------------------------------------------------
  20807. sglang_input_tokens 127.29 127.29
  20808. sglang_output_tokens 44.35 44.35
  20809. 2025-07-20 17:20:52,420 - __main__ - INFO -
  20810. Worker ID | finished | started
  20811. ----------+----------+--------
  20812. 0 | 10 | 11
  20813. 2025-07-20 17:20:53,014 - sglang - INFO - [2025-07-20 17:20:53 TP0] Decode batch. #running-req: 1, #token: 3516, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
  20814. 2025-07-20 17:20:53,014 - __main__ - INFO - sglang running req: 1 queue req: 0
  20815. 2025-07-20 17:20:53,850 - sglang - INFO - [2025-07-20 17:20:53 TP0] Decode batch. #running-req: 1, #token: 3556, token usage: 0.09, gen throughput (token/s): 47.85, #queue-req: 0
  20816. 2025-07-20 17:20:53,850 - __main__ - INFO - sglang running req: 1 queue req: 0
  20817. 2025-07-20 17:20:54,688 - sglang - INFO - [2025-07-20 17:20:54 TP0] Decode batch. #running-req: 1, #token: 3596, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
  20818. 2025-07-20 17:20:54,689 - __main__ - INFO - sglang running req: 1 queue req: 0
  20819. 2025-07-20 17:20:55,530 - sglang - INFO - [2025-07-20 17:20:55 TP0] Decode batch. #running-req: 1, #token: 3636, token usage: 0.10, gen throughput (token/s): 47.51, #queue-req: 0
  20820. 2025-07-20 17:20:55,530 - __main__ - INFO - sglang running req: 1 queue req: 0
  20821. 2025-07-20 17:20:56,371 - sglang - INFO - [2025-07-20 17:20:56 TP0] Decode batch. #running-req: 1, #token: 3676, token usage: 0.10, gen throughput (token/s): 47.57, #queue-req: 0
  20822. 2025-07-20 17:20:56,371 - __main__ - INFO - sglang running req: 1 queue req: 0
  20823. 2025-07-20 17:20:57,212 - sglang - INFO - [2025-07-20 17:20:57 TP0] Decode batch. #running-req: 1, #token: 3716, token usage: 0.10, gen throughput (token/s): 47.58, #queue-req: 0
  20824. 2025-07-20 17:20:57,212 - __main__ - INFO - sglang running req: 1 queue req: 0
  20825. 2025-07-20 17:20:58,055 - sglang - INFO - [2025-07-20 17:20:58 TP0] Decode batch. #running-req: 1, #token: 3756, token usage: 0.10, gen throughput (token/s): 47.46, #queue-req: 0
  20826. 2025-07-20 17:20:58,055 - __main__ - INFO - sglang running req: 1 queue req: 0
  20827. 2025-07-20 17:20:58,897 - sglang - INFO - [2025-07-20 17:20:58 TP0] Decode batch. #running-req: 1, #token: 3796, token usage: 0.10, gen throughput (token/s): 47.48, #queue-req: 0
  20828. 2025-07-20 17:20:58,897 - __main__ - INFO - sglang running req: 1 queue req: 0
  20829. 2025-07-20 17:20:59,737 - sglang - INFO - [2025-07-20 17:20:59 TP0] Decode batch. #running-req: 1, #token: 3836, token usage: 0.10, gen throughput (token/s): 47.60, #queue-req: 0
  20830. 2025-07-20 17:20:59,738 - __main__ - INFO - sglang running req: 1 queue req: 0
  20831. 2025-07-20 17:21:00,576 - sglang - INFO - [2025-07-20 17:21:00 TP0] Decode batch. #running-req: 1, #token: 3876, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
  20832. 2025-07-20 17:21:00,576 - __main__ - INFO - sglang running req: 1 queue req: 0
  20833. 2025-07-20 17:21:01,417 - sglang - INFO - [2025-07-20 17:21:01 TP0] Decode batch. #running-req: 1, #token: 3916, token usage: 0.10, gen throughput (token/s): 47.57, #queue-req: 0
  20834. 2025-07-20 17:21:01,417 - __main__ - INFO - sglang running req: 1 queue req: 0
  20835. 2025-07-20 17:21:02,258 - sglang - INFO - [2025-07-20 17:21:02 TP0] Decode batch. #running-req: 1, #token: 3956, token usage: 0.10, gen throughput (token/s): 47.55, #queue-req: 0
  20836. 2025-07-20 17:21:02,258 - __main__ - INFO - sglang running req: 1 queue req: 0
  20837. 2025-07-20 17:21:02,422 - __main__ - INFO - Queue remaining: 0
  20838. 2025-07-20 17:21:02,422 - __main__ - INFO -
  20839. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20840. ----------------------------------------------------------------------------------
  20841. sglang_input_tokens 120.72 120.72
  20842. sglang_output_tokens 42.06 42.06
  20843. 2025-07-20 17:21:02,422 - __main__ - INFO -
  20844. Worker ID | finished | started
  20845. ----------+----------+--------
  20846. 0 | 10 | 11
  20847. 2025-07-20 17:21:03,101 - sglang - INFO - [2025-07-20 17:21:03 TP0] Decode batch. #running-req: 1, #token: 3996, token usage: 0.11, gen throughput (token/s): 47.43, #queue-req: 0
  20848. 2025-07-20 17:21:03,102 - __main__ - INFO - sglang running req: 1 queue req: 0
  20849. 2025-07-20 17:21:03,942 - sglang - INFO - [2025-07-20 17:21:03 TP0] Decode batch. #running-req: 1, #token: 4036, token usage: 0.11, gen throughput (token/s): 47.59, #queue-req: 0
  20850. 2025-07-20 17:21:03,942 - __main__ - INFO - sglang running req: 1 queue req: 0
  20851. 2025-07-20 17:21:04,784 - sglang - INFO - [2025-07-20 17:21:04 TP0] Decode batch. #running-req: 1, #token: 4076, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
  20852. 2025-07-20 17:21:04,784 - __main__ - INFO - sglang running req: 1 queue req: 0
  20853. 2025-07-20 17:21:05,628 - sglang - INFO - [2025-07-20 17:21:05 TP0] Decode batch. #running-req: 1, #token: 4116, token usage: 0.11, gen throughput (token/s): 47.38, #queue-req: 0
  20854. 2025-07-20 17:21:05,628 - __main__ - INFO - sglang running req: 1 queue req: 0
  20855. 2025-07-20 17:21:06,472 - sglang - INFO - [2025-07-20 17:21:06 TP0] Decode batch. #running-req: 1, #token: 4156, token usage: 0.11, gen throughput (token/s): 47.42, #queue-req: 0
  20856. 2025-07-20 17:21:06,472 - __main__ - INFO - sglang running req: 1 queue req: 0
  20857. 2025-07-20 17:21:07,313 - sglang - INFO - [2025-07-20 17:21:07 TP0] Decode batch. #running-req: 1, #token: 4196, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
  20858. 2025-07-20 17:21:07,313 - __main__ - INFO - sglang running req: 1 queue req: 0
  20859. 2025-07-20 17:21:08,151 - sglang - INFO - [2025-07-20 17:21:08 TP0] Decode batch. #running-req: 1, #token: 4236, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
  20860. 2025-07-20 17:21:08,152 - __main__ - INFO - sglang running req: 1 queue req: 0
  20861. 2025-07-20 17:21:08,994 - sglang - INFO - [2025-07-20 17:21:08 TP0] Decode batch. #running-req: 1, #token: 4276, token usage: 0.11, gen throughput (token/s): 47.47, #queue-req: 0
  20862. 2025-07-20 17:21:08,994 - __main__ - INFO - sglang running req: 1 queue req: 0
  20863. 2025-07-20 17:21:09,837 - sglang - INFO - [2025-07-20 17:21:09 TP0] Decode batch. #running-req: 1, #token: 4316, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
  20864. 2025-07-20 17:21:09,837 - __main__ - INFO - sglang running req: 1 queue req: 0
  20865. 2025-07-20 17:21:10,680 - sglang - INFO - [2025-07-20 17:21:10 TP0] Decode batch. #running-req: 1, #token: 4356, token usage: 0.11, gen throughput (token/s): 47.42, #queue-req: 0
  20866. 2025-07-20 17:21:10,681 - __main__ - INFO - sglang running req: 1 queue req: 0
  20867. 2025-07-20 17:21:11,523 - sglang - INFO - [2025-07-20 17:21:11 TP0] Decode batch. #running-req: 1, #token: 4396, token usage: 0.12, gen throughput (token/s): 47.48, #queue-req: 0
  20868. 2025-07-20 17:21:11,523 - __main__ - INFO - sglang running req: 1 queue req: 0
  20869. 2025-07-20 17:21:12,367 - sglang - INFO - [2025-07-20 17:21:12 TP0] Decode batch. #running-req: 1, #token: 4436, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
  20870. 2025-07-20 17:21:12,367 - __main__ - INFO - sglang running req: 1 queue req: 0
  20871. 2025-07-20 17:21:12,423 - __main__ - INFO - Queue remaining: 0
  20872. 2025-07-20 17:21:12,423 - __main__ - INFO -
  20873. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20874. ----------------------------------------------------------------------------------
  20875. sglang_input_tokens 114.78 114.78
  20876. sglang_output_tokens 39.99 39.99
  20877. 2025-07-20 17:21:12,423 - __main__ - INFO -
  20878. Worker ID | finished | started
  20879. ----------+----------+--------
  20880. 0 | 10 | 11
  20881. 2025-07-20 17:21:13,213 - sglang - INFO - [2025-07-20 17:21:13 TP0] Decode batch. #running-req: 1, #token: 4476, token usage: 0.12, gen throughput (token/s): 47.29, #queue-req: 0
  20882. 2025-07-20 17:21:13,213 - __main__ - INFO - sglang running req: 1 queue req: 0
  20883. 2025-07-20 17:21:14,058 - sglang - INFO - [2025-07-20 17:21:14 TP0] Decode batch. #running-req: 1, #token: 4516, token usage: 0.12, gen throughput (token/s): 47.34, #queue-req: 0
  20884. 2025-07-20 17:21:14,058 - __main__ - INFO - sglang running req: 1 queue req: 0
  20885. 2025-07-20 17:21:14,901 - sglang - INFO - [2025-07-20 17:21:14 TP0] Decode batch. #running-req: 1, #token: 4556, token usage: 0.12, gen throughput (token/s): 47.44, #queue-req: 0
  20886. 2025-07-20 17:21:14,901 - __main__ - INFO - sglang running req: 1 queue req: 0
  20887. 2025-07-20 17:21:15,746 - sglang - INFO - [2025-07-20 17:21:15 TP0] Decode batch. #running-req: 1, #token: 4596, token usage: 0.12, gen throughput (token/s): 47.34, #queue-req: 0
  20888. 2025-07-20 17:21:15,746 - __main__ - INFO - sglang running req: 1 queue req: 0
  20889. 2025-07-20 17:21:16,590 - sglang - INFO - [2025-07-20 17:21:16 TP0] Decode batch. #running-req: 1, #token: 4636, token usage: 0.12, gen throughput (token/s): 47.38, #queue-req: 0
  20890. 2025-07-20 17:21:16,591 - __main__ - INFO - sglang running req: 1 queue req: 0
  20891. 2025-07-20 17:21:17,435 - sglang - INFO - [2025-07-20 17:21:17 TP0] Decode batch. #running-req: 1, #token: 4676, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
  20892. 2025-07-20 17:21:17,435 - __main__ - INFO - sglang running req: 1 queue req: 0
  20893. 2025-07-20 17:21:18,282 - sglang - INFO - [2025-07-20 17:21:18 TP0] Decode batch. #running-req: 1, #token: 4716, token usage: 0.12, gen throughput (token/s): 47.22, #queue-req: 0
  20894. 2025-07-20 17:21:18,282 - __main__ - INFO - sglang running req: 1 queue req: 0
  20895. 2025-07-20 17:21:19,127 - sglang - INFO - [2025-07-20 17:21:19 TP0] Decode batch. #running-req: 1, #token: 4756, token usage: 0.13, gen throughput (token/s): 47.34, #queue-req: 0
  20896. 2025-07-20 17:21:19,127 - __main__ - INFO - sglang running req: 1 queue req: 0
  20897. 2025-07-20 17:21:19,972 - sglang - INFO - [2025-07-20 17:21:19 TP0] Decode batch. #running-req: 1, #token: 4796, token usage: 0.13, gen throughput (token/s): 47.34, #queue-req: 0
  20898. 2025-07-20 17:21:19,972 - __main__ - INFO - sglang running req: 1 queue req: 0
  20899. 2025-07-20 17:21:20,818 - sglang - INFO - [2025-07-20 17:21:20 TP0] Decode batch. #running-req: 1, #token: 4836, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
  20900. 2025-07-20 17:21:20,818 - __main__ - INFO - sglang running req: 1 queue req: 0
  20901. 2025-07-20 17:21:21,663 - sglang - INFO - [2025-07-20 17:21:21 TP0] Decode batch. #running-req: 1, #token: 4876, token usage: 0.13, gen throughput (token/s): 47.35, #queue-req: 0
  20902. 2025-07-20 17:21:21,663 - __main__ - INFO - sglang running req: 1 queue req: 0
  20903. 2025-07-20 17:21:22,425 - __main__ - INFO - Queue remaining: 0
  20904. 2025-07-20 17:21:22,426 - __main__ - INFO -
  20905. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20906. ----------------------------------------------------------------------------------
  20907. sglang_input_tokens 109.41 109.41
  20908. sglang_output_tokens 38.12 38.12
  20909. 2025-07-20 17:21:22,426 - __main__ - INFO -
  20910. Worker ID | finished | started
  20911. ----------+----------+--------
  20912. 0 | 10 | 11
  20913. 2025-07-20 17:21:22,506 - sglang - INFO - [2025-07-20 17:21:22 TP0] Decode batch. #running-req: 1, #token: 4916, token usage: 0.13, gen throughput (token/s): 47.42, #queue-req: 0
  20914. 2025-07-20 17:21:22,507 - __main__ - INFO - sglang running req: 1 queue req: 0
  20915. 2025-07-20 17:21:23,348 - sglang - INFO - [2025-07-20 17:21:23 TP0] Decode batch. #running-req: 1, #token: 4956, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
  20916. 2025-07-20 17:21:23,348 - __main__ - INFO - sglang running req: 1 queue req: 0
  20917. 2025-07-20 17:21:24,189 - sglang - INFO - [2025-07-20 17:21:24 TP0] Decode batch. #running-req: 1, #token: 4996, token usage: 0.13, gen throughput (token/s): 47.55, #queue-req: 0
  20918. 2025-07-20 17:21:24,189 - __main__ - INFO - sglang running req: 1 queue req: 0
  20919. 2025-07-20 17:21:25,035 - sglang - INFO - [2025-07-20 17:21:25 TP0] Decode batch. #running-req: 1, #token: 5036, token usage: 0.13, gen throughput (token/s): 47.31, #queue-req: 0
  20920. 2025-07-20 17:21:25,035 - __main__ - INFO - sglang running req: 1 queue req: 0
  20921. 2025-07-20 17:21:25,881 - sglang - INFO - [2025-07-20 17:21:25 TP0] Decode batch. #running-req: 1, #token: 5076, token usage: 0.13, gen throughput (token/s): 47.24, #queue-req: 0
  20922. 2025-07-20 17:21:25,882 - __main__ - INFO - sglang running req: 1 queue req: 0
  20923. 2025-07-20 17:21:25,995 - __main__ - WARNING - JSON decode error on attempt 1 for test_pdf/1144520000702630XG3440106001004.pdf-8: Unterminated string starting at: line 1 column 125 (char 124)
  20924. 2025-07-20 17:21:26,127 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
  20925. 2025-07-20 17:21:26,370 - sglang - INFO - [2025-07-20 17:21:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2082, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  20926. 2025-07-20 17:21:26,370 - __main__ - INFO - sglang running req: 0 queue req: 0
  20927. 2025-07-20 17:21:27,778 - sglang - INFO - [2025-07-20 17:21:27 TP0] Decode batch. #running-req: 1, #token: 2117, token usage: 0.06, gen throughput (token/s): 21.09, #queue-req: 0
  20928. 2025-07-20 17:21:27,778 - __main__ - INFO - sglang running req: 1 queue req: 0
  20929. 2025-07-20 17:21:28,617 - sglang - INFO - [2025-07-20 17:21:28 TP0] Decode batch. #running-req: 1, #token: 2157, token usage: 0.06, gen throughput (token/s): 47.69, #queue-req: 0
  20930. 2025-07-20 17:21:28,617 - __main__ - INFO - sglang running req: 1 queue req: 0
  20931. 2025-07-20 17:21:29,455 - sglang - INFO - [2025-07-20 17:21:29 TP0] Decode batch. #running-req: 1, #token: 2197, token usage: 0.06, gen throughput (token/s): 47.76, #queue-req: 0
  20932. 2025-07-20 17:21:29,455 - __main__ - INFO - sglang running req: 1 queue req: 0
  20933. 2025-07-20 17:21:30,288 - sglang - INFO - [2025-07-20 17:21:30 TP0] Decode batch. #running-req: 1, #token: 2237, token usage: 0.06, gen throughput (token/s): 47.98, #queue-req: 0
  20934. 2025-07-20 17:21:30,289 - __main__ - INFO - sglang running req: 1 queue req: 0
  20935. 2025-07-20 17:21:31,123 - sglang - INFO - [2025-07-20 17:21:31 TP0] Decode batch. #running-req: 1, #token: 2277, token usage: 0.06, gen throughput (token/s): 47.94, #queue-req: 0
  20936. 2025-07-20 17:21:31,123 - __main__ - INFO - sglang running req: 1 queue req: 0
  20937. 2025-07-20 17:21:31,958 - sglang - INFO - [2025-07-20 17:21:31 TP0] Decode batch. #running-req: 1, #token: 2317, token usage: 0.06, gen throughput (token/s): 47.87, #queue-req: 0
  20938. 2025-07-20 17:21:31,958 - __main__ - INFO - sglang running req: 1 queue req: 0
  20939. 2025-07-20 17:21:32,427 - __main__ - INFO - Queue remaining: 0
  20940. 2025-07-20 17:21:32,427 - __main__ - INFO -
  20941. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20942. ----------------------------------------------------------------------------------
  20943. sglang_input_tokens 113.83 113.83
  20944. sglang_output_tokens 49.83 49.83
  20945. 2025-07-20 17:21:32,428 - __main__ - INFO -
  20946. Worker ID | finished | started
  20947. ----------+----------+--------
  20948. 0 | 10 | 11
  20949. 2025-07-20 17:21:32,796 - sglang - INFO - [2025-07-20 17:21:32 TP0] Decode batch. #running-req: 1, #token: 2357, token usage: 0.06, gen throughput (token/s): 47.74, #queue-req: 0
  20950. 2025-07-20 17:21:32,797 - __main__ - INFO - sglang running req: 1 queue req: 0
  20951. 2025-07-20 17:21:33,634 - sglang - INFO - [2025-07-20 17:21:33 TP0] Decode batch. #running-req: 1, #token: 2397, token usage: 0.06, gen throughput (token/s): 47.73, #queue-req: 0
  20952. 2025-07-20 17:21:33,635 - __main__ - INFO - sglang running req: 1 queue req: 0
  20953. 2025-07-20 17:21:34,471 - sglang - INFO - [2025-07-20 17:21:34 TP0] Decode batch. #running-req: 1, #token: 2437, token usage: 0.06, gen throughput (token/s): 47.83, #queue-req: 0
  20954. 2025-07-20 17:21:34,471 - __main__ - INFO - sglang running req: 1 queue req: 0
  20955. 2025-07-20 17:21:35,310 - sglang - INFO - [2025-07-20 17:21:35 TP0] Decode batch. #running-req: 1, #token: 2477, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
  20956. 2025-07-20 17:21:35,310 - __main__ - INFO - sglang running req: 1 queue req: 0
  20957. 2025-07-20 17:21:36,148 - sglang - INFO - [2025-07-20 17:21:36 TP0] Decode batch. #running-req: 1, #token: 2517, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
  20958. 2025-07-20 17:21:36,148 - __main__ - INFO - sglang running req: 1 queue req: 0
  20959. 2025-07-20 17:21:36,984 - sglang - INFO - [2025-07-20 17:21:36 TP0] Decode batch. #running-req: 1, #token: 2557, token usage: 0.07, gen throughput (token/s): 47.83, #queue-req: 0
  20960. 2025-07-20 17:21:36,985 - __main__ - INFO - sglang running req: 1 queue req: 0
  20961. 2025-07-20 17:21:37,822 - sglang - INFO - [2025-07-20 17:21:37 TP0] Decode batch. #running-req: 1, #token: 2597, token usage: 0.07, gen throughput (token/s): 47.76, #queue-req: 0
  20962. 2025-07-20 17:21:37,822 - __main__ - INFO - sglang running req: 1 queue req: 0
  20963. 2025-07-20 17:21:38,660 - sglang - INFO - [2025-07-20 17:21:38 TP0] Decode batch. #running-req: 1, #token: 2637, token usage: 0.07, gen throughput (token/s): 47.72, #queue-req: 0
  20964. 2025-07-20 17:21:38,660 - __main__ - INFO - sglang running req: 1 queue req: 0
  20965. 2025-07-20 17:21:39,498 - sglang - INFO - [2025-07-20 17:21:39 TP0] Decode batch. #running-req: 1, #token: 2677, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
  20966. 2025-07-20 17:21:39,498 - __main__ - INFO - sglang running req: 1 queue req: 0
  20967. 2025-07-20 17:21:40,337 - sglang - INFO - [2025-07-20 17:21:40 TP0] Decode batch. #running-req: 1, #token: 2717, token usage: 0.07, gen throughput (token/s): 47.67, #queue-req: 0
  20968. 2025-07-20 17:21:40,338 - __main__ - INFO - sglang running req: 1 queue req: 0
  20969. 2025-07-20 17:21:41,178 - sglang - INFO - [2025-07-20 17:21:41 TP0] Decode batch. #running-req: 1, #token: 2757, token usage: 0.07, gen throughput (token/s): 47.62, #queue-req: 0
  20970. 2025-07-20 17:21:41,178 - __main__ - INFO - sglang running req: 1 queue req: 0
  20971. 2025-07-20 17:21:42,017 - sglang - INFO - [2025-07-20 17:21:42 TP0] Decode batch. #running-req: 1, #token: 2797, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
  20972. 2025-07-20 17:21:42,017 - __main__ - INFO - sglang running req: 1 queue req: 0
  20973. 2025-07-20 17:21:42,430 - __main__ - INFO - Queue remaining: 0
  20974. 2025-07-20 17:21:42,430 - __main__ - INFO -
  20975. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  20976. ----------------------------------------------------------------------------------
  20977. sglang_input_tokens 108.95 108.95
  20978. sglang_output_tokens 47.70 47.70
  20979. 2025-07-20 17:21:42,430 - __main__ - INFO -
  20980. Worker ID | finished | started
  20981. ----------+----------+--------
  20982. 0 | 10 | 11
  20983. 2025-07-20 17:21:42,857 - sglang - INFO - [2025-07-20 17:21:42 TP0] Decode batch. #running-req: 1, #token: 2837, token usage: 0.07, gen throughput (token/s): 47.63, #queue-req: 0
  20984. 2025-07-20 17:21:42,857 - __main__ - INFO - sglang running req: 1 queue req: 0
  20985. 2025-07-20 17:21:43,695 - sglang - INFO - [2025-07-20 17:21:43 TP0] Decode batch. #running-req: 1, #token: 2877, token usage: 0.08, gen throughput (token/s): 47.69, #queue-req: 0
  20986. 2025-07-20 17:21:43,695 - __main__ - INFO - sglang running req: 1 queue req: 0
  20987. 2025-07-20 17:21:44,534 - sglang - INFO - [2025-07-20 17:21:44 TP0] Decode batch. #running-req: 1, #token: 2917, token usage: 0.08, gen throughput (token/s): 47.69, #queue-req: 0
  20988. 2025-07-20 17:21:44,534 - __main__ - INFO - sglang running req: 1 queue req: 0
  20989. 2025-07-20 17:21:45,371 - sglang - INFO - [2025-07-20 17:21:45 TP0] Decode batch. #running-req: 1, #token: 2957, token usage: 0.08, gen throughput (token/s): 47.80, #queue-req: 0
  20990. 2025-07-20 17:21:45,371 - __main__ - INFO - sglang running req: 1 queue req: 0
  20991. 2025-07-20 17:21:46,203 - sglang - INFO - [2025-07-20 17:21:46 TP0] Decode batch. #running-req: 1, #token: 2997, token usage: 0.08, gen throughput (token/s): 48.08, #queue-req: 0
  20992. 2025-07-20 17:21:46,203 - __main__ - INFO - sglang running req: 1 queue req: 0
  20993. 2025-07-20 17:21:47,035 - sglang - INFO - [2025-07-20 17:21:47 TP0] Decode batch. #running-req: 1, #token: 3037, token usage: 0.08, gen throughput (token/s): 48.10, #queue-req: 0
  20994. 2025-07-20 17:21:47,035 - __main__ - INFO - sglang running req: 1 queue req: 0
  20995. 2025-07-20 17:21:47,873 - sglang - INFO - [2025-07-20 17:21:47 TP0] Decode batch. #running-req: 1, #token: 3077, token usage: 0.08, gen throughput (token/s): 47.72, #queue-req: 0
  20996. 2025-07-20 17:21:47,873 - __main__ - INFO - sglang running req: 1 queue req: 0
  20997. 2025-07-20 17:21:48,712 - sglang - INFO - [2025-07-20 17:21:48 TP0] Decode batch. #running-req: 1, #token: 3117, token usage: 0.08, gen throughput (token/s): 47.67, #queue-req: 0
  20998. 2025-07-20 17:21:48,713 - __main__ - INFO - sglang running req: 1 queue req: 0
  20999. 2025-07-20 17:21:49,551 - sglang - INFO - [2025-07-20 17:21:49 TP0] Decode batch. #running-req: 1, #token: 3157, token usage: 0.08, gen throughput (token/s): 47.70, #queue-req: 0
  21000. 2025-07-20 17:21:49,551 - __main__ - INFO - sglang running req: 1 queue req: 0
  21001. 2025-07-20 17:21:50,391 - sglang - INFO - [2025-07-20 17:21:50 TP0] Decode batch. #running-req: 1, #token: 3197, token usage: 0.08, gen throughput (token/s): 47.58, #queue-req: 0
  21002. 2025-07-20 17:21:50,391 - __main__ - INFO - sglang running req: 1 queue req: 0
  21003. 2025-07-20 17:21:51,232 - sglang - INFO - [2025-07-20 17:21:51 TP0] Decode batch. #running-req: 1, #token: 3237, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
  21004. 2025-07-20 17:21:51,232 - __main__ - INFO - sglang running req: 1 queue req: 0
  21005. 2025-07-20 17:21:52,072 - sglang - INFO - [2025-07-20 17:21:52 TP0] Decode batch. #running-req: 1, #token: 3277, token usage: 0.09, gen throughput (token/s): 47.64, #queue-req: 0
  21006. 2025-07-20 17:21:52,072 - __main__ - INFO - sglang running req: 1 queue req: 0
  21007. 2025-07-20 17:21:52,431 - __main__ - INFO - Queue remaining: 0
  21008. 2025-07-20 17:21:52,432 - __main__ - INFO -
  21009. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21010. ----------------------------------------------------------------------------------
  21011. sglang_input_tokens 104.48 104.48
  21012. sglang_output_tokens 45.74 45.74
  21013. 2025-07-20 17:21:52,432 - __main__ - INFO -
  21014. Worker ID | finished | started
  21015. ----------+----------+--------
  21016. 0 | 10 | 11
  21017. 2025-07-20 17:21:52,908 - sglang - INFO - [2025-07-20 17:21:52 TP0] Decode batch. #running-req: 1, #token: 3317, token usage: 0.09, gen throughput (token/s): 47.85, #queue-req: 0
  21018. 2025-07-20 17:21:52,908 - __main__ - INFO - sglang running req: 1 queue req: 0
  21019. 2025-07-20 17:21:53,743 - sglang - INFO - [2025-07-20 17:21:53 TP0] Decode batch. #running-req: 1, #token: 3357, token usage: 0.09, gen throughput (token/s): 47.87, #queue-req: 0
  21020. 2025-07-20 17:21:53,743 - __main__ - INFO - sglang running req: 1 queue req: 0
  21021. 2025-07-20 17:21:54,583 - sglang - INFO - [2025-07-20 17:21:54 TP0] Decode batch. #running-req: 1, #token: 3397, token usage: 0.09, gen throughput (token/s): 47.63, #queue-req: 0
  21022. 2025-07-20 17:21:54,583 - __main__ - INFO - sglang running req: 1 queue req: 0
  21023. 2025-07-20 17:21:55,426 - sglang - INFO - [2025-07-20 17:21:55 TP0] Decode batch. #running-req: 1, #token: 3437, token usage: 0.09, gen throughput (token/s): 47.46, #queue-req: 0
  21024. 2025-07-20 17:21:55,426 - __main__ - INFO - sglang running req: 1 queue req: 0
  21025. 2025-07-20 17:21:56,268 - sglang - INFO - [2025-07-20 17:21:56 TP0] Decode batch. #running-req: 1, #token: 3477, token usage: 0.09, gen throughput (token/s): 47.50, #queue-req: 0
  21026. 2025-07-20 17:21:56,268 - __main__ - INFO - sglang running req: 1 queue req: 0
  21027. 2025-07-20 17:21:57,109 - sglang - INFO - [2025-07-20 17:21:57 TP0] Decode batch. #running-req: 1, #token: 3517, token usage: 0.09, gen throughput (token/s): 47.55, #queue-req: 0
  21028. 2025-07-20 17:21:57,109 - __main__ - INFO - sglang running req: 1 queue req: 0
  21029. 2025-07-20 17:21:57,952 - sglang - INFO - [2025-07-20 17:21:57 TP0] Decode batch. #running-req: 1, #token: 3557, token usage: 0.09, gen throughput (token/s): 47.46, #queue-req: 0
  21030. 2025-07-20 17:21:57,952 - __main__ - INFO - sglang running req: 1 queue req: 0
  21031. 2025-07-20 17:21:58,796 - sglang - INFO - [2025-07-20 17:21:58 TP0] Decode batch. #running-req: 1, #token: 3597, token usage: 0.09, gen throughput (token/s): 47.40, #queue-req: 0
  21032. 2025-07-20 17:21:58,796 - __main__ - INFO - sglang running req: 1 queue req: 0
  21033. 2025-07-20 17:21:59,637 - sglang - INFO - [2025-07-20 17:21:59 TP0] Decode batch. #running-req: 1, #token: 3637, token usage: 0.10, gen throughput (token/s): 47.54, #queue-req: 0
  21034. 2025-07-20 17:21:59,638 - __main__ - INFO - sglang running req: 1 queue req: 0
  21035. 2025-07-20 17:22:00,474 - sglang - INFO - [2025-07-20 17:22:00 TP0] Decode batch. #running-req: 1, #token: 3677, token usage: 0.10, gen throughput (token/s): 47.79, #queue-req: 0
  21036. 2025-07-20 17:22:00,475 - __main__ - INFO - sglang running req: 1 queue req: 0
  21037. 2025-07-20 17:22:01,315 - sglang - INFO - [2025-07-20 17:22:01 TP0] Decode batch. #running-req: 1, #token: 3717, token usage: 0.10, gen throughput (token/s): 47.58, #queue-req: 0
  21038. 2025-07-20 17:22:01,315 - __main__ - INFO - sglang running req: 1 queue req: 0
  21039. 2025-07-20 17:22:02,158 - sglang - INFO - [2025-07-20 17:22:02 TP0] Decode batch. #running-req: 1, #token: 3757, token usage: 0.10, gen throughput (token/s): 47.46, #queue-req: 0
  21040. 2025-07-20 17:22:02,158 - __main__ - INFO - sglang running req: 1 queue req: 0
  21041. 2025-07-20 17:22:02,433 - __main__ - INFO - Queue remaining: 0
  21042. 2025-07-20 17:22:02,433 - __main__ - INFO -
  21043. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21044. ----------------------------------------------------------------------------------
  21045. sglang_input_tokens 100.36 100.36
  21046. sglang_output_tokens 43.93 43.93
  21047. 2025-07-20 17:22:02,433 - __main__ - INFO -
  21048. Worker ID | finished | started
  21049. ----------+----------+--------
  21050. 0 | 10 | 11
  21051. 2025-07-20 17:22:03,002 - sglang - INFO - [2025-07-20 17:22:03 TP0] Decode batch. #running-req: 1, #token: 3797, token usage: 0.10, gen throughput (token/s): 47.38, #queue-req: 0
  21052. 2025-07-20 17:22:03,003 - __main__ - INFO - sglang running req: 1 queue req: 0
  21053. 2025-07-20 17:22:03,844 - sglang - INFO - [2025-07-20 17:22:03 TP0] Decode batch. #running-req: 1, #token: 3837, token usage: 0.10, gen throughput (token/s): 47.52, #queue-req: 0
  21054. 2025-07-20 17:22:03,844 - __main__ - INFO - sglang running req: 1 queue req: 0
  21055. 2025-07-20 17:22:04,686 - sglang - INFO - [2025-07-20 17:22:04 TP0] Decode batch. #running-req: 1, #token: 3877, token usage: 0.10, gen throughput (token/s): 47.52, #queue-req: 0
  21056. 2025-07-20 17:22:04,686 - __main__ - INFO - sglang running req: 1 queue req: 0
  21057. 2025-07-20 17:22:05,530 - sglang - INFO - [2025-07-20 17:22:05 TP0] Decode batch. #running-req: 1, #token: 3917, token usage: 0.10, gen throughput (token/s): 47.40, #queue-req: 0
  21058. 2025-07-20 17:22:05,530 - __main__ - INFO - sglang running req: 1 queue req: 0
  21059. 2025-07-20 17:22:06,372 - sglang - INFO - [2025-07-20 17:22:06 TP0] Decode batch. #running-req: 1, #token: 3957, token usage: 0.10, gen throughput (token/s): 47.49, #queue-req: 0
  21060. 2025-07-20 17:22:06,372 - __main__ - INFO - sglang running req: 1 queue req: 0
  21061. 2025-07-20 17:22:07,211 - sglang - INFO - [2025-07-20 17:22:07 TP0] Decode batch. #running-req: 1, #token: 3997, token usage: 0.11, gen throughput (token/s): 47.65, #queue-req: 0
  21062. 2025-07-20 17:22:07,212 - __main__ - INFO - sglang running req: 1 queue req: 0
  21063. 2025-07-20 17:22:08,048 - sglang - INFO - [2025-07-20 17:22:08 TP0] Decode batch. #running-req: 1, #token: 4037, token usage: 0.11, gen throughput (token/s): 47.78, #queue-req: 0
  21064. 2025-07-20 17:22:08,049 - __main__ - INFO - sglang running req: 1 queue req: 0
  21065. 2025-07-20 17:22:08,888 - sglang - INFO - [2025-07-20 17:22:08 TP0] Decode batch. #running-req: 1, #token: 4077, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
  21066. 2025-07-20 17:22:08,888 - __main__ - INFO - sglang running req: 1 queue req: 0
  21067. 2025-07-20 17:22:09,731 - sglang - INFO - [2025-07-20 17:22:09 TP0] Decode batch. #running-req: 1, #token: 4117, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
  21068. 2025-07-20 17:22:09,731 - __main__ - INFO - sglang running req: 1 queue req: 0
  21069. 2025-07-20 17:22:10,575 - sglang - INFO - [2025-07-20 17:22:10 TP0] Decode batch. #running-req: 1, #token: 4157, token usage: 0.11, gen throughput (token/s): 47.40, #queue-req: 0
  21070. 2025-07-20 17:22:10,575 - __main__ - INFO - sglang running req: 1 queue req: 0
  21071. 2025-07-20 17:22:11,416 - sglang - INFO - [2025-07-20 17:22:11 TP0] Decode batch. #running-req: 1, #token: 4197, token usage: 0.11, gen throughput (token/s): 47.53, #queue-req: 0
  21072. 2025-07-20 17:22:11,417 - __main__ - INFO - sglang running req: 1 queue req: 0
  21073. 2025-07-20 17:22:12,261 - sglang - INFO - [2025-07-20 17:22:12 TP0] Decode batch. #running-req: 1, #token: 4237, token usage: 0.11, gen throughput (token/s): 47.37, #queue-req: 0
  21074. 2025-07-20 17:22:12,261 - __main__ - INFO - sglang running req: 1 queue req: 0
  21075. 2025-07-20 17:22:12,434 - __main__ - INFO - Queue remaining: 0
  21076. 2025-07-20 17:22:12,434 - __main__ - INFO -
  21077. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21078. ----------------------------------------------------------------------------------
  21079. sglang_input_tokens 96.55 96.55
  21080. sglang_output_tokens 42.27 42.27
  21081. 2025-07-20 17:22:12,434 - __main__ - INFO -
  21082. Worker ID | finished | started
  21083. ----------+----------+--------
  21084. 0 | 10 | 11
  21085. 2025-07-20 17:22:13,106 - sglang - INFO - [2025-07-20 17:22:13 TP0] Decode batch. #running-req: 1, #token: 4277, token usage: 0.11, gen throughput (token/s): 47.34, #queue-req: 0
  21086. 2025-07-20 17:22:13,106 - __main__ - INFO - sglang running req: 1 queue req: 0
  21087. 2025-07-20 17:22:13,949 - sglang - INFO - [2025-07-20 17:22:13 TP0] Decode batch. #running-req: 1, #token: 4317, token usage: 0.11, gen throughput (token/s): 47.43, #queue-req: 0
  21088. 2025-07-20 17:22:13,949 - __main__ - INFO - sglang running req: 1 queue req: 0
  21089. 2025-07-20 17:22:14,789 - sglang - INFO - [2025-07-20 17:22:14 TP0] Decode batch. #running-req: 1, #token: 4357, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
  21090. 2025-07-20 17:22:14,789 - __main__ - INFO - sglang running req: 1 queue req: 0
  21091. 2025-07-20 17:22:15,631 - sglang - INFO - [2025-07-20 17:22:15 TP0] Decode batch. #running-req: 1, #token: 4397, token usage: 0.12, gen throughput (token/s): 47.50, #queue-req: 0
  21092. 2025-07-20 17:22:15,631 - __main__ - INFO - sglang running req: 1 queue req: 0
  21093. 2025-07-20 17:22:16,473 - sglang - INFO - [2025-07-20 17:22:16 TP0] Decode batch. #running-req: 1, #token: 4437, token usage: 0.12, gen throughput (token/s): 47.50, #queue-req: 0
  21094. 2025-07-20 17:22:16,473 - __main__ - INFO - sglang running req: 1 queue req: 0
  21095. 2025-07-20 17:22:17,318 - sglang - INFO - [2025-07-20 17:22:17 TP0] Decode batch. #running-req: 1, #token: 4477, token usage: 0.12, gen throughput (token/s): 47.35, #queue-req: 0
  21096. 2025-07-20 17:22:17,318 - __main__ - INFO - sglang running req: 1 queue req: 0
  21097. 2025-07-20 17:22:18,163 - sglang - INFO - [2025-07-20 17:22:18 TP0] Decode batch. #running-req: 1, #token: 4517, token usage: 0.12, gen throughput (token/s): 47.33, #queue-req: 0
  21098. 2025-07-20 17:22:18,163 - __main__ - INFO - sglang running req: 1 queue req: 0
  21099. 2025-07-20 17:22:19,005 - sglang - INFO - [2025-07-20 17:22:19 TP0] Decode batch. #running-req: 1, #token: 4557, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
  21100. 2025-07-20 17:22:19,005 - __main__ - INFO - sglang running req: 1 queue req: 0
  21101. 2025-07-20 17:22:19,851 - sglang - INFO - [2025-07-20 17:22:19 TP0] Decode batch. #running-req: 1, #token: 4597, token usage: 0.12, gen throughput (token/s): 47.27, #queue-req: 0
  21102. 2025-07-20 17:22:19,851 - __main__ - INFO - sglang running req: 1 queue req: 0
  21103. 2025-07-20 17:22:20,697 - sglang - INFO - [2025-07-20 17:22:20 TP0] Decode batch. #running-req: 1, #token: 4637, token usage: 0.12, gen throughput (token/s): 47.28, #queue-req: 0
  21104. 2025-07-20 17:22:20,697 - __main__ - INFO - sglang running req: 1 queue req: 0
  21105. 2025-07-20 17:22:21,542 - sglang - INFO - [2025-07-20 17:22:21 TP0] Decode batch. #running-req: 1, #token: 4677, token usage: 0.12, gen throughput (token/s): 47.33, #queue-req: 0
  21106. 2025-07-20 17:22:21,542 - __main__ - INFO - sglang running req: 1 queue req: 0
  21107. 2025-07-20 17:22:22,386 - sglang - INFO - [2025-07-20 17:22:22 TP0] Decode batch. #running-req: 1, #token: 4717, token usage: 0.12, gen throughput (token/s): 47.41, #queue-req: 0
  21108. 2025-07-20 17:22:22,386 - __main__ - INFO - sglang running req: 1 queue req: 0
  21109. 2025-07-20 17:22:22,435 - __main__ - INFO - Queue remaining: 0
  21110. 2025-07-20 17:22:22,436 - __main__ - INFO -
  21111. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21112. ----------------------------------------------------------------------------------
  21113. sglang_input_tokens 93.02 93.02
  21114. sglang_output_tokens 40.72 40.72
  21115. 2025-07-20 17:22:22,436 - __main__ - INFO -
  21116. Worker ID | finished | started
  21117. ----------+----------+--------
  21118. 0 | 10 | 11
  21119. 2025-07-20 17:22:23,227 - sglang - INFO - [2025-07-20 17:22:23 TP0] Decode batch. #running-req: 1, #token: 4757, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
  21120. 2025-07-20 17:22:23,228 - __main__ - INFO - sglang running req: 1 queue req: 0
  21121. 2025-07-20 17:22:24,070 - sglang - INFO - [2025-07-20 17:22:24 TP0] Decode batch. #running-req: 1, #token: 4797, token usage: 0.13, gen throughput (token/s): 47.47, #queue-req: 0
  21122. 2025-07-20 17:22:24,070 - __main__ - INFO - sglang running req: 1 queue req: 0
  21123. 2025-07-20 17:22:24,916 - sglang - INFO - [2025-07-20 17:22:24 TP0] Decode batch. #running-req: 1, #token: 4837, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
  21124. 2025-07-20 17:22:24,916 - __main__ - INFO - sglang running req: 1 queue req: 0
  21125. 2025-07-20 17:22:25,762 - sglang - INFO - [2025-07-20 17:22:25 TP0] Decode batch. #running-req: 1, #token: 4877, token usage: 0.13, gen throughput (token/s): 47.28, #queue-req: 0
  21126. 2025-07-20 17:22:25,762 - __main__ - INFO - sglang running req: 1 queue req: 0
  21127. 2025-07-20 17:22:26,607 - sglang - INFO - [2025-07-20 17:22:26 TP0] Decode batch. #running-req: 1, #token: 4917, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
  21128. 2025-07-20 17:22:26,608 - __main__ - INFO - sglang running req: 1 queue req: 0
  21129. 2025-07-20 17:22:27,454 - sglang - INFO - [2025-07-20 17:22:27 TP0] Decode batch. #running-req: 1, #token: 4957, token usage: 0.13, gen throughput (token/s): 47.23, #queue-req: 0
  21130. 2025-07-20 17:22:27,455 - __main__ - INFO - sglang running req: 1 queue req: 0
  21131. 2025-07-20 17:22:28,300 - sglang - INFO - [2025-07-20 17:22:28 TP0] Decode batch. #running-req: 1, #token: 4997, token usage: 0.13, gen throughput (token/s): 47.31, #queue-req: 0
  21132. 2025-07-20 17:22:28,300 - __main__ - INFO - sglang running req: 1 queue req: 0
  21133. 2025-07-20 17:22:29,143 - sglang - INFO - [2025-07-20 17:22:29 TP0] Decode batch. #running-req: 1, #token: 5037, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
  21134. 2025-07-20 17:22:29,143 - __main__ - INFO - sglang running req: 1 queue req: 0
  21135. 2025-07-20 17:22:29,988 - sglang - INFO - [2025-07-20 17:22:29 TP0] Decode batch. #running-req: 1, #token: 5077, token usage: 0.13, gen throughput (token/s): 47.36, #queue-req: 0
  21136. 2025-07-20 17:22:29,988 - __main__ - INFO - sglang running req: 1 queue req: 0
  21137. 2025-07-20 17:22:30,078 - __main__ - WARNING - JSON decode error on attempt 2 for test_pdf/1144520000702630XG3440106001004.pdf-8: Unterminated string starting at: line 1 column 125 (char 124)
  21138. 2025-07-20 17:22:30,207 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
  21139. 2025-07-20 17:22:30,437 - sglang - INFO - [2025-07-20 17:22:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2082, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  21140. 2025-07-20 17:22:30,437 - __main__ - INFO - sglang running req: 0 queue req: 0
  21141. 2025-07-20 17:22:31,867 - sglang - INFO - [2025-07-20 17:22:31 TP0] Decode batch. #running-req: 1, #token: 2118, token usage: 0.06, gen throughput (token/s): 21.29, #queue-req: 0
  21142. 2025-07-20 17:22:31,867 - __main__ - INFO - sglang running req: 1 queue req: 0
  21143. 2025-07-20 17:22:32,438 - __main__ - INFO - Queue remaining: 0
  21144. 2025-07-20 17:22:32,438 - __main__ - INFO -
  21145. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21146. ----------------------------------------------------------------------------------
  21147. sglang_input_tokens 97.08 97.08
  21148. sglang_output_tokens 49.86 49.86
  21149. 2025-07-20 17:22:32,438 - __main__ - INFO -
  21150. Worker ID | finished | started
  21151. ----------+----------+--------
  21152. 0 | 10 | 11
  21153. 2025-07-20 17:22:32,709 - sglang - INFO - [2025-07-20 17:22:32 TP0] Decode batch. #running-req: 1, #token: 2158, token usage: 0.06, gen throughput (token/s): 47.51, #queue-req: 0
  21154. 2025-07-20 17:22:32,709 - __main__ - INFO - sglang running req: 1 queue req: 0
  21155. 2025-07-20 17:22:33,548 - sglang - INFO - [2025-07-20 17:22:33 TP0] Decode batch. #running-req: 1, #token: 2198, token usage: 0.06, gen throughput (token/s): 47.67, #queue-req: 0
  21156. 2025-07-20 17:22:33,548 - __main__ - INFO - sglang running req: 1 queue req: 0
  21157. 2025-07-20 17:22:34,386 - sglang - INFO - [2025-07-20 17:22:34 TP0] Decode batch. #running-req: 1, #token: 2238, token usage: 0.06, gen throughput (token/s): 47.69, #queue-req: 0
  21158. 2025-07-20 17:22:34,387 - __main__ - INFO - sglang running req: 1 queue req: 0
  21159. 2025-07-20 17:22:35,228 - sglang - INFO - [2025-07-20 17:22:35 TP0] Decode batch. #running-req: 1, #token: 2278, token usage: 0.06, gen throughput (token/s): 47.54, #queue-req: 0
  21160. 2025-07-20 17:22:35,228 - __main__ - INFO - sglang running req: 1 queue req: 0
  21161. 2025-07-20 17:22:36,070 - sglang - INFO - [2025-07-20 17:22:36 TP0] Decode batch. #running-req: 1, #token: 2318, token usage: 0.06, gen throughput (token/s): 47.52, #queue-req: 0
  21162. 2025-07-20 17:22:36,070 - __main__ - INFO - sglang running req: 1 queue req: 0
  21163. 2025-07-20 17:22:36,908 - sglang - INFO - [2025-07-20 17:22:36 TP0] Decode batch. #running-req: 1, #token: 2358, token usage: 0.06, gen throughput (token/s): 47.70, #queue-req: 0
  21164. 2025-07-20 17:22:36,908 - __main__ - INFO - sglang running req: 1 queue req: 0
  21165. 2025-07-20 17:22:37,747 - sglang - INFO - [2025-07-20 17:22:37 TP0] Decode batch. #running-req: 1, #token: 2398, token usage: 0.06, gen throughput (token/s): 47.71, #queue-req: 0
  21166. 2025-07-20 17:22:37,747 - __main__ - INFO - sglang running req: 1 queue req: 0
  21167. 2025-07-20 17:22:38,587 - sglang - INFO - [2025-07-20 17:22:38 TP0] Decode batch. #running-req: 1, #token: 2438, token usage: 0.06, gen throughput (token/s): 47.59, #queue-req: 0
  21168. 2025-07-20 17:22:38,587 - __main__ - INFO - sglang running req: 1 queue req: 0
  21169. 2025-07-20 17:22:39,429 - sglang - INFO - [2025-07-20 17:22:39 TP0] Decode batch. #running-req: 1, #token: 2478, token usage: 0.07, gen throughput (token/s): 47.53, #queue-req: 0
  21170. 2025-07-20 17:22:39,429 - __main__ - INFO - sglang running req: 1 queue req: 0
  21171. 2025-07-20 17:22:40,271 - sglang - INFO - [2025-07-20 17:22:40 TP0] Decode batch. #running-req: 1, #token: 0, token usage: 0.00, gen throughput (token/s): 47.49, #queue-req: 0
  21172. 2025-07-20 17:22:40,271 - __main__ - INFO - sglang running req: 1 queue req: 0
  21173. 2025-07-20 17:22:40,277 - __main__ - INFO - Finished TaskGroup for worker on 9face5eb793573e747789b627bf1cc4b334b5b93
  21174. 2025-07-20 17:22:40,277 - __main__ - INFO - Got 1 docs for 9face5eb793573e747789b627bf1cc4b334b5b93
  21175. 2025-07-20 17:22:40,279 - __main__ - INFO - Worker 0 exiting due to empty queue
  21176. 2025-07-20 17:22:40,279 - __main__ - INFO - Work done
  21177. 2025-07-20 17:22:40,280 - __main__ - INFO - Got cancellation request for SGLang server
  21178. 2025-07-20 17:24:46,300 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  21179. 2025-07-20 17:24:46,301 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106001004.pdf as PDF document
  21180. 2025-07-20 17:24:46,301 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106004000.pdf as PDF document
  21181. 2025-07-20 17:24:46,302 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106008000.pdf as PDF document
  21182. 2025-07-20 17:24:46,302 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106011000.pdf as PDF document
  21183. 2025-07-20 17:24:46,302 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013001.pdf as PDF document
  21184. 2025-07-20 17:24:46,303 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013002.pdf as PDF document
  21185. 2025-07-20 17:24:46,303 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013003.pdf as PDF document
  21186. 2025-07-20 17:24:46,303 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106013004.pdf as PDF document
  21187. 2025-07-20 17:24:46,304 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106016000.pdf as PDF document
  21188. 2025-07-20 17:24:46,304 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106018000.pdf as PDF document
  21189. 2025-07-20 17:24:46,304 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106028002.pdf as PDF document
  21190. 2025-07-20 17:24:46,305 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029005.pdf as PDF document
  21191. 2025-07-20 17:24:46,305 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900602.pdf as PDF document
  21192. 2025-07-20 17:24:46,306 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900701.pdf as PDF document
  21193. 2025-07-20 17:24:46,306 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900702.pdf as PDF document
  21194. 2025-07-20 17:24:46,307 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106029008.pdf as PDF document
  21195. 2025-07-20 17:24:46,307 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900901.pdf as PDF document
  21196. 2025-07-20 17:24:46,307 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602900902.pdf as PDF document
  21197. 2025-07-20 17:24:46,308 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901001.pdf as PDF document
  21198. 2025-07-20 17:24:46,308 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010602901002.pdf as PDF document
  21199. 2025-07-20 17:24:46,309 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010603501801.pdf as PDF document
  21200. 2025-07-20 17:24:46,309 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG3440106041000.pdf as PDF document
  21201. 2025-07-20 17:24:46,310 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604200101.pdf as PDF document
  21202. 2025-07-20 17:24:46,310 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604300102.pdf as PDF document
  21203. 2025-07-20 17:24:46,310 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301101.pdf as PDF document
  21204. 2025-07-20 17:24:46,311 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301201.pdf as PDF document
  21205. 2025-07-20 17:24:46,311 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301601.pdf as PDF document
  21206. 2025-07-20 17:24:46,311 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301801.pdf as PDF document
  21207. 2025-07-20 17:24:46,312 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604301901.pdf as PDF document
  21208. 2025-07-20 17:24:46,312 - __main__ - INFO - Loading file at test_pdf/1144520000702630XG344010604302101.pdf as PDF document
  21209. 2025-07-20 17:24:46,312 - __main__ - INFO - Found 30 total pdf paths to add
  21210. 2025-07-20 17:24:46,403 - __main__ - INFO - Calculated items_per_group: 1 based on average pages per PDF: 7.60
  21211. 2025-07-20 17:24:46,634 - __main__ - INFO - Starting pipeline with PID 632984
  21212. 2025-07-20 17:24:46,634 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  21213. 2025-07-20 17:24:46,719 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  21214. 2025-07-20 17:24:47,751 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  21215. 2025-07-20 17:24:48,799 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  21216. 2025-07-20 17:24:49,865 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  21217. 2025-07-20 17:24:50,936 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  21218. 2025-07-20 17:24:52,007 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  21219. 2025-07-20 17:24:53,056 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  21220. 2025-07-20 17:24:53,210 - sglang - INFO - [2025-07-20 17:24:53] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=192306107, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  21221. 2025-07-20 17:24:53,210 - __main__ - INFO - [2025-07-20 17:24:53] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=192306107, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  21222. 2025-07-20 17:24:54,131 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  21223. 2025-07-20 17:24:54,281 - sglang - INFO - [2025-07-20 17:24:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
  21224. 2025-07-20 17:24:54,281 - __main__ - INFO - [2025-07-20 17:24:54] Use chat template for the OpenAI-compatible API server: qwen2-vl
  21225. 2025-07-20 17:24:55,207 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  21226. 2025-07-20 17:24:56,275 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  21227. 2025-07-20 17:24:57,343 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  21228. 2025-07-20 17:24:58,411 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  21229. 2025-07-20 17:24:59,479 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  21230. 2025-07-20 17:25:00,545 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  21231. 2025-07-20 17:25:00,944 - sglang - INFO - [2025-07-20 17:25:00 TP0] Overlap scheduler is disabled for multimodal models.
  21232. 2025-07-20 17:25:00,944 - __main__ - INFO - [2025-07-20 17:25:00 TP0] Overlap scheduler is disabled for multimodal models.
  21233. 2025-07-20 17:25:00,947 - sglang - INFO - [2025-07-20 17:25:00 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  21234. 2025-07-20 17:25:00,948 - __main__ - INFO - [2025-07-20 17:25:00 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  21235. 2025-07-20 17:25:00,948 - sglang - INFO - [2025-07-20 17:25:00 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  21236. 2025-07-20 17:25:00,948 - __main__ - INFO - [2025-07-20 17:25:00 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  21237. 2025-07-20 17:25:00,948 - sglang - INFO - [2025-07-20 17:25:00 TP0] Init torch distributed begin.
  21238. 2025-07-20 17:25:00,948 - __main__ - INFO - [2025-07-20 17:25:00 TP0] Init torch distributed begin.
  21239. 2025-07-20 17:25:01,603 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  21240. 2025-07-20 17:25:02,648 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  21241. 2025-07-20 17:25:03,690 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  21242. 2025-07-20 17:25:04,733 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  21243. 2025-07-20 17:25:05,766 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  21244. 2025-07-20 17:25:06,360 - sglang - INFO - [2025-07-20 17:25:06 TP0] Load weight begin. avail mem=23.33 GB
  21245. 2025-07-20 17:25:06,360 - __main__ - INFO - [2025-07-20 17:25:06 TP0] Load weight begin. avail mem=23.33 GB
  21246. 2025-07-20 17:25:06,809 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  21247. 2025-07-20 17:25:07,069 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  21248. 2025-07-20 17:25:07,070 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  21249. 2025-07-20 17:25:07,851 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  21250. 2025-07-20 17:25:08,892 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  21251. 2025-07-20 17:25:09,941 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  21252. 2025-07-20 17:25:11,003 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  21253. 2025-07-20 17:25:12,074 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  21254. 2025-07-20 17:25:13,147 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  21255. 2025-07-20 17:25:14,214 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  21256. 2025-07-20 17:25:14,762 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:07<00:23, 7.69s/it]
  21257. 2025-07-20 17:25:14,762 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:07<00:23, 7.69s/it]
  21258. 2025-07-20 17:25:15,290 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready...
  21259. 2025-07-20 17:25:16,358 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready...
  21260. 2025-07-20 17:25:17,425 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready...
  21261. 2025-07-20 17:25:18,497 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready...
  21262. 2025-07-20 17:25:19,569 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready...
  21263. 2025-07-20 17:25:20,637 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready...
  21264. 2025-07-20 17:25:21,709 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready...
  21265. 2025-07-20 17:25:21,717 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:14<00:14, 7.26s/it]
  21266. 2025-07-20 17:25:21,717 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:14<00:14, 7.26s/it]
  21267. 2025-07-20 17:25:22,781 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready...
  21268. 2025-07-20 17:25:23,841 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready...
  21269. 2025-07-20 17:25:24,896 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready...
  21270. 2025-07-20 17:25:25,963 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready...
  21271. 2025-07-20 17:25:27,030 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready...
  21272. 2025-07-20 17:25:28,096 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready...
  21273. 2025-07-20 17:25:29,164 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready...
  21274. 2025-07-20 17:25:29,488 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:22<00:07, 7.49s/it]
  21275. 2025-07-20 17:25:29,488 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:22<00:07, 7.49s/it]
  21276. 2025-07-20 17:25:30,240 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready...
  21277. 2025-07-20 17:25:31,308 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready...
  21278. 2025-07-20 17:25:32,376 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready...
  21279. 2025-07-20 17:25:32,412 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:25<00:00, 5.69s/it]
  21280. 2025-07-20 17:25:32,412 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:25<00:00, 5.69s/it]
  21281. 2025-07-20 17:25:32,412 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:25<00:00, 6.34s/it]
  21282. 2025-07-20 17:25:32,412 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:25<00:00, 6.34s/it]
  21283. 2025-07-20 17:25:32,412 - sglang - INFO -
  21284. 2025-07-20 17:25:32,412 - __main__ - INFO -
  21285. 2025-07-20 17:25:32,472 - sglang - INFO - [2025-07-20 17:25:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  21286. 2025-07-20 17:25:32,472 - __main__ - INFO - [2025-07-20 17:25:32 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  21287. 2025-07-20 17:25:32,480 - sglang - INFO - [2025-07-20 17:25:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  21288. 2025-07-20 17:25:32,480 - __main__ - INFO - [2025-07-20 17:25:32 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  21289. 2025-07-20 17:25:32,480 - sglang - INFO - [2025-07-20 17:25:32 TP0] Memory pool end. avail mem=5.30 GB
  21290. 2025-07-20 17:25:32,480 - __main__ - INFO - [2025-07-20 17:25:32 TP0] Memory pool end. avail mem=5.30 GB
  21291. 2025-07-20 17:25:32,659 - sglang - INFO - [2025-07-20 17:25:32 TP0] Capture cuda graph begin. This can take up to several minutes.
  21292. 2025-07-20 17:25:32,659 - __main__ - INFO - [2025-07-20 17:25:32 TP0] Capture cuda graph begin. This can take up to several minutes.
  21293. 2025-07-20 17:25:33,473 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready...
  21294. 2025-07-20 17:25:34,549 - __main__ - WARNING - Attempt 46: Please wait for sglang server to become ready...
  21295. 2025-07-20 17:25:34,883 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.33s/it] 50%|█████ | 2/4 [00:01<00:01, 1.39it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.90it/s] 100%|██████████| 4/4 [00:02<00:00, 2.28it/s] 100%|██████████| 4/4 [00:02<00:00, 1.80it/s]
  21296. 2025-07-20 17:25:34,883 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.33s/it] 50%|█████ | 2/4 [00:01<00:01, 1.39it/s] 75%|███████▌ | 3/4 [00:01<00:00, 1.90it/s] 100%|██████████| 4/4 [00:02<00:00, 2.28it/s] 100%|██████████| 4/4 [00:02<00:00, 1.80it/s]
  21297. 2025-07-20 17:25:34,883 - sglang - INFO - [2025-07-20 17:25:34 TP0] Capture cuda graph end. Time elapsed: 2.22 s
  21298. 2025-07-20 17:25:34,883 - __main__ - INFO - [2025-07-20 17:25:34 TP0] Capture cuda graph end. Time elapsed: 2.22 s
  21299. 2025-07-20 17:25:35,635 - __main__ - WARNING - Attempt 47: Please wait for sglang server to become ready...
  21300. 2025-07-20 17:25:35,756 - sglang - INFO - [2025-07-20 17:25:35 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  21301. 2025-07-20 17:25:35,757 - __main__ - INFO - [2025-07-20 17:25:35 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  21302. 2025-07-20 17:25:36,722 - __main__ - INFO - sglang server is ready.
  21303. 2025-07-20 17:25:36,723 - __main__ - INFO - Queue remaining: 30
  21304. 2025-07-20 17:25:36,723 - __main__ - INFO -
  21305. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21306. ----------------------------------------------------------------------------------
  21307. 2025-07-20 17:25:36,723 - __main__ - INFO -
  21308. Worker ID
  21309. ---------
  21310. 2025-07-20 17:25:36,723 - __main__ - INFO - Worker 0 processing work item b3152b4cd8ddb87e2ad8e5fbf7906815031ce44f
  21311. 2025-07-20 17:25:36,723 - __main__ - INFO - Created all tasks for b3152b4cd8ddb87e2ad8e5fbf7906815031ce44f
  21312. 2025-07-20 17:25:36,727 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG344010604302101.pdf in worker 0
  21313. 2025-07-20 17:25:36,861 - sglang - INFO - [2025-07-20 17:25:36 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  21314. 2025-07-20 17:25:36,861 - __main__ - INFO - [2025-07-20 17:25:36 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  21315. 2025-07-20 17:25:36,862 - __main__ - INFO - sglang running req: 0 queue req: 0
  21316. 2025-07-20 17:25:37,636 - sglang - INFO - [2025-07-20 17:25:37] The server is fired up and ready to roll!
  21317. 2025-07-20 17:25:37,636 - __main__ - INFO - [2025-07-20 17:25:37] The server is fired up and ready to roll!
  21318. 2025-07-20 17:25:43,354 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-1
  21319. 2025-07-20 17:25:43,380 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-3
  21320. 2025-07-20 17:25:43,397 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-2
  21321. 2025-07-20 17:25:43,407 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-4
  21322. 2025-07-20 17:25:43,429 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
  21323. 2025-07-20 17:25:43,431 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-6
  21324. 2025-07-20 17:25:43,456 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-7
  21325. 2025-07-20 17:25:43,505 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-11
  21326. 2025-07-20 17:25:43,509 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-10
  21327. 2025-07-20 17:25:43,515 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-9
  21328. 2025-07-20 17:25:43,562 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-8
  21329. 2025-07-20 17:25:46,733 - __main__ - INFO - Queue remaining: 29
  21330. 2025-07-20 17:25:46,733 - __main__ - INFO -
  21331. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21332. ----------------------------------------------------------------------------------
  21333. 2025-07-20 17:25:46,734 - __main__ - INFO -
  21334. Worker ID | started
  21335. ----------+--------
  21336. 0 | 11
  21337. 2025-07-20 17:25:56,736 - __main__ - INFO - Queue remaining: 29
  21338. 2025-07-20 17:25:56,737 - __main__ - INFO -
  21339. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21340. ----------------------------------------------------------------------------------
  21341. 2025-07-20 17:25:56,737 - __main__ - INFO -
  21342. Worker ID | started
  21343. ----------+--------
  21344. 0 | 11
  21345. 2025-07-20 17:26:06,334 - sglang - INFO - [2025-07-20 17:26:06 TP0] Prefill batch. #new-seq: 1, #new-token: 2517, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  21346. 2025-07-20 17:26:06,337 - __main__ - INFO - sglang running req: 0 queue req: 0
  21347. 2025-07-20 17:26:06,739 - __main__ - INFO - Queue remaining: 29
  21348. 2025-07-20 17:26:06,739 - __main__ - INFO -
  21349. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21350. ----------------------------------------------------------------------------------
  21351. 2025-07-20 17:26:06,739 - __main__ - INFO -
  21352. Worker ID | started
  21353. ----------+--------
  21354. 0 | 11
  21355. 2025-07-20 17:26:08,146 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  21356. 2025-07-20 17:26:11,260 - sglang - INFO - [2025-07-20 17:26:11 TP0] Prefill batch. #new-seq: 6, #new-token: 12249, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.07, #running-req: 1, #queue-req: 4
  21357. 2025-07-20 17:26:11,260 - __main__ - INFO - sglang running req: 1 queue req: 4
  21358. 2025-07-20 17:26:15,912 - sglang - INFO - [2025-07-20 17:26:15 TP0] Decode batch. #running-req: 7, #token: 14997, token usage: 0.39, gen throughput (token/s): 5.93, #queue-req: 4
  21359. 2025-07-20 17:26:15,912 - __main__ - INFO - sglang running req: 7 queue req: 4
  21360. 2025-07-20 17:26:16,741 - __main__ - INFO - Queue remaining: 29
  21361. 2025-07-20 17:26:16,741 - __main__ - INFO -
  21362. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21363. ----------------------------------------------------------------------------------
  21364. 2025-07-20 17:26:16,741 - __main__ - INFO -
  21365. Worker ID | started
  21366. ----------+--------
  21367. 0 | 11
  21368. 2025-07-20 17:26:16,793 - sglang - INFO - [2025-07-20 17:26:16 TP0] Decode batch. #running-req: 7, #token: 15277, token usage: 0.40, gen throughput (token/s): 317.79, #queue-req: 4
  21369. 2025-07-20 17:26:16,794 - __main__ - INFO - sglang running req: 7 queue req: 4
  21370. 2025-07-20 17:26:17,676 - sglang - INFO - [2025-07-20 17:26:17 TP0] Decode batch. #running-req: 7, #token: 15557, token usage: 0.41, gen throughput (token/s): 317.40, #queue-req: 4
  21371. 2025-07-20 17:26:17,676 - __main__ - INFO - sglang running req: 7 queue req: 4
  21372. 2025-07-20 17:26:18,561 - sglang - INFO - [2025-07-20 17:26:18 TP0] Decode batch. #running-req: 7, #token: 15837, token usage: 0.42, gen throughput (token/s): 316.32, #queue-req: 4
  21373. 2025-07-20 17:26:18,561 - __main__ - INFO - sglang running req: 7 queue req: 4
  21374. 2025-07-20 17:26:19,447 - sglang - INFO - [2025-07-20 17:26:19 TP0] Decode batch. #running-req: 7, #token: 16117, token usage: 0.42, gen throughput (token/s): 315.82, #queue-req: 4
  21375. 2025-07-20 17:26:19,448 - __main__ - INFO - sglang running req: 7 queue req: 4
  21376. 2025-07-20 17:26:20,334 - sglang - INFO - [2025-07-20 17:26:20 TP0] Decode batch. #running-req: 7, #token: 16397, token usage: 0.43, gen throughput (token/s): 315.80, #queue-req: 4
  21377. 2025-07-20 17:26:20,334 - __main__ - INFO - sglang running req: 7 queue req: 4
  21378. 2025-07-20 17:26:21,222 - sglang - INFO - [2025-07-20 17:26:21 TP0] Decode batch. #running-req: 7, #token: 16677, token usage: 0.44, gen throughput (token/s): 315.33, #queue-req: 4
  21379. 2025-07-20 17:26:21,222 - __main__ - INFO - sglang running req: 7 queue req: 4
  21380. 2025-07-20 17:26:22,110 - sglang - INFO - [2025-07-20 17:26:22 TP0] Decode batch. #running-req: 7, #token: 16957, token usage: 0.45, gen throughput (token/s): 315.36, #queue-req: 4
  21381. 2025-07-20 17:26:22,110 - __main__ - INFO - sglang running req: 7 queue req: 4
  21382. 2025-07-20 17:26:22,288 - sglang - INFO - [2025-07-20 17:26:22 TP0] Prefill batch. #new-seq: 3, #new-token: 6997, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 1
  21383. 2025-07-20 17:26:22,288 - __main__ - INFO - sglang running req: 6 queue req: 1
  21384. 2025-07-20 17:26:25,101 - sglang - INFO - [2025-07-20 17:26:25 TP0] Decode batch. #running-req: 9, #token: 21921, token usage: 0.58, gen throughput (token/s): 114.68, #queue-req: 1
  21385. 2025-07-20 17:26:25,101 - __main__ - INFO - sglang running req: 9 queue req: 1
  21386. 2025-07-20 17:26:26,050 - sglang - INFO - [2025-07-20 17:26:26 TP0] Decode batch. #running-req: 9, #token: 22281, token usage: 0.59, gen throughput (token/s): 379.10, #queue-req: 1
  21387. 2025-07-20 17:26:26,051 - __main__ - INFO - sglang running req: 9 queue req: 1
  21388. 2025-07-20 17:26:26,144 - sglang - INFO - [2025-07-20 17:26:26 TP0] Prefill batch. #new-seq: 1, #new-token: 2128, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.53, #running-req: 8, #queue-req: 0
  21389. 2025-07-20 17:26:26,144 - __main__ - INFO - sglang running req: 8 queue req: 0
  21390. 2025-07-20 17:26:26,743 - __main__ - INFO - Queue remaining: 29
  21391. 2025-07-20 17:26:26,744 - __main__ - INFO -
  21392. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21393. ----------------------------------------------------------------------------------
  21394. sglang_input_tokens 38.44 38.44
  21395. sglang_output_tokens 7.15 7.15
  21396. 2025-07-20 17:26:26,744 - __main__ - INFO -
  21397. Worker ID | finished | started
  21398. ----------+----------+--------
  21399. 0 | 2 | 11
  21400. 2025-07-20 17:26:27,652 - sglang - INFO - [2025-07-20 17:26:27 TP0] Decode batch. #running-req: 9, #token: 22567, token usage: 0.59, gen throughput (token/s): 224.15, #queue-req: 0
  21401. 2025-07-20 17:26:27,652 - __main__ - INFO - sglang running req: 9 queue req: 0
  21402. 2025-07-20 17:26:28,558 - sglang - INFO - [2025-07-20 17:26:28 TP0] Decode batch. #running-req: 7, #token: 17913, token usage: 0.47, gen throughput (token/s): 348.65, #queue-req: 0
  21403. 2025-07-20 17:26:28,559 - __main__ - INFO - sglang running req: 7 queue req: 0
  21404. 2025-07-20 17:26:29,448 - sglang - INFO - [2025-07-20 17:26:29 TP0] Decode batch. #running-req: 6, #token: 15533, token usage: 0.41, gen throughput (token/s): 312.49, #queue-req: 0
  21405. 2025-07-20 17:26:29,448 - __main__ - INFO - sglang running req: 6 queue req: 0
  21406. 2025-07-20 17:26:30,330 - sglang - INFO - [2025-07-20 17:26:30 TP0] Decode batch. #running-req: 6, #token: 15773, token usage: 0.42, gen throughput (token/s): 272.00, #queue-req: 0
  21407. 2025-07-20 17:26:30,330 - __main__ - INFO - sglang running req: 6 queue req: 0
  21408. 2025-07-20 17:26:31,208 - sglang - INFO - [2025-07-20 17:26:31 TP0] Decode batch. #running-req: 5, #token: 12903, token usage: 0.34, gen throughput (token/s): 259.85, #queue-req: 0
  21409. 2025-07-20 17:26:31,208 - __main__ - INFO - sglang running req: 5 queue req: 0
  21410. 2025-07-20 17:26:32,075 - sglang - INFO - [2025-07-20 17:26:32 TP0] Decode batch. #running-req: 5, #token: 13103, token usage: 0.34, gen throughput (token/s): 230.51, #queue-req: 0
  21411. 2025-07-20 17:26:32,076 - __main__ - INFO - sglang running req: 5 queue req: 0
  21412. 2025-07-20 17:26:32,948 - sglang - INFO - [2025-07-20 17:26:32 TP0] Decode batch. #running-req: 5, #token: 13303, token usage: 0.35, gen throughput (token/s): 229.17, #queue-req: 0
  21413. 2025-07-20 17:26:32,948 - __main__ - INFO - sglang running req: 5 queue req: 0
  21414. 2025-07-20 17:26:33,814 - sglang - INFO - [2025-07-20 17:26:33 TP0] Decode batch. #running-req: 4, #token: 11136, token usage: 0.29, gen throughput (token/s): 187.00, #queue-req: 0
  21415. 2025-07-20 17:26:33,815 - __main__ - INFO - sglang running req: 4 queue req: 0
  21416. 2025-07-20 17:26:34,672 - sglang - INFO - [2025-07-20 17:26:34 TP0] Decode batch. #running-req: 3, #token: 8812, token usage: 0.23, gen throughput (token/s): 141.10, #queue-req: 0
  21417. 2025-07-20 17:26:34,672 - __main__ - INFO - sglang running req: 3 queue req: 0
  21418. 2025-07-20 17:26:35,527 - sglang - INFO - [2025-07-20 17:26:35 TP0] Decode batch. #running-req: 3, #token: 8932, token usage: 0.24, gen throughput (token/s): 140.37, #queue-req: 0
  21419. 2025-07-20 17:26:35,527 - __main__ - INFO - sglang running req: 3 queue req: 0
  21420. 2025-07-20 17:26:36,384 - sglang - INFO - [2025-07-20 17:26:36 TP0] Decode batch. #running-req: 3, #token: 9052, token usage: 0.24, gen throughput (token/s): 140.08, #queue-req: 0
  21421. 2025-07-20 17:26:36,384 - __main__ - INFO - sglang running req: 3 queue req: 0
  21422. 2025-07-20 17:26:36,746 - __main__ - INFO - Queue remaining: 29
  21423. 2025-07-20 17:26:36,746 - __main__ - INFO -
  21424. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21425. ----------------------------------------------------------------------------------
  21426. sglang_input_tokens 151.15 151.15
  21427. sglang_output_tokens 30.68 30.68
  21428. 2025-07-20 17:26:36,746 - __main__ - INFO -
  21429. Worker ID | finished | started
  21430. ----------+----------+--------
  21431. 0 | 8 | 11
  21432. 2025-07-20 17:26:37,242 - sglang - INFO - [2025-07-20 17:26:37 TP0] Decode batch. #running-req: 3, #token: 9172, token usage: 0.24, gen throughput (token/s): 139.74, #queue-req: 0
  21433. 2025-07-20 17:26:37,243 - __main__ - INFO - sglang running req: 3 queue req: 0
  21434. 2025-07-20 17:26:38,100 - sglang - INFO - [2025-07-20 17:26:38 TP0] Decode batch. #running-req: 3, #token: 9292, token usage: 0.24, gen throughput (token/s): 139.89, #queue-req: 0
  21435. 2025-07-20 17:26:38,100 - __main__ - INFO - sglang running req: 3 queue req: 0
  21436. 2025-07-20 17:26:38,959 - sglang - INFO - [2025-07-20 17:26:38 TP0] Decode batch. #running-req: 3, #token: 9412, token usage: 0.25, gen throughput (token/s): 139.79, #queue-req: 0
  21437. 2025-07-20 17:26:38,959 - __main__ - INFO - sglang running req: 3 queue req: 0
  21438. 2025-07-20 17:26:39,817 - sglang - INFO - [2025-07-20 17:26:39 TP0] Decode batch. #running-req: 3, #token: 9532, token usage: 0.25, gen throughput (token/s): 139.84, #queue-req: 0
  21439. 2025-07-20 17:26:39,817 - __main__ - INFO - sglang running req: 3 queue req: 0
  21440. 2025-07-20 17:26:40,675 - sglang - INFO - [2025-07-20 17:26:40 TP0] Decode batch. #running-req: 3, #token: 9652, token usage: 0.25, gen throughput (token/s): 139.81, #queue-req: 0
  21441. 2025-07-20 17:26:40,675 - __main__ - INFO - sglang running req: 3 queue req: 0
  21442. 2025-07-20 17:26:41,536 - sglang - INFO - [2025-07-20 17:26:41 TP0] Decode batch. #running-req: 3, #token: 9772, token usage: 0.26, gen throughput (token/s): 139.36, #queue-req: 0
  21443. 2025-07-20 17:26:41,536 - __main__ - INFO - sglang running req: 3 queue req: 0
  21444. 2025-07-20 17:26:42,398 - sglang - INFO - [2025-07-20 17:26:42 TP0] Decode batch. #running-req: 3, #token: 9892, token usage: 0.26, gen throughput (token/s): 139.20, #queue-req: 0
  21445. 2025-07-20 17:26:42,398 - __main__ - INFO - sglang running req: 3 queue req: 0
  21446. 2025-07-20 17:26:43,258 - sglang - INFO - [2025-07-20 17:26:43 TP0] Decode batch. #running-req: 3, #token: 10012, token usage: 0.26, gen throughput (token/s): 139.56, #queue-req: 0
  21447. 2025-07-20 17:26:43,258 - __main__ - INFO - sglang running req: 3 queue req: 0
  21448. 2025-07-20 17:26:44,121 - sglang - INFO - [2025-07-20 17:26:44 TP0] Decode batch. #running-req: 3, #token: 10132, token usage: 0.27, gen throughput (token/s): 138.98, #queue-req: 0
  21449. 2025-07-20 17:26:44,122 - __main__ - INFO - sglang running req: 3 queue req: 0
  21450. 2025-07-20 17:26:44,979 - sglang - INFO - [2025-07-20 17:26:44 TP0] Decode batch. #running-req: 1, #token: 3406, token usage: 0.09, gen throughput (token/s): 104.98, #queue-req: 0
  21451. 2025-07-20 17:26:44,979 - __main__ - INFO - sglang running req: 1 queue req: 0
  21452. 2025-07-20 17:26:45,816 - sglang - INFO - [2025-07-20 17:26:45 TP0] Decode batch. #running-req: 1, #token: 3446, token usage: 0.09, gen throughput (token/s): 47.76, #queue-req: 0
  21453. 2025-07-20 17:26:45,817 - __main__ - INFO - sglang running req: 1 queue req: 0
  21454. 2025-07-20 17:26:46,647 - sglang - INFO - [2025-07-20 17:26:46 TP0] Decode batch. #running-req: 1, #token: 3486, token usage: 0.09, gen throughput (token/s): 48.13, #queue-req: 0
  21455. 2025-07-20 17:26:46,648 - __main__ - INFO - sglang running req: 1 queue req: 0
  21456. 2025-07-20 17:26:46,747 - __main__ - INFO - Queue remaining: 29
  21457. 2025-07-20 17:26:46,747 - __main__ - INFO -
  21458. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21459. ----------------------------------------------------------------------------------
  21460. sglang_input_tokens 180.29 180.29
  21461. sglang_output_tokens 43.04 43.04
  21462. 2025-07-20 17:26:46,748 - __main__ - INFO -
  21463. Worker ID | finished | started
  21464. ----------+----------+--------
  21465. 0 | 10 | 11
  21466. 2025-07-20 17:26:47,475 - sglang - INFO - [2025-07-20 17:26:47 TP0] Decode batch. #running-req: 1, #token: 3526, token usage: 0.09, gen throughput (token/s): 48.33, #queue-req: 0
  21467. 2025-07-20 17:26:47,475 - __main__ - INFO - sglang running req: 1 queue req: 0
  21468. 2025-07-20 17:26:48,302 - sglang - INFO - [2025-07-20 17:26:48 TP0] Decode batch. #running-req: 1, #token: 3566, token usage: 0.09, gen throughput (token/s): 48.35, #queue-req: 0
  21469. 2025-07-20 17:26:48,303 - __main__ - INFO - sglang running req: 1 queue req: 0
  21470. 2025-07-20 17:26:49,139 - sglang - INFO - [2025-07-20 17:26:49 TP0] Decode batch. #running-req: 1, #token: 3606, token usage: 0.09, gen throughput (token/s): 47.83, #queue-req: 0
  21471. 2025-07-20 17:26:49,139 - __main__ - INFO - sglang running req: 1 queue req: 0
  21472. 2025-07-20 17:26:49,976 - sglang - INFO - [2025-07-20 17:26:49 TP0] Decode batch. #running-req: 1, #token: 3646, token usage: 0.10, gen throughput (token/s): 47.77, #queue-req: 0
  21473. 2025-07-20 17:26:49,976 - __main__ - INFO - sglang running req: 1 queue req: 0
  21474. 2025-07-20 17:26:50,813 - sglang - INFO - [2025-07-20 17:26:50 TP0] Decode batch. #running-req: 1, #token: 3686, token usage: 0.10, gen throughput (token/s): 47.77, #queue-req: 0
  21475. 2025-07-20 17:26:50,814 - __main__ - INFO - sglang running req: 1 queue req: 0
  21476. 2025-07-20 17:26:51,652 - sglang - INFO - [2025-07-20 17:26:51 TP0] Decode batch. #running-req: 1, #token: 3726, token usage: 0.10, gen throughput (token/s): 47.68, #queue-req: 0
  21477. 2025-07-20 17:26:51,653 - __main__ - INFO - sglang running req: 1 queue req: 0
  21478. 2025-07-20 17:26:52,490 - sglang - INFO - [2025-07-20 17:26:52 TP0] Decode batch. #running-req: 1, #token: 3766, token usage: 0.10, gen throughput (token/s): 47.76, #queue-req: 0
  21479. 2025-07-20 17:26:52,490 - __main__ - INFO - sglang running req: 1 queue req: 0
  21480. 2025-07-20 17:26:53,327 - sglang - INFO - [2025-07-20 17:26:53 TP0] Decode batch. #running-req: 1, #token: 3806, token usage: 0.10, gen throughput (token/s): 47.79, #queue-req: 0
  21481. 2025-07-20 17:26:53,327 - __main__ - INFO - sglang running req: 1 queue req: 0
  21482. 2025-07-20 17:26:54,163 - sglang - INFO - [2025-07-20 17:26:54 TP0] Decode batch. #running-req: 1, #token: 3846, token usage: 0.10, gen throughput (token/s): 47.83, #queue-req: 0
  21483. 2025-07-20 17:26:54,164 - __main__ - INFO - sglang running req: 1 queue req: 0
  21484. 2025-07-20 17:26:54,997 - sglang - INFO - [2025-07-20 17:26:54 TP0] Decode batch. #running-req: 1, #token: 3886, token usage: 0.10, gen throughput (token/s): 47.99, #queue-req: 0
  21485. 2025-07-20 17:26:54,997 - __main__ - INFO - sglang running req: 1 queue req: 0
  21486. 2025-07-20 17:26:55,830 - sglang - INFO - [2025-07-20 17:26:55 TP0] Decode batch. #running-req: 1, #token: 3926, token usage: 0.10, gen throughput (token/s): 48.00, #queue-req: 0
  21487. 2025-07-20 17:26:55,830 - __main__ - INFO - sglang running req: 1 queue req: 0
  21488. 2025-07-20 17:26:56,670 - sglang - INFO - [2025-07-20 17:26:56 TP0] Decode batch. #running-req: 1, #token: 3966, token usage: 0.10, gen throughput (token/s): 47.61, #queue-req: 0
  21489. 2025-07-20 17:26:56,671 - __main__ - INFO - sglang running req: 1 queue req: 0
  21490. 2025-07-20 17:26:56,749 - __main__ - INFO - Queue remaining: 29
  21491. 2025-07-20 17:26:56,749 - __main__ - INFO -
  21492. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21493. ----------------------------------------------------------------------------------
  21494. sglang_input_tokens 166.47 166.47
  21495. sglang_output_tokens 39.74 39.74
  21496. 2025-07-20 17:26:56,750 - __main__ - INFO -
  21497. Worker ID | finished | started
  21498. ----------+----------+--------
  21499. 0 | 10 | 11
  21500. 2025-07-20 17:26:57,508 - sglang - INFO - [2025-07-20 17:26:57 TP0] Decode batch. #running-req: 1, #token: 4006, token usage: 0.11, gen throughput (token/s): 47.73, #queue-req: 0
  21501. 2025-07-20 17:26:57,509 - __main__ - INFO - sglang running req: 1 queue req: 0
  21502. 2025-07-20 17:26:58,347 - sglang - INFO - [2025-07-20 17:26:58 TP0] Decode batch. #running-req: 1, #token: 4046, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
  21503. 2025-07-20 17:26:58,347 - __main__ - INFO - sglang running req: 1 queue req: 0
  21504. 2025-07-20 17:26:59,187 - sglang - INFO - [2025-07-20 17:26:59 TP0] Decode batch. #running-req: 1, #token: 4086, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
  21505. 2025-07-20 17:26:59,187 - __main__ - INFO - sglang running req: 1 queue req: 0
  21506. 2025-07-20 17:27:00,027 - sglang - INFO - [2025-07-20 17:27:00 TP0] Decode batch. #running-req: 1, #token: 4126, token usage: 0.11, gen throughput (token/s): 47.61, #queue-req: 0
  21507. 2025-07-20 17:27:00,027 - __main__ - INFO - sglang running req: 1 queue req: 0
  21508. 2025-07-20 17:27:00,866 - sglang - INFO - [2025-07-20 17:27:00 TP0] Decode batch. #running-req: 1, #token: 4166, token usage: 0.11, gen throughput (token/s): 47.68, #queue-req: 0
  21509. 2025-07-20 17:27:00,866 - __main__ - INFO - sglang running req: 1 queue req: 0
  21510. 2025-07-20 17:27:01,704 - sglang - INFO - [2025-07-20 17:27:01 TP0] Decode batch. #running-req: 1, #token: 4206, token usage: 0.11, gen throughput (token/s): 47.75, #queue-req: 0
  21511. 2025-07-20 17:27:01,704 - __main__ - INFO - sglang running req: 1 queue req: 0
  21512. 2025-07-20 17:27:02,539 - sglang - INFO - [2025-07-20 17:27:02 TP0] Decode batch. #running-req: 1, #token: 4246, token usage: 0.11, gen throughput (token/s): 47.86, #queue-req: 0
  21513. 2025-07-20 17:27:02,540 - __main__ - INFO - sglang running req: 1 queue req: 0
  21514. 2025-07-20 17:27:03,375 - sglang - INFO - [2025-07-20 17:27:03 TP0] Decode batch. #running-req: 1, #token: 4286, token usage: 0.11, gen throughput (token/s): 47.86, #queue-req: 0
  21515. 2025-07-20 17:27:03,376 - __main__ - INFO - sglang running req: 1 queue req: 0
  21516. 2025-07-20 17:27:04,218 - sglang - INFO - [2025-07-20 17:27:04 TP0] Decode batch. #running-req: 1, #token: 4326, token usage: 0.11, gen throughput (token/s): 47.46, #queue-req: 0
  21517. 2025-07-20 17:27:04,218 - __main__ - INFO - sglang running req: 1 queue req: 0
  21518. 2025-07-20 17:27:05,059 - sglang - INFO - [2025-07-20 17:27:05 TP0] Decode batch. #running-req: 1, #token: 4366, token usage: 0.11, gen throughput (token/s): 47.59, #queue-req: 0
  21519. 2025-07-20 17:27:05,059 - __main__ - INFO - sglang running req: 1 queue req: 0
  21520. 2025-07-20 17:27:05,900 - sglang - INFO - [2025-07-20 17:27:05 TP0] Decode batch. #running-req: 1, #token: 4406, token usage: 0.12, gen throughput (token/s): 47.56, #queue-req: 0
  21521. 2025-07-20 17:27:05,900 - __main__ - INFO - sglang running req: 1 queue req: 0
  21522. 2025-07-20 17:27:06,742 - sglang - INFO - [2025-07-20 17:27:06 TP0] Decode batch. #running-req: 1, #token: 4446, token usage: 0.12, gen throughput (token/s): 47.49, #queue-req: 0
  21523. 2025-07-20 17:27:06,742 - __main__ - INFO - sglang running req: 1 queue req: 0
  21524. 2025-07-20 17:27:06,751 - __main__ - INFO - Queue remaining: 29
  21525. 2025-07-20 17:27:06,751 - __main__ - INFO -
  21526. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21527. ----------------------------------------------------------------------------------
  21528. sglang_input_tokens 154.62 154.62
  21529. sglang_output_tokens 36.91 36.91
  21530. 2025-07-20 17:27:06,751 - __main__ - INFO -
  21531. Worker ID | finished | started
  21532. ----------+----------+--------
  21533. 0 | 10 | 11
  21534. 2025-07-20 17:27:07,584 - sglang - INFO - [2025-07-20 17:27:07 TP0] Decode batch. #running-req: 1, #token: 4486, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
  21535. 2025-07-20 17:27:07,584 - __main__ - INFO - sglang running req: 1 queue req: 0
  21536. 2025-07-20 17:27:08,425 - sglang - INFO - [2025-07-20 17:27:08 TP0] Decode batch. #running-req: 1, #token: 4526, token usage: 0.12, gen throughput (token/s): 47.54, #queue-req: 0
  21537. 2025-07-20 17:27:08,425 - __main__ - INFO - sglang running req: 1 queue req: 0
  21538. 2025-07-20 17:27:09,263 - sglang - INFO - [2025-07-20 17:27:09 TP0] Decode batch. #running-req: 1, #token: 4566, token usage: 0.12, gen throughput (token/s): 47.71, #queue-req: 0
  21539. 2025-07-20 17:27:09,264 - __main__ - INFO - sglang running req: 1 queue req: 0
  21540. 2025-07-20 17:27:10,101 - sglang - INFO - [2025-07-20 17:27:10 TP0] Decode batch. #running-req: 1, #token: 4606, token usage: 0.12, gen throughput (token/s): 47.75, #queue-req: 0
  21541. 2025-07-20 17:27:10,101 - __main__ - INFO - sglang running req: 1 queue req: 0
  21542. 2025-07-20 17:27:10,940 - sglang - INFO - [2025-07-20 17:27:10 TP0] Decode batch. #running-req: 1, #token: 4646, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
  21543. 2025-07-20 17:27:10,940 - __main__ - INFO - sglang running req: 1 queue req: 0
  21544. 2025-07-20 17:27:11,784 - sglang - INFO - [2025-07-20 17:27:11 TP0] Decode batch. #running-req: 1, #token: 4686, token usage: 0.12, gen throughput (token/s): 47.38, #queue-req: 0
  21545. 2025-07-20 17:27:11,784 - __main__ - INFO - sglang running req: 1 queue req: 0
  21546. 2025-07-20 17:27:12,629 - sglang - INFO - [2025-07-20 17:27:12 TP0] Decode batch. #running-req: 1, #token: 4726, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
  21547. 2025-07-20 17:27:12,629 - __main__ - INFO - sglang running req: 1 queue req: 0
  21548. 2025-07-20 17:27:13,472 - sglang - INFO - [2025-07-20 17:27:13 TP0] Decode batch. #running-req: 1, #token: 4766, token usage: 0.13, gen throughput (token/s): 47.41, #queue-req: 0
  21549. 2025-07-20 17:27:13,472 - __main__ - INFO - sglang running req: 1 queue req: 0
  21550. 2025-07-20 17:27:14,316 - sglang - INFO - [2025-07-20 17:27:14 TP0] Decode batch. #running-req: 1, #token: 4806, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
  21551. 2025-07-20 17:27:14,316 - __main__ - INFO - sglang running req: 1 queue req: 0
  21552. 2025-07-20 17:27:15,159 - sglang - INFO - [2025-07-20 17:27:15 TP0] Decode batch. #running-req: 1, #token: 4846, token usage: 0.13, gen throughput (token/s): 47.44, #queue-req: 0
  21553. 2025-07-20 17:27:15,159 - __main__ - INFO - sglang running req: 1 queue req: 0
  21554. 2025-07-20 17:27:16,002 - sglang - INFO - [2025-07-20 17:27:16 TP0] Decode batch. #running-req: 1, #token: 4886, token usage: 0.13, gen throughput (token/s): 47.46, #queue-req: 0
  21555. 2025-07-20 17:27:16,002 - __main__ - INFO - sglang running req: 1 queue req: 0
  21556. 2025-07-20 17:27:16,752 - __main__ - INFO - Queue remaining: 29
  21557. 2025-07-20 17:27:16,753 - __main__ - INFO -
  21558. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21559. ----------------------------------------------------------------------------------
  21560. sglang_input_tokens 144.34 144.34
  21561. sglang_output_tokens 34.46 34.46
  21562. 2025-07-20 17:27:16,753 - __main__ - INFO -
  21563. Worker ID | finished | started
  21564. ----------+----------+--------
  21565. 0 | 10 | 11
  21566. 2025-07-20 17:27:16,843 - sglang - INFO - [2025-07-20 17:27:16 TP0] Decode batch. #running-req: 1, #token: 4926, token usage: 0.13, gen throughput (token/s): 47.52, #queue-req: 0
  21567. 2025-07-20 17:27:16,844 - __main__ - INFO - sglang running req: 1 queue req: 0
  21568. 2025-07-20 17:27:17,685 - sglang - INFO - [2025-07-20 17:27:17 TP0] Decode batch. #running-req: 1, #token: 4966, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
  21569. 2025-07-20 17:27:17,685 - __main__ - INFO - sglang running req: 1 queue req: 0
  21570. 2025-07-20 17:27:18,529 - sglang - INFO - [2025-07-20 17:27:18 TP0] Decode batch. #running-req: 1, #token: 5006, token usage: 0.13, gen throughput (token/s): 47.41, #queue-req: 0
  21571. 2025-07-20 17:27:18,529 - __main__ - INFO - sglang running req: 1 queue req: 0
  21572. 2025-07-20 17:27:19,375 - sglang - INFO - [2025-07-20 17:27:19 TP0] Decode batch. #running-req: 1, #token: 5046, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
  21573. 2025-07-20 17:27:19,375 - __main__ - INFO - sglang running req: 1 queue req: 0
  21574. 2025-07-20 17:27:20,220 - sglang - INFO - [2025-07-20 17:27:20 TP0] Decode batch. #running-req: 1, #token: 5086, token usage: 0.13, gen throughput (token/s): 47.37, #queue-req: 0
  21575. 2025-07-20 17:27:20,220 - __main__ - INFO - sglang running req: 1 queue req: 0
  21576. 2025-07-20 17:27:21,066 - sglang - INFO - [2025-07-20 17:27:21 TP0] Decode batch. #running-req: 1, #token: 5126, token usage: 0.13, gen throughput (token/s): 47.28, #queue-req: 0
  21577. 2025-07-20 17:27:21,066 - __main__ - INFO - sglang running req: 1 queue req: 0
  21578. 2025-07-20 17:27:21,912 - sglang - INFO - [2025-07-20 17:27:21 TP0] Decode batch. #running-req: 1, #token: 5166, token usage: 0.14, gen throughput (token/s): 47.26, #queue-req: 0
  21579. 2025-07-20 17:27:21,912 - __main__ - INFO - sglang running req: 1 queue req: 0
  21580. 2025-07-20 17:27:22,045 - __main__ - WARNING - JSON decode error on attempt 0 for test_pdf/1144520000702630XG344010604302101.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
  21581. 2025-07-20 17:27:22,242 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604302101.pdf-5
  21582. 2025-07-20 17:27:22,476 - sglang - INFO - [2025-07-20 17:27:22 TP0] Prefill batch. #new-seq: 1, #new-token: 2173, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  21583. 2025-07-20 17:27:22,477 - __main__ - INFO - sglang running req: 0 queue req: 0
  21584. 2025-07-20 17:27:23,851 - sglang - INFO - [2025-07-20 17:27:23 TP0] Decode batch. #running-req: 1, #token: 2207, token usage: 0.06, gen throughput (token/s): 20.63, #queue-req: 0
  21585. 2025-07-20 17:27:23,851 - __main__ - INFO - sglang running req: 1 queue req: 0
  21586. 2025-07-20 17:27:24,686 - sglang - INFO - [2025-07-20 17:27:24 TP0] Decode batch. #running-req: 1, #token: 2247, token usage: 0.06, gen throughput (token/s): 47.87, #queue-req: 0
  21587. 2025-07-20 17:27:24,686 - __main__ - INFO - sglang running req: 1 queue req: 0
  21588. 2025-07-20 17:27:25,517 - sglang - INFO - [2025-07-20 17:27:25 TP0] Decode batch. #running-req: 1, #token: 2287, token usage: 0.06, gen throughput (token/s): 48.14, #queue-req: 0
  21589. 2025-07-20 17:27:25,517 - __main__ - INFO - sglang running req: 1 queue req: 0
  21590. 2025-07-20 17:27:26,354 - sglang - INFO - [2025-07-20 17:27:26 TP0] Decode batch. #running-req: 1, #token: 2327, token usage: 0.06, gen throughput (token/s): 47.79, #queue-req: 0
  21591. 2025-07-20 17:27:26,355 - __main__ - INFO - sglang running req: 1 queue req: 0
  21592. 2025-07-20 17:27:26,754 - __main__ - INFO - Queue remaining: 29
  21593. 2025-07-20 17:27:26,755 - __main__ - INFO -
  21594. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21595. ----------------------------------------------------------------------------------
  21596. sglang_input_tokens 148.89 148.89
  21597. sglang_output_tokens 51.01 51.01
  21598. 2025-07-20 17:27:26,755 - __main__ - INFO -
  21599. Worker ID | finished | started
  21600. ----------+----------+--------
  21601. 0 | 10 | 11
  21602. 2025-07-20 17:27:27,192 - sglang - INFO - [2025-07-20 17:27:27 TP0] Decode batch. #running-req: 1, #token: 2367, token usage: 0.06, gen throughput (token/s): 47.74, #queue-req: 0
  21603. 2025-07-20 17:27:27,192 - __main__ - INFO - sglang running req: 1 queue req: 0
  21604. 2025-07-20 17:27:28,030 - sglang - INFO - [2025-07-20 17:27:28 TP0] Decode batch. #running-req: 1, #token: 2407, token usage: 0.06, gen throughput (token/s): 47.76, #queue-req: 0
  21605. 2025-07-20 17:27:28,030 - __main__ - INFO - sglang running req: 1 queue req: 0
  21606. 2025-07-20 17:27:28,867 - sglang - INFO - [2025-07-20 17:27:28 TP0] Decode batch. #running-req: 1, #token: 2447, token usage: 0.06, gen throughput (token/s): 47.76, #queue-req: 0
  21607. 2025-07-20 17:27:28,867 - __main__ - INFO - sglang running req: 1 queue req: 0
  21608. 2025-07-20 17:27:29,705 - sglang - INFO - [2025-07-20 17:27:29 TP0] Decode batch. #running-req: 1, #token: 2487, token usage: 0.07, gen throughput (token/s): 47.76, #queue-req: 0
  21609. 2025-07-20 17:27:29,705 - __main__ - INFO - sglang running req: 1 queue req: 0
  21610. 2025-07-20 17:27:30,543 - sglang - INFO - [2025-07-20 17:27:30 TP0] Decode batch. #running-req: 1, #token: 2527, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
  21611. 2025-07-20 17:27:30,543 - __main__ - INFO - sglang running req: 1 queue req: 0
  21612. 2025-07-20 17:27:31,380 - sglang - INFO - [2025-07-20 17:27:31 TP0] Decode batch. #running-req: 1, #token: 2567, token usage: 0.07, gen throughput (token/s): 47.76, #queue-req: 0
  21613. 2025-07-20 17:27:31,380 - __main__ - INFO - sglang running req: 1 queue req: 0
  21614. 2025-07-20 17:27:32,216 - sglang - INFO - [2025-07-20 17:27:32 TP0] Decode batch. #running-req: 1, #token: 2607, token usage: 0.07, gen throughput (token/s): 47.85, #queue-req: 0
  21615. 2025-07-20 17:27:32,216 - __main__ - INFO - sglang running req: 1 queue req: 0
  21616. 2025-07-20 17:27:33,047 - sglang - INFO - [2025-07-20 17:27:33 TP0] Decode batch. #running-req: 1, #token: 2647, token usage: 0.07, gen throughput (token/s): 48.13, #queue-req: 0
  21617. 2025-07-20 17:27:33,047 - __main__ - INFO - sglang running req: 1 queue req: 0
  21618. 2025-07-20 17:27:33,872 - __main__ - INFO - Finished TaskGroup for worker on b3152b4cd8ddb87e2ad8e5fbf7906815031ce44f
  21619. 2025-07-20 17:27:33,872 - __main__ - INFO - Got 1 docs for b3152b4cd8ddb87e2ad8e5fbf7906815031ce44f
  21620. 2025-07-20 17:27:33,874 - __main__ - INFO - Worker 0 processing work item 0c3e9a89b35c3045b6a67f7cd5c06009a31d750f
  21621. 2025-07-20 17:27:33,874 - __main__ - INFO - Created all tasks for 0c3e9a89b35c3045b6a67f7cd5c06009a31d750f
  21622. 2025-07-20 17:27:33,882 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301801.pdf in worker 0
  21623. 2025-07-20 17:27:34,015 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-9
  21624. 2025-07-20 17:27:34,022 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-4
  21625. 2025-07-20 17:27:34,025 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-6
  21626. 2025-07-20 17:27:34,041 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-2
  21627. 2025-07-20 17:27:34,073 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-5
  21628. 2025-07-20 17:27:34,090 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-3
  21629. 2025-07-20 17:27:34,123 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-1
  21630. 2025-07-20 17:27:34,128 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-8
  21631. 2025-07-20 17:27:34,150 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301801.pdf-7
  21632. 2025-07-20 17:27:34,166 - sglang - INFO - [2025-07-20 17:27:34 TP0] Prefill batch. #new-seq: 1, #new-token: 1713, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  21633. 2025-07-20 17:27:34,166 - __main__ - INFO - sglang running req: 0 queue req: 0
  21634. 2025-07-20 17:27:34,918 - sglang - INFO - [2025-07-20 17:27:34 TP0] Prefill batch. #new-seq: 6, #new-token: 14259, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 2
  21635. 2025-07-20 17:27:34,918 - __main__ - INFO - sglang running req: 1 queue req: 2
  21636. 2025-07-20 17:27:36,756 - __main__ - INFO - Queue remaining: 28
  21637. 2025-07-20 17:27:36,757 - __main__ - INFO -
  21638. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21639. ----------------------------------------------------------------------------------
  21640. finished_input_tokens 140.15 140.15
  21641. finished_output_tokens 33.43 33.43
  21642. sglang_input_tokens 152.90 152.90
  21643. sglang_output_tokens 51.03 51.03
  21644. 2025-07-20 17:27:36,757 - __main__ - INFO -
  21645. Worker ID | started
  21646. ----------+--------
  21647. 0 | 9
  21648. 2025-07-20 17:27:39,155 - sglang - INFO - [2025-07-20 17:27:39 TP0] Decode batch. #running-req: 7, #token: 15979, token usage: 0.42, gen throughput (token/s): 7.53, #queue-req: 2
  21649. 2025-07-20 17:27:39,155 - __main__ - INFO - sglang running req: 7 queue req: 2
  21650. 2025-07-20 17:27:40,041 - sglang - INFO - [2025-07-20 17:27:40 TP0] Decode batch. #running-req: 7, #token: 16259, token usage: 0.43, gen throughput (token/s): 315.80, #queue-req: 2
  21651. 2025-07-20 17:27:40,041 - __main__ - INFO - sglang running req: 7 queue req: 2
  21652. 2025-07-20 17:27:40,932 - sglang - INFO - [2025-07-20 17:27:40 TP0] Decode batch. #running-req: 7, #token: 16539, token usage: 0.44, gen throughput (token/s): 314.16, #queue-req: 2
  21653. 2025-07-20 17:27:40,933 - __main__ - INFO - sglang running req: 7 queue req: 2
  21654. 2025-07-20 17:27:41,829 - sglang - INFO - [2025-07-20 17:27:41 TP0] Decode batch. #running-req: 7, #token: 16819, token usage: 0.44, gen throughput (token/s): 312.18, #queue-req: 2
  21655. 2025-07-20 17:27:41,829 - __main__ - INFO - sglang running req: 7 queue req: 2
  21656. 2025-07-20 17:27:42,726 - sglang - INFO - [2025-07-20 17:27:42 TP0] Decode batch. #running-req: 7, #token: 17099, token usage: 0.45, gen throughput (token/s): 312.45, #queue-req: 2
  21657. 2025-07-20 17:27:42,726 - __main__ - INFO - sglang running req: 7 queue req: 2
  21658. 2025-07-20 17:27:43,621 - sglang - INFO - [2025-07-20 17:27:43 TP0] Decode batch. #running-req: 7, #token: 17379, token usage: 0.46, gen throughput (token/s): 312.55, #queue-req: 2
  21659. 2025-07-20 17:27:43,622 - __main__ - INFO - sglang running req: 7 queue req: 2
  21660. 2025-07-20 17:27:44,517 - sglang - INFO - [2025-07-20 17:27:44 TP0] Decode batch. #running-req: 7, #token: 17659, token usage: 0.46, gen throughput (token/s): 312.52, #queue-req: 2
  21661. 2025-07-20 17:27:44,518 - __main__ - INFO - sglang running req: 7 queue req: 2
  21662. 2025-07-20 17:27:44,967 - sglang - INFO - [2025-07-20 17:27:44 TP0] Prefill batch. #new-seq: 2, #new-token: 4599, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.41, #running-req: 6, #queue-req: 0
  21663. 2025-07-20 17:27:44,967 - __main__ - INFO - sglang running req: 6 queue req: 0
  21664. 2025-07-20 17:27:46,758 - __main__ - INFO - Queue remaining: 28
  21665. 2025-07-20 17:27:46,758 - __main__ - INFO -
  21666. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21667. ----------------------------------------------------------------------------------
  21668. finished_input_tokens 132.38 132.38
  21669. finished_output_tokens 31.58 31.58
  21670. sglang_input_tokens 164.64 164.64
  21671. sglang_output_tokens 51.12 51.12
  21672. 2025-07-20 17:27:46,758 - __main__ - INFO -
  21673. Worker ID | finished | started
  21674. ----------+----------+--------
  21675. 0 | 2 | 9
  21676. 2025-07-20 17:27:46,813 - sglang - INFO - [2025-07-20 17:27:46 TP0] Decode batch. #running-req: 7, #token: 18368, token usage: 0.48, gen throughput (token/s): 122.87, #queue-req: 0
  21677. 2025-07-20 17:27:46,813 - __main__ - INFO - sglang running req: 7 queue req: 0
  21678. 2025-07-20 17:27:47,702 - sglang - INFO - [2025-07-20 17:27:47 TP0] Decode batch. #running-req: 7, #token: 18648, token usage: 0.49, gen throughput (token/s): 314.82, #queue-req: 0
  21679. 2025-07-20 17:27:47,702 - __main__ - INFO - sglang running req: 7 queue req: 0
  21680. 2025-07-20 17:27:48,593 - sglang - INFO - [2025-07-20 17:27:48 TP0] Decode batch. #running-req: 7, #token: 18928, token usage: 0.50, gen throughput (token/s): 314.27, #queue-req: 0
  21681. 2025-07-20 17:27:48,593 - __main__ - INFO - sglang running req: 7 queue req: 0
  21682. 2025-07-20 17:27:49,487 - sglang - INFO - [2025-07-20 17:27:49 TP0] Decode batch. #running-req: 6, #token: 16554, token usage: 0.44, gen throughput (token/s): 281.73, #queue-req: 0
  21683. 2025-07-20 17:27:49,487 - __main__ - INFO - sglang running req: 6 queue req: 0
  21684. 2025-07-20 17:27:50,380 - sglang - INFO - [2025-07-20 17:27:50 TP0] Decode batch. #running-req: 6, #token: 16794, token usage: 0.44, gen throughput (token/s): 268.91, #queue-req: 0
  21685. 2025-07-20 17:27:50,380 - __main__ - INFO - sglang running req: 6 queue req: 0
  21686. 2025-07-20 17:27:51,264 - sglang - INFO - [2025-07-20 17:27:51 TP0] Decode batch. #running-req: 4, #token: 11257, token usage: 0.30, gen throughput (token/s): 213.78, #queue-req: 0
  21687. 2025-07-20 17:27:51,264 - __main__ - INFO - sglang running req: 4 queue req: 0
  21688. 2025-07-20 17:27:52,134 - sglang - INFO - [2025-07-20 17:27:52 TP0] Decode batch. #running-req: 4, #token: 11417, token usage: 0.30, gen throughput (token/s): 183.81, #queue-req: 0
  21689. 2025-07-20 17:27:52,135 - __main__ - INFO - sglang running req: 4 queue req: 0
  21690. 2025-07-20 17:27:53,005 - sglang - INFO - [2025-07-20 17:27:53 TP0] Decode batch. #running-req: 4, #token: 11577, token usage: 0.30, gen throughput (token/s): 183.78, #queue-req: 0
  21691. 2025-07-20 17:27:53,005 - __main__ - INFO - sglang running req: 4 queue req: 0
  21692. 2025-07-20 17:27:53,872 - sglang - INFO - [2025-07-20 17:27:53 TP0] Decode batch. #running-req: 3, #token: 8484, token usage: 0.22, gen throughput (token/s): 171.87, #queue-req: 0
  21693. 2025-07-20 17:27:53,872 - __main__ - INFO - sglang running req: 3 queue req: 0
  21694. 2025-07-20 17:27:54,729 - sglang - INFO - [2025-07-20 17:27:54 TP0] Decode batch. #running-req: 3, #token: 8604, token usage: 0.23, gen throughput (token/s): 139.94, #queue-req: 0
  21695. 2025-07-20 17:27:54,730 - __main__ - INFO - sglang running req: 3 queue req: 0
  21696. 2025-07-20 17:27:55,578 - sglang - INFO - [2025-07-20 17:27:55 TP0] Decode batch. #running-req: 2, #token: 5439, token usage: 0.14, gen throughput (token/s): 117.83, #queue-req: 0
  21697. 2025-07-20 17:27:55,578 - __main__ - INFO - sglang running req: 2 queue req: 0
  21698. 2025-07-20 17:27:56,427 - sglang - INFO - [2025-07-20 17:27:56 TP0] Decode batch. #running-req: 2, #token: 5519, token usage: 0.15, gen throughput (token/s): 94.21, #queue-req: 0
  21699. 2025-07-20 17:27:56,428 - __main__ - INFO - sglang running req: 2 queue req: 0
  21700. 2025-07-20 17:27:56,759 - __main__ - INFO - Queue remaining: 28
  21701. 2025-07-20 17:27:56,760 - __main__ - INFO -
  21702. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21703. ----------------------------------------------------------------------------------
  21704. finished_input_tokens 125.43 125.43
  21705. finished_output_tokens 29.92 29.92
  21706. sglang_input_tokens 220.69 220.69
  21707. sglang_output_tokens 61.77 61.77
  21708. 2025-07-20 17:27:56,760 - __main__ - INFO -
  21709. Worker ID | finished | started
  21710. ----------+----------+--------
  21711. 0 | 7 | 9
  21712. 2025-07-20 17:27:57,277 - sglang - INFO - [2025-07-20 17:27:57 TP0] Decode batch. #running-req: 2, #token: 5599, token usage: 0.15, gen throughput (token/s): 94.17, #queue-req: 0
  21713. 2025-07-20 17:27:57,277 - __main__ - INFO - sglang running req: 2 queue req: 0
  21714. 2025-07-20 17:27:58,124 - sglang - INFO - [2025-07-20 17:27:58 TP0] Decode batch. #running-req: 2, #token: 5679, token usage: 0.15, gen throughput (token/s): 94.42, #queue-req: 0
  21715. 2025-07-20 17:27:58,124 - __main__ - INFO - sglang running req: 2 queue req: 0
  21716. 2025-07-20 17:27:58,973 - sglang - INFO - [2025-07-20 17:27:58 TP0] Decode batch. #running-req: 2, #token: 5759, token usage: 0.15, gen throughput (token/s): 94.26, #queue-req: 0
  21717. 2025-07-20 17:27:58,973 - __main__ - INFO - sglang running req: 2 queue req: 0
  21718. 2025-07-20 17:27:59,823 - sglang - INFO - [2025-07-20 17:27:59 TP0] Decode batch. #running-req: 2, #token: 5839, token usage: 0.15, gen throughput (token/s): 94.13, #queue-req: 0
  21719. 2025-07-20 17:27:59,823 - __main__ - INFO - sglang running req: 2 queue req: 0
  21720. 2025-07-20 17:28:00,673 - sglang - INFO - [2025-07-20 17:28:00 TP0] Decode batch. #running-req: 2, #token: 5919, token usage: 0.16, gen throughput (token/s): 94.13, #queue-req: 0
  21721. 2025-07-20 17:28:00,673 - __main__ - INFO - sglang running req: 2 queue req: 0
  21722. 2025-07-20 17:28:01,519 - sglang - INFO - [2025-07-20 17:28:01 TP0] Decode batch. #running-req: 2, #token: 5999, token usage: 0.16, gen throughput (token/s): 94.49, #queue-req: 0
  21723. 2025-07-20 17:28:01,520 - __main__ - INFO - sglang running req: 2 queue req: 0
  21724. 2025-07-20 17:28:02,363 - sglang - INFO - [2025-07-20 17:28:02 TP0] Decode batch. #running-req: 2, #token: 6079, token usage: 0.16, gen throughput (token/s): 94.87, #queue-req: 0
  21725. 2025-07-20 17:28:02,363 - __main__ - INFO - sglang running req: 2 queue req: 0
  21726. 2025-07-20 17:28:03,207 - sglang - INFO - [2025-07-20 17:28:03 TP0] Decode batch. #running-req: 2, #token: 6159, token usage: 0.16, gen throughput (token/s): 94.75, #queue-req: 0
  21727. 2025-07-20 17:28:03,207 - __main__ - INFO - sglang running req: 2 queue req: 0
  21728. 2025-07-20 17:28:04,058 - sglang - INFO - [2025-07-20 17:28:04 TP0] Decode batch. #running-req: 2, #token: 6239, token usage: 0.16, gen throughput (token/s): 93.96, #queue-req: 0
  21729. 2025-07-20 17:28:04,059 - __main__ - INFO - sglang running req: 2 queue req: 0
  21730. 2025-07-20 17:28:04,904 - sglang - INFO - [2025-07-20 17:28:04 TP0] Decode batch. #running-req: 1, #token: 3177, token usage: 0.08, gen throughput (token/s): 72.10, #queue-req: 0
  21731. 2025-07-20 17:28:04,905 - __main__ - INFO - sglang running req: 1 queue req: 0
  21732. 2025-07-20 17:28:05,562 - __main__ - INFO - Finished TaskGroup for worker on 0c3e9a89b35c3045b6a67f7cd5c06009a31d750f
  21733. 2025-07-20 17:28:05,562 - __main__ - INFO - Got 1 docs for 0c3e9a89b35c3045b6a67f7cd5c06009a31d750f
  21734. 2025-07-20 17:28:05,563 - __main__ - INFO - Worker 0 processing work item 10dc5d29c3f17870daf918c9555cd0b939acbffe
  21735. 2025-07-20 17:28:05,563 - __main__ - INFO - Created all tasks for 10dc5d29c3f17870daf918c9555cd0b939acbffe
  21736. 2025-07-20 17:28:05,570 - __main__ - INFO - Got 12 pages to do for test_pdf/1144520000702630XG344010604301101.pdf in worker 0
  21737. 2025-07-20 17:28:05,672 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-9
  21738. 2025-07-20 17:28:05,706 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-12
  21739. 2025-07-20 17:28:05,713 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-3
  21740. 2025-07-20 17:28:05,718 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-2
  21741. 2025-07-20 17:28:05,728 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-6
  21742. 2025-07-20 17:28:05,736 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-5
  21743. 2025-07-20 17:28:05,747 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-7
  21744. 2025-07-20 17:28:05,753 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-8
  21745. 2025-07-20 17:28:05,758 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-4
  21746. 2025-07-20 17:28:05,764 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-1
  21747. 2025-07-20 17:28:05,816 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-10
  21748. 2025-07-20 17:28:05,835 - sglang - INFO - [2025-07-20 17:28:05 TP0] Prefill batch. #new-seq: 1, #new-token: 1485, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  21749. 2025-07-20 17:28:05,835 - __main__ - INFO - sglang running req: 0 queue req: 0
  21750. 2025-07-20 17:28:05,847 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301101.pdf-11
  21751. 2025-07-20 17:28:06,761 - __main__ - INFO - Queue remaining: 27
  21752. 2025-07-20 17:28:06,762 - __main__ - INFO -
  21753. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21754. ----------------------------------------------------------------------------------
  21755. finished_input_tokens 221.79 221.79
  21756. finished_output_tokens 52.37 52.37
  21757. sglang_input_tokens 232.62 232.62
  21758. sglang_output_tokens 67.34 67.34
  21759. 2025-07-20 17:28:06,762 - __main__ - INFO -
  21760. Worker ID | started
  21761. ----------+--------
  21762. 0 | 12
  21763. 2025-07-20 17:28:07,199 - sglang - INFO - [2025-07-20 17:28:07 TP0] Prefill batch. #new-seq: 6, #new-token: 12589, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 5
  21764. 2025-07-20 17:28:07,200 - __main__ - INFO - sglang running req: 1 queue req: 5
  21765. 2025-07-20 17:28:11,289 - sglang - INFO - [2025-07-20 17:28:11 TP0] Decode batch. #running-req: 7, #token: 14137, token usage: 0.37, gen throughput (token/s): 14.72, #queue-req: 5
  21766. 2025-07-20 17:28:11,290 - __main__ - INFO - sglang running req: 7 queue req: 5
  21767. 2025-07-20 17:28:12,176 - sglang - INFO - [2025-07-20 17:28:12 TP0] Decode batch. #running-req: 7, #token: 14417, token usage: 0.38, gen throughput (token/s): 315.67, #queue-req: 5
  21768. 2025-07-20 17:28:12,177 - __main__ - INFO - sglang running req: 7 queue req: 5
  21769. 2025-07-20 17:28:13,062 - sglang - INFO - [2025-07-20 17:28:13 TP0] Decode batch. #running-req: 7, #token: 14697, token usage: 0.39, gen throughput (token/s): 316.16, #queue-req: 5
  21770. 2025-07-20 17:28:13,062 - __main__ - INFO - sglang running req: 7 queue req: 5
  21771. 2025-07-20 17:28:13,952 - sglang - INFO - [2025-07-20 17:28:13 TP0] Decode batch. #running-req: 7, #token: 14977, token usage: 0.39, gen throughput (token/s): 314.76, #queue-req: 5
  21772. 2025-07-20 17:28:13,952 - __main__ - INFO - sglang running req: 7 queue req: 5
  21773. 2025-07-20 17:28:14,842 - sglang - INFO - [2025-07-20 17:28:14 TP0] Decode batch. #running-req: 7, #token: 15257, token usage: 0.40, gen throughput (token/s): 314.66, #queue-req: 5
  21774. 2025-07-20 17:28:14,842 - __main__ - INFO - sglang running req: 7 queue req: 5
  21775. 2025-07-20 17:28:15,730 - sglang - INFO - [2025-07-20 17:28:15 TP0] Decode batch. #running-req: 7, #token: 15537, token usage: 0.41, gen throughput (token/s): 315.13, #queue-req: 5
  21776. 2025-07-20 17:28:15,730 - __main__ - INFO - sglang running req: 7 queue req: 5
  21777. 2025-07-20 17:28:16,197 - sglang - INFO - [2025-07-20 17:28:16 TP0] Prefill batch. #new-seq: 2, #new-token: 5286, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.37, #running-req: 6, #queue-req: 3
  21778. 2025-07-20 17:28:16,197 - __main__ - INFO - sglang running req: 6 queue req: 3
  21779. 2025-07-20 17:28:16,764 - __main__ - INFO - Queue remaining: 27
  21780. 2025-07-20 17:28:16,764 - __main__ - INFO -
  21781. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21782. ----------------------------------------------------------------------------------
  21783. finished_input_tokens 211.25 211.25
  21784. finished_output_tokens 49.88 49.88
  21785. sglang_input_tokens 228.62 228.62
  21786. sglang_output_tokens 65.23 65.23
  21787. 2025-07-20 17:28:16,765 - __main__ - INFO -
  21788. Worker ID | finished | started
  21789. ----------+----------+--------
  21790. 0 | 1 | 12
  21791. 2025-07-20 17:28:18,160 - sglang - INFO - [2025-07-20 17:28:18 TP0] Decode batch. #running-req: 8, #token: 19407, token usage: 0.51, gen throughput (token/s): 122.64, #queue-req: 3
  21792. 2025-07-20 17:28:18,160 - __main__ - INFO - sglang running req: 8 queue req: 3
  21793. 2025-07-20 17:28:18,497 - sglang - INFO - [2025-07-20 17:28:18 TP0] Prefill batch. #new-seq: 2, #new-token: 4907, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.46, #running-req: 7, #queue-req: 1
  21794. 2025-07-20 17:28:18,498 - __main__ - INFO - sglang running req: 7 queue req: 1
  21795. 2025-07-20 17:28:20,558 - sglang - INFO - [2025-07-20 17:28:20 TP0] Decode batch. #running-req: 9, #token: 22604, token usage: 0.60, gen throughput (token/s): 143.43, #queue-req: 1
  21796. 2025-07-20 17:28:20,559 - __main__ - INFO - sglang running req: 9 queue req: 1
  21797. 2025-07-20 17:28:21,518 - sglang - INFO - [2025-07-20 17:28:21 TP0] Decode batch. #running-req: 9, #token: 22964, token usage: 0.60, gen throughput (token/s): 375.31, #queue-req: 1
  21798. 2025-07-20 17:28:21,518 - __main__ - INFO - sglang running req: 9 queue req: 1
  21799. 2025-07-20 17:28:21,685 - sglang - INFO - [2025-07-20 17:28:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2316, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 8, #queue-req: 0
  21800. 2025-07-20 17:28:21,685 - __main__ - INFO - sglang running req: 8 queue req: 0
  21801. 2025-07-20 17:28:23,224 - sglang - INFO - [2025-07-20 17:28:23 TP0] Decode batch. #running-req: 9, #token: 23144, token usage: 0.61, gen throughput (token/s): 210.34, #queue-req: 0
  21802. 2025-07-20 17:28:23,225 - __main__ - INFO - sglang running req: 9 queue req: 0
  21803. 2025-07-20 17:28:24,182 - sglang - INFO - [2025-07-20 17:28:24 TP0] Decode batch. #running-req: 9, #token: 23504, token usage: 0.62, gen throughput (token/s): 375.79, #queue-req: 0
  21804. 2025-07-20 17:28:24,183 - __main__ - INFO - sglang running req: 9 queue req: 0
  21805. 2025-07-20 17:28:25,139 - sglang - INFO - [2025-07-20 17:28:25 TP0] Decode batch. #running-req: 9, #token: 23864, token usage: 0.63, gen throughput (token/s): 376.40, #queue-req: 0
  21806. 2025-07-20 17:28:25,139 - __main__ - INFO - sglang running req: 9 queue req: 0
  21807. 2025-07-20 17:28:26,051 - sglang - INFO - [2025-07-20 17:28:26 TP0] Decode batch. #running-req: 8, #token: 21936, token usage: 0.58, gen throughput (token/s): 358.60, #queue-req: 0
  21808. 2025-07-20 17:28:26,051 - __main__ - INFO - sglang running req: 8 queue req: 0
  21809. 2025-07-20 17:28:26,766 - __main__ - INFO - Queue remaining: 27
  21810. 2025-07-20 17:28:26,767 - __main__ - INFO -
  21811. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21812. ----------------------------------------------------------------------------------
  21813. finished_input_tokens 201.66 201.66
  21814. finished_output_tokens 47.62 47.62
  21815. sglang_input_tokens 264.74 264.74
  21816. sglang_output_tokens 71.56 71.56
  21817. 2025-07-20 17:28:26,767 - __main__ - INFO -
  21818. Worker ID | finished | started
  21819. ----------+----------+--------
  21820. 0 | 6 | 12
  21821. 2025-07-20 17:28:26,948 - sglang - INFO - [2025-07-20 17:28:26 TP0] Decode batch. #running-req: 6, #token: 16699, token usage: 0.44, gen throughput (token/s): 280.85, #queue-req: 0
  21822. 2025-07-20 17:28:26,948 - __main__ - INFO - sglang running req: 6 queue req: 0
  21823. 2025-07-20 17:28:27,842 - sglang - INFO - [2025-07-20 17:28:27 TP0] Decode batch. #running-req: 6, #token: 16939, token usage: 0.45, gen throughput (token/s): 268.53, #queue-req: 0
  21824. 2025-07-20 17:28:27,842 - __main__ - INFO - sglang running req: 6 queue req: 0
  21825. 2025-07-20 17:28:28,729 - sglang - INFO - [2025-07-20 17:28:28 TP0] Decode batch. #running-req: 5, #token: 14230, token usage: 0.37, gen throughput (token/s): 235.47, #queue-req: 0
  21826. 2025-07-20 17:28:28,730 - __main__ - INFO - sglang running req: 5 queue req: 0
  21827. 2025-07-20 17:28:29,616 - sglang - INFO - [2025-07-20 17:28:29 TP0] Decode batch. #running-req: 5, #token: 14430, token usage: 0.38, gen throughput (token/s): 225.45, #queue-req: 0
  21828. 2025-07-20 17:28:29,617 - __main__ - INFO - sglang running req: 5 queue req: 0
  21829. 2025-07-20 17:28:30,501 - sglang - INFO - [2025-07-20 17:28:30 TP0] Decode batch. #running-req: 5, #token: 14630, token usage: 0.39, gen throughput (token/s): 226.05, #queue-req: 0
  21830. 2025-07-20 17:28:30,501 - __main__ - INFO - sglang running req: 5 queue req: 0
  21831. 2025-07-20 17:28:31,379 - sglang - INFO - [2025-07-20 17:28:31 TP0] Decode batch. #running-req: 4, #token: 12121, token usage: 0.32, gen throughput (token/s): 199.38, #queue-req: 0
  21832. 2025-07-20 17:28:31,379 - __main__ - INFO - sglang running req: 4 queue req: 0
  21833. 2025-07-20 17:28:32,248 - sglang - INFO - [2025-07-20 17:28:32 TP0] Decode batch. #running-req: 4, #token: 12281, token usage: 0.32, gen throughput (token/s): 184.12, #queue-req: 0
  21834. 2025-07-20 17:28:32,248 - __main__ - INFO - sglang running req: 4 queue req: 0
  21835. 2025-07-20 17:28:33,115 - sglang - INFO - [2025-07-20 17:28:33 TP0] Decode batch. #running-req: 3, #token: 9307, token usage: 0.25, gen throughput (token/s): 181.02, #queue-req: 0
  21836. 2025-07-20 17:28:33,115 - __main__ - INFO - sglang running req: 3 queue req: 0
  21837. 2025-07-20 17:28:33,979 - sglang - INFO - [2025-07-20 17:28:33 TP0] Decode batch. #running-req: 3, #token: 9427, token usage: 0.25, gen throughput (token/s): 138.89, #queue-req: 0
  21838. 2025-07-20 17:28:33,980 - __main__ - INFO - sglang running req: 3 queue req: 0
  21839. 2025-07-20 17:28:34,846 - sglang - INFO - [2025-07-20 17:28:34 TP0] Decode batch. #running-req: 3, #token: 9547, token usage: 0.25, gen throughput (token/s): 138.43, #queue-req: 0
  21840. 2025-07-20 17:28:34,846 - __main__ - INFO - sglang running req: 3 queue req: 0
  21841. 2025-07-20 17:28:35,697 - sglang - INFO - [2025-07-20 17:28:35 TP0] Decode batch. #running-req: 1, #token: 2983, token usage: 0.08, gen throughput (token/s): 89.34, #queue-req: 0
  21842. 2025-07-20 17:28:35,697 - __main__ - INFO - sglang running req: 1 queue req: 0
  21843. 2025-07-20 17:28:36,537 - sglang - INFO - [2025-07-20 17:28:36 TP0] Decode batch. #running-req: 1, #token: 3023, token usage: 0.08, gen throughput (token/s): 47.63, #queue-req: 0
  21844. 2025-07-20 17:28:36,537 - __main__ - INFO - sglang running req: 1 queue req: 0
  21845. 2025-07-20 17:28:36,768 - __main__ - INFO - Queue remaining: 27
  21846. 2025-07-20 17:28:36,768 - __main__ - INFO -
  21847. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21848. ----------------------------------------------------------------------------------
  21849. finished_input_tokens 192.91 192.91
  21850. finished_output_tokens 45.55 45.55
  21851. sglang_input_tokens 307.62 307.62
  21852. sglang_output_tokens 80.81 80.81
  21853. 2025-07-20 17:28:36,768 - __main__ - INFO -
  21854. Worker ID | finished | started
  21855. ----------+----------+--------
  21856. 0 | 11 | 12
  21857. 2025-07-20 17:28:37,376 - sglang - INFO - [2025-07-20 17:28:37 TP0] Decode batch. #running-req: 1, #token: 3063, token usage: 0.08, gen throughput (token/s): 47.63, #queue-req: 0
  21858. 2025-07-20 17:28:37,377 - __main__ - INFO - sglang running req: 1 queue req: 0
  21859. 2025-07-20 17:28:38,216 - sglang - INFO - [2025-07-20 17:28:38 TP0] Decode batch. #running-req: 1, #token: 3103, token usage: 0.08, gen throughput (token/s): 47.63, #queue-req: 0
  21860. 2025-07-20 17:28:38,217 - __main__ - INFO - sglang running req: 1 queue req: 0
  21861. 2025-07-20 17:28:39,054 - sglang - INFO - [2025-07-20 17:28:39 TP0] Decode batch. #running-req: 1, #token: 3143, token usage: 0.08, gen throughput (token/s): 47.74, #queue-req: 0
  21862. 2025-07-20 17:28:39,054 - __main__ - INFO - sglang running req: 1 queue req: 0
  21863. 2025-07-20 17:28:39,888 - sglang - INFO - [2025-07-20 17:28:39 TP0] Decode batch. #running-req: 1, #token: 3183, token usage: 0.08, gen throughput (token/s): 47.99, #queue-req: 0
  21864. 2025-07-20 17:28:39,888 - __main__ - INFO - sglang running req: 1 queue req: 0
  21865. 2025-07-20 17:28:40,456 - __main__ - INFO - Finished TaskGroup for worker on 10dc5d29c3f17870daf918c9555cd0b939acbffe
  21866. 2025-07-20 17:28:40,456 - __main__ - INFO - Got 1 docs for 10dc5d29c3f17870daf918c9555cd0b939acbffe
  21867. 2025-07-20 17:28:40,458 - __main__ - INFO - Worker 0 processing work item d0cf1cf8644fafcb025a313b4bec083ea97e8c8d
  21868. 2025-07-20 17:28:40,458 - __main__ - INFO - Created all tasks for d0cf1cf8644fafcb025a313b4bec083ea97e8c8d
  21869. 2025-07-20 17:28:40,465 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010604200101.pdf in worker 0
  21870. 2025-07-20 17:28:40,602 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-6
  21871. 2025-07-20 17:28:40,622 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-4
  21872. 2025-07-20 17:28:40,629 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-3
  21873. 2025-07-20 17:28:40,631 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-8
  21874. 2025-07-20 17:28:40,638 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-1
  21875. 2025-07-20 17:28:40,661 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-7
  21876. 2025-07-20 17:28:40,675 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-2
  21877. 2025-07-20 17:28:40,691 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604200101.pdf-5
  21878. 2025-07-20 17:28:40,827 - sglang - INFO - [2025-07-20 17:28:40 TP0] Prefill batch. #new-seq: 1, #new-token: 2107, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  21879. 2025-07-20 17:28:40,827 - __main__ - INFO - sglang running req: 0 queue req: 0
  21880. 2025-07-20 17:28:41,596 - sglang - INFO - [2025-07-20 17:28:41 TP0] Prefill batch. #new-seq: 6, #new-token: 13079, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.06, #running-req: 1, #queue-req: 1
  21881. 2025-07-20 17:28:41,596 - __main__ - INFO - sglang running req: 1 queue req: 1
  21882. 2025-07-20 17:28:45,865 - sglang - INFO - [2025-07-20 17:28:45 TP0] Decode batch. #running-req: 7, #token: 15277, token usage: 0.40, gen throughput (token/s): 19.74, #queue-req: 1
  21883. 2025-07-20 17:28:45,865 - __main__ - INFO - sglang running req: 7 queue req: 1
  21884. 2025-07-20 17:28:46,753 - sglang - INFO - [2025-07-20 17:28:46 TP0] Decode batch. #running-req: 7, #token: 15557, token usage: 0.41, gen throughput (token/s): 315.37, #queue-req: 1
  21885. 2025-07-20 17:28:46,753 - __main__ - INFO - sglang running req: 7 queue req: 1
  21886. 2025-07-20 17:28:46,770 - __main__ - INFO - Queue remaining: 26
  21887. 2025-07-20 17:28:46,770 - __main__ - INFO -
  21888. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21889. ----------------------------------------------------------------------------------
  21890. finished_input_tokens 295.43 295.43
  21891. finished_output_tokens 68.69 68.69
  21892. sglang_input_tokens 304.46 304.46
  21893. sglang_output_tokens 81.16 81.16
  21894. 2025-07-20 17:28:46,770 - __main__ - INFO -
  21895. Worker ID | started
  21896. ----------+--------
  21897. 0 | 8
  21898. 2025-07-20 17:28:47,633 - sglang - INFO - [2025-07-20 17:28:47 TP0] Decode batch. #running-req: 7, #token: 15837, token usage: 0.42, gen throughput (token/s): 318.01, #queue-req: 1
  21899. 2025-07-20 17:28:47,634 - __main__ - INFO - sglang running req: 7 queue req: 1
  21900. 2025-07-20 17:28:48,516 - sglang - INFO - [2025-07-20 17:28:48 TP0] Decode batch. #running-req: 7, #token: 16117, token usage: 0.42, gen throughput (token/s): 317.36, #queue-req: 1
  21901. 2025-07-20 17:28:48,516 - __main__ - INFO - sglang running req: 7 queue req: 1
  21902. 2025-07-20 17:28:49,407 - sglang - INFO - [2025-07-20 17:28:49 TP0] Decode batch. #running-req: 7, #token: 16397, token usage: 0.43, gen throughput (token/s): 314.14, #queue-req: 1
  21903. 2025-07-20 17:28:49,407 - __main__ - INFO - sglang running req: 7 queue req: 1
  21904. 2025-07-20 17:28:50,299 - sglang - INFO - [2025-07-20 17:28:50 TP0] Decode batch. #running-req: 7, #token: 16677, token usage: 0.44, gen throughput (token/s): 314.00, #queue-req: 1
  21905. 2025-07-20 17:28:50,299 - __main__ - INFO - sglang running req: 7 queue req: 1
  21906. 2025-07-20 17:28:50,969 - sglang - INFO - [2025-07-20 17:28:50 TP0] Prefill batch. #new-seq: 1, #new-token: 2527, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 0
  21907. 2025-07-20 17:28:50,970 - __main__ - INFO - sglang running req: 6 queue req: 0
  21908. 2025-07-20 17:28:51,946 - sglang - INFO - [2025-07-20 17:28:51 TP0] Decode batch. #running-req: 7, #token: 17261, token usage: 0.45, gen throughput (token/s): 169.35, #queue-req: 0
  21909. 2025-07-20 17:28:51,946 - __main__ - INFO - sglang running req: 7 queue req: 0
  21910. 2025-07-20 17:28:52,842 - sglang - INFO - [2025-07-20 17:28:52 TP0] Decode batch. #running-req: 7, #token: 17541, token usage: 0.46, gen throughput (token/s): 312.64, #queue-req: 0
  21911. 2025-07-20 17:28:52,842 - __main__ - INFO - sglang running req: 7 queue req: 0
  21912. 2025-07-20 17:28:53,733 - sglang - INFO - [2025-07-20 17:28:53 TP0] Decode batch. #running-req: 6, #token: 15468, token usage: 0.41, gen throughput (token/s): 283.97, #queue-req: 0
  21913. 2025-07-20 17:28:53,733 - __main__ - INFO - sglang running req: 6 queue req: 0
  21914. 2025-07-20 17:28:54,620 - sglang - INFO - [2025-07-20 17:28:54 TP0] Decode batch. #running-req: 6, #token: 15708, token usage: 0.41, gen throughput (token/s): 270.48, #queue-req: 0
  21915. 2025-07-20 17:28:54,620 - __main__ - INFO - sglang running req: 6 queue req: 0
  21916. 2025-07-20 17:28:55,498 - sglang - INFO - [2025-07-20 17:28:55 TP0] Decode batch. #running-req: 5, #token: 13251, token usage: 0.35, gen throughput (token/s): 232.29, #queue-req: 0
  21917. 2025-07-20 17:28:55,498 - __main__ - INFO - sglang running req: 5 queue req: 0
  21918. 2025-07-20 17:28:56,382 - sglang - INFO - [2025-07-20 17:28:56 TP0] Decode batch. #running-req: 5, #token: 13451, token usage: 0.35, gen throughput (token/s): 226.28, #queue-req: 0
  21919. 2025-07-20 17:28:56,382 - __main__ - INFO - sglang running req: 5 queue req: 0
  21920. 2025-07-20 17:28:56,771 - __main__ - INFO - Queue remaining: 26
  21921. 2025-07-20 17:28:56,771 - __main__ - INFO -
  21922. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21923. ----------------------------------------------------------------------------------
  21924. finished_input_tokens 283.63 283.63
  21925. finished_output_tokens 65.94 65.94
  21926. sglang_input_tokens 326.78 326.78
  21927. sglang_output_tokens 82.50 82.50
  21928. 2025-07-20 17:28:56,771 - __main__ - INFO -
  21929. Worker ID | finished | started
  21930. ----------+----------+--------
  21931. 0 | 4 | 8
  21932. 2025-07-20 17:28:57,255 - sglang - INFO - [2025-07-20 17:28:57 TP0] Decode batch. #running-req: 4, #token: 11051, token usage: 0.29, gen throughput (token/s): 191.31, #queue-req: 0
  21933. 2025-07-20 17:28:57,255 - __main__ - INFO - sglang running req: 4 queue req: 0
  21934. 2025-07-20 17:28:58,120 - sglang - INFO - [2025-07-20 17:28:58 TP0] Decode batch. #running-req: 3, #token: 8688, token usage: 0.23, gen throughput (token/s): 157.27, #queue-req: 0
  21935. 2025-07-20 17:28:58,120 - __main__ - INFO - sglang running req: 3 queue req: 0
  21936. 2025-07-20 17:28:58,984 - sglang - INFO - [2025-07-20 17:28:58 TP0] Decode batch. #running-req: 3, #token: 8808, token usage: 0.23, gen throughput (token/s): 138.81, #queue-req: 0
  21937. 2025-07-20 17:28:58,985 - __main__ - INFO - sglang running req: 3 queue req: 0
  21938. 2025-07-20 17:28:59,839 - sglang - INFO - [2025-07-20 17:28:59 TP0] Decode batch. #running-req: 1, #token: 3356, token usage: 0.09, gen throughput (token/s): 100.59, #queue-req: 0
  21939. 2025-07-20 17:28:59,840 - __main__ - INFO - sglang running req: 1 queue req: 0
  21940. 2025-07-20 17:29:00,679 - sglang - INFO - [2025-07-20 17:29:00 TP0] Decode batch. #running-req: 1, #token: 3396, token usage: 0.09, gen throughput (token/s): 47.62, #queue-req: 0
  21941. 2025-07-20 17:29:00,680 - __main__ - INFO - sglang running req: 1 queue req: 0
  21942. 2025-07-20 17:29:01,519 - sglang - INFO - [2025-07-20 17:29:01 TP0] Decode batch. #running-req: 1, #token: 3436, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
  21943. 2025-07-20 17:29:01,519 - __main__ - INFO - sglang running req: 1 queue req: 0
  21944. 2025-07-20 17:29:02,357 - sglang - INFO - [2025-07-20 17:29:02 TP0] Decode batch. #running-req: 1, #token: 3476, token usage: 0.09, gen throughput (token/s): 47.71, #queue-req: 0
  21945. 2025-07-20 17:29:02,357 - __main__ - INFO - sglang running req: 1 queue req: 0
  21946. 2025-07-20 17:29:03,194 - sglang - INFO - [2025-07-20 17:29:03 TP0] Decode batch. #running-req: 1, #token: 3516, token usage: 0.09, gen throughput (token/s): 47.82, #queue-req: 0
  21947. 2025-07-20 17:29:03,194 - __main__ - INFO - sglang running req: 1 queue req: 0
  21948. 2025-07-20 17:29:04,036 - sglang - INFO - [2025-07-20 17:29:04 TP0] Decode batch. #running-req: 1, #token: 3556, token usage: 0.09, gen throughput (token/s): 47.50, #queue-req: 0
  21949. 2025-07-20 17:29:04,036 - __main__ - INFO - sglang running req: 1 queue req: 0
  21950. 2025-07-20 17:29:04,877 - sglang - INFO - [2025-07-20 17:29:04 TP0] Decode batch. #running-req: 1, #token: 3596, token usage: 0.09, gen throughput (token/s): 47.56, #queue-req: 0
  21951. 2025-07-20 17:29:04,877 - __main__ - INFO - sglang running req: 1 queue req: 0
  21952. 2025-07-20 17:29:05,365 - __main__ - INFO - Finished TaskGroup for worker on d0cf1cf8644fafcb025a313b4bec083ea97e8c8d
  21953. 2025-07-20 17:29:05,365 - __main__ - INFO - Got 1 docs for d0cf1cf8644fafcb025a313b4bec083ea97e8c8d
  21954. 2025-07-20 17:29:05,366 - __main__ - INFO - Worker 0 processing work item 2ff00bac5e9500c24956e5386f6e7a49b2b55098
  21955. 2025-07-20 17:29:05,366 - __main__ - INFO - Created all tasks for 2ff00bac5e9500c24956e5386f6e7a49b2b55098
  21956. 2025-07-20 17:29:05,369 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106008000.pdf in worker 0
  21957. 2025-07-20 17:29:05,428 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-5
  21958. 2025-07-20 17:29:05,538 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-3
  21959. 2025-07-20 17:29:05,544 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-2
  21960. 2025-07-20 17:29:05,548 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-1
  21961. 2025-07-20 17:29:05,566 - sglang - INFO - [2025-07-20 17:29:05 TP0] Prefill batch. #new-seq: 1, #new-token: 1102, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  21962. 2025-07-20 17:29:05,566 - __main__ - INFO - sglang running req: 0 queue req: 0
  21963. 2025-07-20 17:29:05,597 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106008000.pdf-4
  21964. 2025-07-20 17:29:06,061 - sglang - INFO - [2025-07-20 17:29:06 TP0] Prefill batch. #new-seq: 4, #new-token: 8959, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.03, #running-req: 1, #queue-req: 0
  21965. 2025-07-20 17:29:06,062 - __main__ - INFO - sglang running req: 1 queue req: 0
  21966. 2025-07-20 17:29:06,772 - __main__ - INFO - Queue remaining: 25
  21967. 2025-07-20 17:29:06,772 - __main__ - INFO -
  21968. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21969. ----------------------------------------------------------------------------------
  21970. finished_input_tokens 340.74 340.74
  21971. finished_output_tokens 77.72 77.72
  21972. sglang_input_tokens 349.08 349.08
  21973. sglang_output_tokens 89.24 89.24
  21974. 2025-07-20 17:29:06,773 - __main__ - INFO -
  21975. Worker ID | started
  21976. ----------+--------
  21977. 0 | 5
  21978. 2025-07-20 17:29:09,116 - sglang - INFO - [2025-07-20 17:29:09 TP0] Decode batch. #running-req: 5, #token: 10146, token usage: 0.27, gen throughput (token/s): 25.47, #queue-req: 0
  21979. 2025-07-20 17:29:09,117 - __main__ - INFO - sglang running req: 5 queue req: 0
  21980. 2025-07-20 17:29:09,981 - sglang - INFO - [2025-07-20 17:29:09 TP0] Decode batch. #running-req: 4, #token: 9187, token usage: 0.24, gen throughput (token/s): 203.50, #queue-req: 0
  21981. 2025-07-20 17:29:09,982 - __main__ - INFO - sglang running req: 4 queue req: 0
  21982. 2025-07-20 17:29:10,840 - sglang - INFO - [2025-07-20 17:29:10 TP0] Decode batch. #running-req: 4, #token: 9347, token usage: 0.25, gen throughput (token/s): 186.41, #queue-req: 0
  21983. 2025-07-20 17:29:10,840 - __main__ - INFO - sglang running req: 4 queue req: 0
  21984. 2025-07-20 17:29:11,707 - sglang - INFO - [2025-07-20 17:29:11 TP0] Decode batch. #running-req: 4, #token: 9507, token usage: 0.25, gen throughput (token/s): 184.57, #queue-req: 0
  21985. 2025-07-20 17:29:11,707 - __main__ - INFO - sglang running req: 4 queue req: 0
  21986. 2025-07-20 17:29:12,575 - sglang - INFO - [2025-07-20 17:29:12 TP0] Decode batch. #running-req: 4, #token: 9667, token usage: 0.25, gen throughput (token/s): 184.27, #queue-req: 0
  21987. 2025-07-20 17:29:12,575 - __main__ - INFO - sglang running req: 4 queue req: 0
  21988. 2025-07-20 17:29:13,442 - sglang - INFO - [2025-07-20 17:29:13 TP0] Decode batch. #running-req: 4, #token: 9827, token usage: 0.26, gen throughput (token/s): 184.39, #queue-req: 0
  21989. 2025-07-20 17:29:13,443 - __main__ - INFO - sglang running req: 4 queue req: 0
  21990. 2025-07-20 17:29:14,310 - sglang - INFO - [2025-07-20 17:29:14 TP0] Decode batch. #running-req: 4, #token: 9987, token usage: 0.26, gen throughput (token/s): 184.45, #queue-req: 0
  21991. 2025-07-20 17:29:14,310 - __main__ - INFO - sglang running req: 4 queue req: 0
  21992. 2025-07-20 17:29:15,179 - sglang - INFO - [2025-07-20 17:29:15 TP0] Decode batch. #running-req: 4, #token: 10147, token usage: 0.27, gen throughput (token/s): 184.17, #queue-req: 0
  21993. 2025-07-20 17:29:15,179 - __main__ - INFO - sglang running req: 4 queue req: 0
  21994. 2025-07-20 17:29:16,046 - sglang - INFO - [2025-07-20 17:29:16 TP0] Decode batch. #running-req: 4, #token: 10307, token usage: 0.27, gen throughput (token/s): 184.54, #queue-req: 0
  21995. 2025-07-20 17:29:16,046 - __main__ - INFO - sglang running req: 4 queue req: 0
  21996. 2025-07-20 17:29:16,775 - __main__ - INFO - Queue remaining: 25
  21997. 2025-07-20 17:29:16,775 - __main__ - INFO -
  21998. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  21999. ----------------------------------------------------------------------------------
  22000. finished_input_tokens 328.14 328.14
  22001. finished_output_tokens 74.85 74.85
  22002. sglang_input_tokens 340.25 340.25
  22003. sglang_output_tokens 86.06 86.06
  22004. 2025-07-20 17:29:16,775 - __main__ - INFO -
  22005. Worker ID | finished | started
  22006. ----------+----------+--------
  22007. 0 | 1 | 5
  22008. 2025-07-20 17:29:16,915 - sglang - INFO - [2025-07-20 17:29:16 TP0] Decode batch. #running-req: 4, #token: 10467, token usage: 0.28, gen throughput (token/s): 184.01, #queue-req: 0
  22009. 2025-07-20 17:29:16,915 - __main__ - INFO - sglang running req: 4 queue req: 0
  22010. 2025-07-20 17:29:17,780 - sglang - INFO - [2025-07-20 17:29:17 TP0] Decode batch. #running-req: 4, #token: 10627, token usage: 0.28, gen throughput (token/s): 184.95, #queue-req: 0
  22011. 2025-07-20 17:29:17,781 - __main__ - INFO - sglang running req: 4 queue req: 0
  22012. 2025-07-20 17:29:18,647 - sglang - INFO - [2025-07-20 17:29:18 TP0] Decode batch. #running-req: 4, #token: 10787, token usage: 0.28, gen throughput (token/s): 184.72, #queue-req: 0
  22013. 2025-07-20 17:29:18,647 - __main__ - INFO - sglang running req: 4 queue req: 0
  22014. 2025-07-20 17:29:19,507 - sglang - INFO - [2025-07-20 17:29:19 TP0] Decode batch. #running-req: 2, #token: 5519, token usage: 0.15, gen throughput (token/s): 126.68, #queue-req: 0
  22015. 2025-07-20 17:29:19,507 - __main__ - INFO - sglang running req: 2 queue req: 0
  22016. 2025-07-20 17:29:20,356 - sglang - INFO - [2025-07-20 17:29:20 TP0] Decode batch. #running-req: 2, #token: 5599, token usage: 0.15, gen throughput (token/s): 94.22, #queue-req: 0
  22017. 2025-07-20 17:29:20,356 - __main__ - INFO - sglang running req: 2 queue req: 0
  22018. 2025-07-20 17:29:21,205 - sglang - INFO - [2025-07-20 17:29:21 TP0] Decode batch. #running-req: 2, #token: 5679, token usage: 0.15, gen throughput (token/s): 94.20, #queue-req: 0
  22019. 2025-07-20 17:29:21,205 - __main__ - INFO - sglang running req: 2 queue req: 0
  22020. 2025-07-20 17:29:22,049 - sglang - INFO - [2025-07-20 17:29:22 TP0] Decode batch. #running-req: 1, #token: 3150, token usage: 0.08, gen throughput (token/s): 62.77, #queue-req: 0
  22021. 2025-07-20 17:29:22,050 - __main__ - INFO - sglang running req: 1 queue req: 0
  22022. 2025-07-20 17:29:22,099 - __main__ - INFO - Finished TaskGroup for worker on 2ff00bac5e9500c24956e5386f6e7a49b2b55098
  22023. 2025-07-20 17:29:22,099 - __main__ - INFO - Got 1 docs for 2ff00bac5e9500c24956e5386f6e7a49b2b55098
  22024. 2025-07-20 17:29:22,100 - __main__ - INFO - Worker 0 processing work item aef98857329873e434b4b835531b5abd2cfca622
  22025. 2025-07-20 17:29:22,101 - __main__ - INFO - Created all tasks for aef98857329873e434b4b835531b5abd2cfca622
  22026. 2025-07-20 17:29:22,108 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900702.pdf in worker 0
  22027. 2025-07-20 17:29:22,218 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-6
  22028. 2025-07-20 17:29:22,255 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-2
  22029. 2025-07-20 17:29:22,298 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-3
  22030. 2025-07-20 17:29:22,300 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-1
  22031. 2025-07-20 17:29:22,358 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-4
  22032. 2025-07-20 17:29:22,364 - sglang - INFO - [2025-07-20 17:29:22 TP0] Prefill batch. #new-seq: 1, #new-token: 1496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22033. 2025-07-20 17:29:22,364 - __main__ - INFO - sglang running req: 0 queue req: 0
  22034. 2025-07-20 17:29:22,366 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900702.pdf-5
  22035. 2025-07-20 17:29:22,941 - sglang - INFO - [2025-07-20 17:29:22 TP0] Prefill batch. #new-seq: 5, #new-token: 11856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  22036. 2025-07-20 17:29:22,941 - __main__ - INFO - sglang running req: 1 queue req: 0
  22037. 2025-07-20 17:29:26,776 - __main__ - INFO - Queue remaining: 24
  22038. 2025-07-20 17:29:26,777 - __main__ - INFO -
  22039. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22040. ----------------------------------------------------------------------------------
  22041. finished_input_tokens 352.31 352.31
  22042. finished_output_tokens 79.99 79.99
  22043. sglang_input_tokens 360.06 360.06
  22044. sglang_output_tokens 90.68 90.68
  22045. 2025-07-20 17:29:26,777 - __main__ - INFO -
  22046. Worker ID | started
  22047. ----------+--------
  22048. 0 | 6
  22049. 2025-07-20 17:29:27,285 - sglang - INFO - [2025-07-20 17:29:27 TP0] Decode batch. #running-req: 6, #token: 13580, token usage: 0.36, gen throughput (token/s): 43.93, #queue-req: 0
  22050. 2025-07-20 17:29:27,285 - __main__ - INFO - sglang running req: 6 queue req: 0
  22051. 2025-07-20 17:29:28,173 - sglang - INFO - [2025-07-20 17:29:28 TP0] Decode batch. #running-req: 6, #token: 13820, token usage: 0.36, gen throughput (token/s): 270.34, #queue-req: 0
  22052. 2025-07-20 17:29:28,173 - __main__ - INFO - sglang running req: 6 queue req: 0
  22053. 2025-07-20 17:29:29,062 - sglang - INFO - [2025-07-20 17:29:29 TP0] Decode batch. #running-req: 6, #token: 14060, token usage: 0.37, gen throughput (token/s): 269.89, #queue-req: 0
  22054. 2025-07-20 17:29:29,062 - __main__ - INFO - sglang running req: 6 queue req: 0
  22055. 2025-07-20 17:29:29,951 - sglang - INFO - [2025-07-20 17:29:29 TP0] Decode batch. #running-req: 6, #token: 14300, token usage: 0.38, gen throughput (token/s): 270.15, #queue-req: 0
  22056. 2025-07-20 17:29:29,951 - __main__ - INFO - sglang running req: 6 queue req: 0
  22057. 2025-07-20 17:29:30,838 - sglang - INFO - [2025-07-20 17:29:30 TP0] Decode batch. #running-req: 6, #token: 14540, token usage: 0.38, gen throughput (token/s): 270.35, #queue-req: 0
  22058. 2025-07-20 17:29:30,838 - __main__ - INFO - sglang running req: 6 queue req: 0
  22059. 2025-07-20 17:29:31,722 - sglang - INFO - [2025-07-20 17:29:31 TP0] Decode batch. #running-req: 5, #token: 13046, token usage: 0.34, gen throughput (token/s): 256.71, #queue-req: 0
  22060. 2025-07-20 17:29:31,723 - __main__ - INFO - sglang running req: 5 queue req: 0
  22061. 2025-07-20 17:29:32,599 - sglang - INFO - [2025-07-20 17:29:32 TP0] Decode batch. #running-req: 5, #token: 13246, token usage: 0.35, gen throughput (token/s): 228.13, #queue-req: 0
  22062. 2025-07-20 17:29:32,599 - __main__ - INFO - sglang running req: 5 queue req: 0
  22063. 2025-07-20 17:29:33,466 - sglang - INFO - [2025-07-20 17:29:33 TP0] Decode batch. #running-req: 4, #token: 10752, token usage: 0.28, gen throughput (token/s): 189.13, #queue-req: 0
  22064. 2025-07-20 17:29:33,466 - __main__ - INFO - sglang running req: 4 queue req: 0
  22065. 2025-07-20 17:29:34,338 - sglang - INFO - [2025-07-20 17:29:34 TP0] Decode batch. #running-req: 4, #token: 10912, token usage: 0.29, gen throughput (token/s): 183.59, #queue-req: 0
  22066. 2025-07-20 17:29:34,338 - __main__ - INFO - sglang running req: 4 queue req: 0
  22067. 2025-07-20 17:29:35,204 - sglang - INFO - [2025-07-20 17:29:35 TP0] Decode batch. #running-req: 3, #token: 8986, token usage: 0.24, gen throughput (token/s): 151.28, #queue-req: 0
  22068. 2025-07-20 17:29:35,204 - __main__ - INFO - sglang running req: 3 queue req: 0
  22069. 2025-07-20 17:29:36,066 - sglang - INFO - [2025-07-20 17:29:36 TP0] Decode batch. #running-req: 3, #token: 9106, token usage: 0.24, gen throughput (token/s): 139.23, #queue-req: 0
  22070. 2025-07-20 17:29:36,066 - __main__ - INFO - sglang running req: 3 queue req: 0
  22071. 2025-07-20 17:29:36,778 - __main__ - INFO - Queue remaining: 24
  22072. 2025-07-20 17:29:36,779 - __main__ - INFO -
  22073. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22074. ----------------------------------------------------------------------------------
  22075. finished_input_tokens 340.18 340.18
  22076. finished_output_tokens 77.23 77.23
  22077. sglang_input_tokens 366.80 366.80
  22078. sglang_output_tokens 90.58 90.58
  22079. 2025-07-20 17:29:36,779 - __main__ - INFO -
  22080. Worker ID | finished | started
  22081. ----------+----------+--------
  22082. 0 | 3 | 6
  22083. 2025-07-20 17:29:36,931 - sglang - INFO - [2025-07-20 17:29:36 TP0] Decode batch. #running-req: 3, #token: 9226, token usage: 0.24, gen throughput (token/s): 138.75, #queue-req: 0
  22084. 2025-07-20 17:29:36,931 - __main__ - INFO - sglang running req: 3 queue req: 0
  22085. 2025-07-20 17:29:37,796 - sglang - INFO - [2025-07-20 17:29:37 TP0] Decode batch. #running-req: 3, #token: 9346, token usage: 0.25, gen throughput (token/s): 138.72, #queue-req: 0
  22086. 2025-07-20 17:29:37,796 - __main__ - INFO - sglang running req: 3 queue req: 0
  22087. 2025-07-20 17:29:38,660 - sglang - INFO - [2025-07-20 17:29:38 TP0] Decode batch. #running-req: 3, #token: 9466, token usage: 0.25, gen throughput (token/s): 138.82, #queue-req: 0
  22088. 2025-07-20 17:29:38,660 - __main__ - INFO - sglang running req: 3 queue req: 0
  22089. 2025-07-20 17:29:39,525 - sglang - INFO - [2025-07-20 17:29:39 TP0] Decode batch. #running-req: 3, #token: 9586, token usage: 0.25, gen throughput (token/s): 138.79, #queue-req: 0
  22090. 2025-07-20 17:29:39,525 - __main__ - INFO - sglang running req: 3 queue req: 0
  22091. 2025-07-20 17:29:40,383 - sglang - INFO - [2025-07-20 17:29:40 TP0] Decode batch. #running-req: 3, #token: 9706, token usage: 0.26, gen throughput (token/s): 139.75, #queue-req: 0
  22092. 2025-07-20 17:29:40,384 - __main__ - INFO - sglang running req: 3 queue req: 0
  22093. 2025-07-20 17:29:41,243 - sglang - INFO - [2025-07-20 17:29:41 TP0] Decode batch. #running-req: 2, #token: 6529, token usage: 0.17, gen throughput (token/s): 125.58, #queue-req: 0
  22094. 2025-07-20 17:29:41,244 - __main__ - INFO - sglang running req: 2 queue req: 0
  22095. 2025-07-20 17:29:42,096 - sglang - INFO - [2025-07-20 17:29:42 TP0] Decode batch. #running-req: 2, #token: 6609, token usage: 0.17, gen throughput (token/s): 93.83, #queue-req: 0
  22096. 2025-07-20 17:29:42,096 - __main__ - INFO - sglang running req: 2 queue req: 0
  22097. 2025-07-20 17:29:42,945 - sglang - INFO - [2025-07-20 17:29:42 TP0] Decode batch. #running-req: 1, #token: 3444, token usage: 0.09, gen throughput (token/s): 78.92, #queue-req: 0
  22098. 2025-07-20 17:29:42,945 - __main__ - INFO - sglang running req: 1 queue req: 0
  22099. 2025-07-20 17:29:43,786 - sglang - INFO - [2025-07-20 17:29:43 TP0] Decode batch. #running-req: 1, #token: 3484, token usage: 0.09, gen throughput (token/s): 47.54, #queue-req: 0
  22100. 2025-07-20 17:29:43,786 - __main__ - INFO - sglang running req: 1 queue req: 0
  22101. 2025-07-20 17:29:44,627 - sglang - INFO - [2025-07-20 17:29:44 TP0] Decode batch. #running-req: 1, #token: 3524, token usage: 0.09, gen throughput (token/s): 47.56, #queue-req: 0
  22102. 2025-07-20 17:29:44,627 - __main__ - INFO - sglang running req: 1 queue req: 0
  22103. 2025-07-20 17:29:45,469 - sglang - INFO - [2025-07-20 17:29:45 TP0] Decode batch. #running-req: 1, #token: 3564, token usage: 0.09, gen throughput (token/s): 47.52, #queue-req: 0
  22104. 2025-07-20 17:29:45,469 - __main__ - INFO - sglang running req: 1 queue req: 0
  22105. 2025-07-20 17:29:46,106 - __main__ - INFO - Finished TaskGroup for worker on aef98857329873e434b4b835531b5abd2cfca622
  22106. 2025-07-20 17:29:46,106 - __main__ - INFO - Got 1 docs for aef98857329873e434b4b835531b5abd2cfca622
  22107. 2025-07-20 17:29:46,108 - __main__ - INFO - Worker 0 processing work item 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
  22108. 2025-07-20 17:29:46,108 - __main__ - INFO - Created all tasks for 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
  22109. 2025-07-20 17:29:46,114 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029005.pdf in worker 0
  22110. 2025-07-20 17:29:46,271 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-7
  22111. 2025-07-20 17:29:46,289 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-4
  22112. 2025-07-20 17:29:46,311 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-1
  22113. 2025-07-20 17:29:46,355 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-5
  22114. 2025-07-20 17:29:46,392 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-6
  22115. 2025-07-20 17:29:46,401 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-3
  22116. 2025-07-20 17:29:46,417 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029005.pdf-2
  22117. 2025-07-20 17:29:46,443 - sglang - INFO - [2025-07-20 17:29:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1786, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22118. 2025-07-20 17:29:46,443 - __main__ - INFO - sglang running req: 0 queue req: 0
  22119. 2025-07-20 17:29:46,780 - __main__ - INFO - Queue remaining: 23
  22120. 2025-07-20 17:29:46,781 - __main__ - INFO -
  22121. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22122. ----------------------------------------------------------------------------------
  22123. finished_input_tokens 373.29 373.90
  22124. finished_output_tokens 85.31 85.45
  22125. sglang_input_tokens 380.52 381.15
  22126. sglang_output_tokens 95.30 95.45
  22127. 2025-07-20 17:29:46,781 - __main__ - INFO -
  22128. Worker ID | started
  22129. ----------+--------
  22130. 0 | 7
  22131. 2025-07-20 17:29:47,070 - sglang - INFO - [2025-07-20 17:29:47 TP0] Prefill batch. #new-seq: 6, #new-token: 14500, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  22132. 2025-07-20 17:29:47,070 - __main__ - INFO - sglang running req: 1 queue req: 0
  22133. 2025-07-20 17:29:51,561 - sglang - INFO - [2025-07-20 17:29:51 TP0] Decode batch. #running-req: 7, #token: 16356, token usage: 0.43, gen throughput (token/s): 16.42, #queue-req: 0
  22134. 2025-07-20 17:29:51,561 - __main__ - INFO - sglang running req: 7 queue req: 0
  22135. 2025-07-20 17:29:52,456 - sglang - INFO - [2025-07-20 17:29:52 TP0] Decode batch. #running-req: 7, #token: 16636, token usage: 0.44, gen throughput (token/s): 312.96, #queue-req: 0
  22136. 2025-07-20 17:29:52,456 - __main__ - INFO - sglang running req: 7 queue req: 0
  22137. 2025-07-20 17:29:53,348 - sglang - INFO - [2025-07-20 17:29:53 TP0] Decode batch. #running-req: 7, #token: 16916, token usage: 0.45, gen throughput (token/s): 313.71, #queue-req: 0
  22138. 2025-07-20 17:29:53,348 - __main__ - INFO - sglang running req: 7 queue req: 0
  22139. 2025-07-20 17:29:54,238 - sglang - INFO - [2025-07-20 17:29:54 TP0] Decode batch. #running-req: 7, #token: 17196, token usage: 0.45, gen throughput (token/s): 314.82, #queue-req: 0
  22140. 2025-07-20 17:29:54,238 - __main__ - INFO - sglang running req: 7 queue req: 0
  22141. 2025-07-20 17:29:55,129 - sglang - INFO - [2025-07-20 17:29:55 TP0] Decode batch. #running-req: 7, #token: 17476, token usage: 0.46, gen throughput (token/s): 314.05, #queue-req: 0
  22142. 2025-07-20 17:29:55,129 - __main__ - INFO - sglang running req: 7 queue req: 0
  22143. 2025-07-20 17:29:56,024 - sglang - INFO - [2025-07-20 17:29:56 TP0] Decode batch. #running-req: 7, #token: 17756, token usage: 0.47, gen throughput (token/s): 312.98, #queue-req: 0
  22144. 2025-07-20 17:29:56,024 - __main__ - INFO - sglang running req: 7 queue req: 0
  22145. 2025-07-20 17:29:56,783 - __main__ - INFO - Queue remaining: 23
  22146. 2025-07-20 17:29:56,783 - __main__ - INFO -
  22147. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22148. ----------------------------------------------------------------------------------
  22149. finished_input_tokens 361.27 373.90
  22150. finished_output_tokens 82.57 85.45
  22151. sglang_input_tokens 368.26 381.15
  22152. sglang_output_tokens 92.23 95.45
  22153. 2025-07-20 17:29:56,783 - __main__ - INFO -
  22154. Worker ID | started
  22155. ----------+--------
  22156. 0 | 7
  22157. 2025-07-20 17:29:56,923 - sglang - INFO - [2025-07-20 17:29:56 TP0] Decode batch. #running-req: 7, #token: 18036, token usage: 0.47, gen throughput (token/s): 311.45, #queue-req: 0
  22158. 2025-07-20 17:29:56,923 - __main__ - INFO - sglang running req: 7 queue req: 0
  22159. 2025-07-20 17:29:57,821 - sglang - INFO - [2025-07-20 17:29:57 TP0] Decode batch. #running-req: 7, #token: 18316, token usage: 0.48, gen throughput (token/s): 311.77, #queue-req: 0
  22160. 2025-07-20 17:29:57,821 - __main__ - INFO - sglang running req: 7 queue req: 0
  22161. 2025-07-20 17:29:58,720 - sglang - INFO - [2025-07-20 17:29:58 TP0] Decode batch. #running-req: 7, #token: 18596, token usage: 0.49, gen throughput (token/s): 311.35, #queue-req: 0
  22162. 2025-07-20 17:29:58,721 - __main__ - INFO - sglang running req: 7 queue req: 0
  22163. 2025-07-20 17:29:59,619 - sglang - INFO - [2025-07-20 17:29:59 TP0] Decode batch. #running-req: 7, #token: 18876, token usage: 0.50, gen throughput (token/s): 311.40, #queue-req: 0
  22164. 2025-07-20 17:29:59,620 - __main__ - INFO - sglang running req: 7 queue req: 0
  22165. 2025-07-20 17:30:00,518 - sglang - INFO - [2025-07-20 17:30:00 TP0] Decode batch. #running-req: 7, #token: 19156, token usage: 0.50, gen throughput (token/s): 311.43, #queue-req: 0
  22166. 2025-07-20 17:30:00,519 - __main__ - INFO - sglang running req: 7 queue req: 0
  22167. 2025-07-20 17:30:01,417 - sglang - INFO - [2025-07-20 17:30:01 TP0] Decode batch. #running-req: 6, #token: 17200, token usage: 0.45, gen throughput (token/s): 296.15, #queue-req: 0
  22168. 2025-07-20 17:30:01,417 - __main__ - INFO - sglang running req: 6 queue req: 0
  22169. 2025-07-20 17:30:02,307 - sglang - INFO - [2025-07-20 17:30:02 TP0] Decode batch. #running-req: 6, #token: 17440, token usage: 0.46, gen throughput (token/s): 269.65, #queue-req: 0
  22170. 2025-07-20 17:30:02,307 - __main__ - INFO - sglang running req: 6 queue req: 0
  22171. 2025-07-20 17:30:03,196 - sglang - INFO - [2025-07-20 17:30:03 TP0] Decode batch. #running-req: 6, #token: 17680, token usage: 0.47, gen throughput (token/s): 269.98, #queue-req: 0
  22172. 2025-07-20 17:30:03,196 - __main__ - INFO - sglang running req: 6 queue req: 0
  22173. 2025-07-20 17:30:04,093 - sglang - INFO - [2025-07-20 17:30:04 TP0] Decode batch. #running-req: 6, #token: 17920, token usage: 0.47, gen throughput (token/s): 267.37, #queue-req: 0
  22174. 2025-07-20 17:30:04,094 - __main__ - INFO - sglang running req: 6 queue req: 0
  22175. 2025-07-20 17:30:04,989 - sglang - INFO - [2025-07-20 17:30:04 TP0] Decode batch. #running-req: 6, #token: 18160, token usage: 0.48, gen throughput (token/s): 267.81, #queue-req: 0
  22176. 2025-07-20 17:30:04,989 - __main__ - INFO - sglang running req: 6 queue req: 0
  22177. 2025-07-20 17:30:05,868 - sglang - INFO - [2025-07-20 17:30:05 TP0] Decode batch. #running-req: 4, #token: 12461, token usage: 0.33, gen throughput (token/s): 196.91, #queue-req: 0
  22178. 2025-07-20 17:30:05,868 - __main__ - INFO - sglang running req: 4 queue req: 0
  22179. 2025-07-20 17:30:06,742 - sglang - INFO - [2025-07-20 17:30:06 TP0] Decode batch. #running-req: 4, #token: 12621, token usage: 0.33, gen throughput (token/s): 183.02, #queue-req: 0
  22180. 2025-07-20 17:30:06,743 - __main__ - INFO - sglang running req: 4 queue req: 0
  22181. 2025-07-20 17:30:06,784 - __main__ - INFO - Queue remaining: 23
  22182. 2025-07-20 17:30:06,784 - __main__ - INFO -
  22183. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22184. ----------------------------------------------------------------------------------
  22185. finished_input_tokens 349.99 373.90
  22186. finished_output_tokens 79.99 85.45
  22187. sglang_input_tokens 376.82 402.56
  22188. sglang_output_tokens 94.57 101.03
  22189. 2025-07-20 17:30:06,785 - __main__ - INFO -
  22190. Worker ID | finished | started
  22191. ----------+----------+--------
  22192. 0 | 3 | 7
  22193. 2025-07-20 17:30:07,611 - sglang - INFO - [2025-07-20 17:30:07 TP0] Decode batch. #running-req: 3, #token: 9603, token usage: 0.25, gen throughput (token/s): 148.50, #queue-req: 0
  22194. 2025-07-20 17:30:07,611 - __main__ - INFO - sglang running req: 3 queue req: 0
  22195. 2025-07-20 17:30:08,476 - sglang - INFO - [2025-07-20 17:30:08 TP0] Decode batch. #running-req: 3, #token: 9723, token usage: 0.26, gen throughput (token/s): 138.72, #queue-req: 0
  22196. 2025-07-20 17:30:08,476 - __main__ - INFO - sglang running req: 3 queue req: 0
  22197. 2025-07-20 17:30:09,325 - sglang - INFO - [2025-07-20 17:30:09 TP0] Decode batch. #running-req: 2, #token: 6582, token usage: 0.17, gen throughput (token/s): 100.10, #queue-req: 0
  22198. 2025-07-20 17:30:09,326 - __main__ - INFO - sglang running req: 2 queue req: 0
  22199. 2025-07-20 17:30:10,170 - sglang - INFO - [2025-07-20 17:30:10 TP0] Decode batch. #running-req: 2, #token: 6662, token usage: 0.18, gen throughput (token/s): 94.69, #queue-req: 0
  22200. 2025-07-20 17:30:10,170 - __main__ - INFO - sglang running req: 2 queue req: 0
  22201. 2025-07-20 17:30:11,018 - sglang - INFO - [2025-07-20 17:30:11 TP0] Decode batch. #running-req: 2, #token: 6742, token usage: 0.18, gen throughput (token/s): 94.36, #queue-req: 0
  22202. 2025-07-20 17:30:11,018 - __main__ - INFO - sglang running req: 2 queue req: 0
  22203. 2025-07-20 17:30:11,871 - sglang - INFO - [2025-07-20 17:30:11 TP0] Decode batch. #running-req: 2, #token: 6822, token usage: 0.18, gen throughput (token/s): 93.79, #queue-req: 0
  22204. 2025-07-20 17:30:11,871 - __main__ - INFO - sglang running req: 2 queue req: 0
  22205. 2025-07-20 17:30:12,721 - sglang - INFO - [2025-07-20 17:30:12 TP0] Decode batch. #running-req: 2, #token: 6902, token usage: 0.18, gen throughput (token/s): 94.13, #queue-req: 0
  22206. 2025-07-20 17:30:12,721 - __main__ - INFO - sglang running req: 2 queue req: 0
  22207. 2025-07-20 17:30:13,572 - sglang - INFO - [2025-07-20 17:30:13 TP0] Decode batch. #running-req: 2, #token: 6982, token usage: 0.18, gen throughput (token/s): 94.00, #queue-req: 0
  22208. 2025-07-20 17:30:13,572 - __main__ - INFO - sglang running req: 2 queue req: 0
  22209. 2025-07-20 17:30:14,426 - sglang - INFO - [2025-07-20 17:30:14 TP0] Decode batch. #running-req: 2, #token: 7062, token usage: 0.19, gen throughput (token/s): 93.63, #queue-req: 0
  22210. 2025-07-20 17:30:14,426 - __main__ - INFO - sglang running req: 2 queue req: 0
  22211. 2025-07-20 17:30:15,274 - sglang - INFO - [2025-07-20 17:30:15 TP0] Decode batch. #running-req: 1, #token: 3634, token usage: 0.10, gen throughput (token/s): 68.40, #queue-req: 0
  22212. 2025-07-20 17:30:15,274 - __main__ - INFO - sglang running req: 1 queue req: 0
  22213. 2025-07-20 17:30:15,324 - __main__ - INFO - Finished TaskGroup for worker on 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
  22214. 2025-07-20 17:30:15,324 - __main__ - INFO - Got 1 docs for 0640d37e5d5afe1fb4a4e053d7d3389e927e5bf7
  22215. 2025-07-20 17:30:15,326 - __main__ - INFO - Worker 0 processing work item f89f7b1c93bc7bae613c7002942c0c65ba3a03d7
  22216. 2025-07-20 17:30:15,326 - __main__ - INFO - Created all tasks for f89f7b1c93bc7bae613c7002942c0c65ba3a03d7
  22217. 2025-07-20 17:30:15,333 - __main__ - INFO - Got 8 pages to do for test_pdf/1144520000702630XG344010603501801.pdf in worker 0
  22218. 2025-07-20 17:30:15,451 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-6
  22219. 2025-07-20 17:30:15,478 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-4
  22220. 2025-07-20 17:30:15,487 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-3
  22221. 2025-07-20 17:30:15,508 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-5
  22222. 2025-07-20 17:30:15,524 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-2
  22223. 2025-07-20 17:30:15,534 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-1
  22224. 2025-07-20 17:30:15,556 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-8
  22225. 2025-07-20 17:30:15,605 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010603501801.pdf-7
  22226. 2025-07-20 17:30:15,634 - sglang - INFO - [2025-07-20 17:30:15 TP0] Prefill batch. #new-seq: 1, #new-token: 1553, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22227. 2025-07-20 17:30:15,634 - __main__ - INFO - sglang running req: 0 queue req: 0
  22228. 2025-07-20 17:30:16,234 - sglang - INFO - [2025-07-20 17:30:16 TP0] Prefill batch. #new-seq: 6, #new-token: 12982, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 1
  22229. 2025-07-20 17:30:16,234 - __main__ - INFO - sglang running req: 1 queue req: 1
  22230. 2025-07-20 17:30:16,786 - __main__ - INFO - Queue remaining: 22
  22231. 2025-07-20 17:30:16,787 - __main__ - INFO -
  22232. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22233. ----------------------------------------------------------------------------------
  22234. finished_input_tokens 388.68 428.19
  22235. finished_output_tokens 93.63 103.15
  22236. sglang_input_tokens 395.25 435.43
  22237. sglang_output_tokens 102.71 113.15
  22238. 2025-07-20 17:30:16,787 - __main__ - INFO -
  22239. Worker ID | started
  22240. ----------+--------
  22241. 0 | 8
  22242. 2025-07-20 17:30:21,035 - sglang - INFO - [2025-07-20 17:30:21 TP0] Decode batch. #running-req: 7, #token: 14801, token usage: 0.39, gen throughput (token/s): 46.52, #queue-req: 1
  22243. 2025-07-20 17:30:21,036 - __main__ - INFO - sglang running req: 7 queue req: 1
  22244. 2025-07-20 17:30:21,926 - sglang - INFO - [2025-07-20 17:30:21 TP0] Decode batch. #running-req: 7, #token: 15081, token usage: 0.40, gen throughput (token/s): 314.31, #queue-req: 1
  22245. 2025-07-20 17:30:21,927 - __main__ - INFO - sglang running req: 7 queue req: 1
  22246. 2025-07-20 17:30:22,817 - sglang - INFO - [2025-07-20 17:30:22 TP0] Decode batch. #running-req: 7, #token: 15361, token usage: 0.40, gen throughput (token/s): 314.34, #queue-req: 1
  22247. 2025-07-20 17:30:22,817 - __main__ - INFO - sglang running req: 7 queue req: 1
  22248. 2025-07-20 17:30:23,708 - sglang - INFO - [2025-07-20 17:30:23 TP0] Decode batch. #running-req: 7, #token: 15641, token usage: 0.41, gen throughput (token/s): 314.11, #queue-req: 1
  22249. 2025-07-20 17:30:23,709 - __main__ - INFO - sglang running req: 7 queue req: 1
  22250. 2025-07-20 17:30:24,598 - sglang - INFO - [2025-07-20 17:30:24 TP0] Decode batch. #running-req: 7, #token: 15921, token usage: 0.42, gen throughput (token/s): 314.86, #queue-req: 1
  22251. 2025-07-20 17:30:24,598 - __main__ - INFO - sglang running req: 7 queue req: 1
  22252. 2025-07-20 17:30:25,488 - sglang - INFO - [2025-07-20 17:30:25 TP0] Decode batch. #running-req: 7, #token: 16201, token usage: 0.43, gen throughput (token/s): 314.63, #queue-req: 1
  22253. 2025-07-20 17:30:25,488 - __main__ - INFO - sglang running req: 7 queue req: 1
  22254. 2025-07-20 17:30:25,555 - sglang - INFO - [2025-07-20 17:30:25 TP0] Prefill batch. #new-seq: 1, #new-token: 2623, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.38, #running-req: 6, #queue-req: 0
  22255. 2025-07-20 17:30:25,555 - __main__ - INFO - sglang running req: 6 queue req: 0
  22256. 2025-07-20 17:30:26,788 - __main__ - INFO - Queue remaining: 22
  22257. 2025-07-20 17:30:26,789 - __main__ - INFO -
  22258. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22259. ----------------------------------------------------------------------------------
  22260. finished_input_tokens 377.26 428.19
  22261. finished_output_tokens 90.88 103.15
  22262. sglang_input_tokens 388.20 440.61
  22263. sglang_output_tokens 100.40 113.96
  22264. 2025-07-20 17:30:26,789 - __main__ - INFO -
  22265. Worker ID | finished | started
  22266. ----------+----------+--------
  22267. 0 | 1 | 8
  22268. 2025-07-20 17:30:27,165 - sglang - INFO - [2025-07-20 17:30:27 TP0] Decode batch. #running-req: 7, #token: 17310, token usage: 0.46, gen throughput (token/s): 166.34, #queue-req: 0
  22269. 2025-07-20 17:30:27,165 - __main__ - INFO - sglang running req: 7 queue req: 0
  22270. 2025-07-20 17:30:28,057 - sglang - INFO - [2025-07-20 17:30:28 TP0] Decode batch. #running-req: 6, #token: 15265, token usage: 0.40, gen throughput (token/s): 274.66, #queue-req: 0
  22271. 2025-07-20 17:30:28,057 - __main__ - INFO - sglang running req: 6 queue req: 0
  22272. 2025-07-20 17:30:28,948 - sglang - INFO - [2025-07-20 17:30:28 TP0] Decode batch. #running-req: 6, #token: 15505, token usage: 0.41, gen throughput (token/s): 269.43, #queue-req: 0
  22273. 2025-07-20 17:30:28,948 - __main__ - INFO - sglang running req: 6 queue req: 0
  22274. 2025-07-20 17:30:29,838 - sglang - INFO - [2025-07-20 17:30:29 TP0] Decode batch. #running-req: 6, #token: 15745, token usage: 0.41, gen throughput (token/s): 269.63, #queue-req: 0
  22275. 2025-07-20 17:30:29,838 - __main__ - INFO - sglang running req: 6 queue req: 0
  22276. 2025-07-20 17:30:30,727 - sglang - INFO - [2025-07-20 17:30:30 TP0] Decode batch. #running-req: 6, #token: 15985, token usage: 0.42, gen throughput (token/s): 269.91, #queue-req: 0
  22277. 2025-07-20 17:30:30,727 - __main__ - INFO - sglang running req: 6 queue req: 0
  22278. 2025-07-20 17:30:31,610 - sglang - INFO - [2025-07-20 17:30:31 TP0] Decode batch. #running-req: 5, #token: 13627, token usage: 0.36, gen throughput (token/s): 229.94, #queue-req: 0
  22279. 2025-07-20 17:30:31,610 - __main__ - INFO - sglang running req: 5 queue req: 0
  22280. 2025-07-20 17:30:32,489 - sglang - INFO - [2025-07-20 17:30:32 TP0] Decode batch. #running-req: 5, #token: 13827, token usage: 0.36, gen throughput (token/s): 227.56, #queue-req: 0
  22281. 2025-07-20 17:30:32,489 - __main__ - INFO - sglang running req: 5 queue req: 0
  22282. 2025-07-20 17:30:33,373 - sglang - INFO - [2025-07-20 17:30:33 TP0] Decode batch. #running-req: 5, #token: 14027, token usage: 0.37, gen throughput (token/s): 226.28, #queue-req: 0
  22283. 2025-07-20 17:30:33,373 - __main__ - INFO - sglang running req: 5 queue req: 0
  22284. 2025-07-20 17:30:34,261 - sglang - INFO - [2025-07-20 17:30:34 TP0] Decode batch. #running-req: 5, #token: 14227, token usage: 0.37, gen throughput (token/s): 225.06, #queue-req: 0
  22285. 2025-07-20 17:30:34,262 - __main__ - INFO - sglang running req: 5 queue req: 0
  22286. 2025-07-20 17:30:35,137 - sglang - INFO - [2025-07-20 17:30:35 TP0] Decode batch. #running-req: 3, #token: 8548, token usage: 0.23, gen throughput (token/s): 188.35, #queue-req: 0
  22287. 2025-07-20 17:30:35,138 - __main__ - INFO - sglang running req: 3 queue req: 0
  22288. 2025-07-20 17:30:36,000 - sglang - INFO - [2025-07-20 17:30:36 TP0] Decode batch. #running-req: 3, #token: 8668, token usage: 0.23, gen throughput (token/s): 139.04, #queue-req: 0
  22289. 2025-07-20 17:30:36,001 - __main__ - INFO - sglang running req: 3 queue req: 0
  22290. 2025-07-20 17:30:36,790 - __main__ - INFO - Queue remaining: 22
  22291. 2025-07-20 17:30:36,790 - __main__ - INFO -
  22292. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22293. ----------------------------------------------------------------------------------
  22294. finished_input_tokens 366.49 428.19
  22295. finished_output_tokens 88.29 103.15
  22296. sglang_input_tokens 414.16 483.88
  22297. sglang_output_tokens 107.09 125.12
  22298. 2025-07-20 17:30:36,790 - __main__ - INFO -
  22299. Worker ID | finished | started
  22300. ----------+----------+--------
  22301. 0 | 7 | 8
  22302. 2025-07-20 17:30:36,848 - sglang - INFO - [2025-07-20 17:30:36 TP0] Decode batch. #running-req: 1, #token: 3100, token usage: 0.08, gen throughput (token/s): 73.12, #queue-req: 0
  22303. 2025-07-20 17:30:36,849 - __main__ - INFO - sglang running req: 1 queue req: 0
  22304. 2025-07-20 17:30:37,690 - sglang - INFO - [2025-07-20 17:30:37 TP0] Decode batch. #running-req: 1, #token: 3140, token usage: 0.08, gen throughput (token/s): 47.55, #queue-req: 0
  22305. 2025-07-20 17:30:37,690 - __main__ - INFO - sglang running req: 1 queue req: 0
  22306. 2025-07-20 17:30:38,532 - sglang - INFO - [2025-07-20 17:30:38 TP0] Decode batch. #running-req: 1, #token: 3180, token usage: 0.08, gen throughput (token/s): 47.49, #queue-req: 0
  22307. 2025-07-20 17:30:38,532 - __main__ - INFO - sglang running req: 1 queue req: 0
  22308. 2025-07-20 17:30:39,374 - sglang - INFO - [2025-07-20 17:30:39 TP0] Decode batch. #running-req: 1, #token: 3220, token usage: 0.08, gen throughput (token/s): 47.50, #queue-req: 0
  22309. 2025-07-20 17:30:39,374 - __main__ - INFO - sglang running req: 1 queue req: 0
  22310. 2025-07-20 17:30:40,207 - sglang - INFO - [2025-07-20 17:30:40 TP0] Decode batch. #running-req: 1, #token: 3260, token usage: 0.09, gen throughput (token/s): 48.00, #queue-req: 0
  22311. 2025-07-20 17:30:40,208 - __main__ - INFO - sglang running req: 1 queue req: 0
  22312. 2025-07-20 17:30:41,046 - sglang - INFO - [2025-07-20 17:30:41 TP0] Decode batch. #running-req: 1, #token: 3300, token usage: 0.09, gen throughput (token/s): 47.67, #queue-req: 0
  22313. 2025-07-20 17:30:41,047 - __main__ - INFO - sglang running req: 1 queue req: 0
  22314. 2025-07-20 17:30:41,889 - sglang - INFO - [2025-07-20 17:30:41 TP0] Decode batch. #running-req: 1, #token: 3340, token usage: 0.09, gen throughput (token/s): 47.48, #queue-req: 0
  22315. 2025-07-20 17:30:41,889 - __main__ - INFO - sglang running req: 1 queue req: 0
  22316. 2025-07-20 17:30:42,729 - sglang - INFO - [2025-07-20 17:30:42 TP0] Decode batch. #running-req: 1, #token: 3380, token usage: 0.09, gen throughput (token/s): 47.61, #queue-req: 0
  22317. 2025-07-20 17:30:42,729 - __main__ - INFO - sglang running req: 1 queue req: 0
  22318. 2025-07-20 17:30:43,569 - sglang - INFO - [2025-07-20 17:30:43 TP0] Decode batch. #running-req: 1, #token: 3420, token usage: 0.09, gen throughput (token/s): 47.59, #queue-req: 0
  22319. 2025-07-20 17:30:43,570 - __main__ - INFO - sglang running req: 1 queue req: 0
  22320. 2025-07-20 17:30:44,409 - sglang - INFO - [2025-07-20 17:30:44 TP0] Decode batch. #running-req: 1, #token: 3460, token usage: 0.09, gen throughput (token/s): 47.63, #queue-req: 0
  22321. 2025-07-20 17:30:44,410 - __main__ - INFO - sglang running req: 1 queue req: 0
  22322. 2025-07-20 17:30:45,249 - sglang - INFO - [2025-07-20 17:30:45 TP0] Decode batch. #running-req: 1, #token: 3500, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
  22323. 2025-07-20 17:30:45,249 - __main__ - INFO - sglang running req: 1 queue req: 0
  22324. 2025-07-20 17:30:46,088 - sglang - INFO - [2025-07-20 17:30:46 TP0] Decode batch. #running-req: 1, #token: 3540, token usage: 0.09, gen throughput (token/s): 47.67, #queue-req: 0
  22325. 2025-07-20 17:30:46,088 - __main__ - INFO - sglang running req: 1 queue req: 0
  22326. 2025-07-20 17:30:46,792 - __main__ - INFO - Queue remaining: 22
  22327. 2025-07-20 17:30:46,792 - __main__ - INFO -
  22328. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22329. ----------------------------------------------------------------------------------
  22330. finished_input_tokens 356.33 428.19
  22331. finished_output_tokens 85.84 103.15
  22332. sglang_input_tokens 402.67 483.88
  22333. sglang_output_tokens 104.12 125.12
  22334. 2025-07-20 17:30:46,792 - __main__ - INFO -
  22335. Worker ID | finished | started
  22336. ----------+----------+--------
  22337. 0 | 7 | 8
  22338. 2025-07-20 17:30:46,921 - sglang - INFO - [2025-07-20 17:30:46 TP0] Decode batch. #running-req: 1, #token: 3580, token usage: 0.09, gen throughput (token/s): 48.03, #queue-req: 0
  22339. 2025-07-20 17:30:46,921 - __main__ - INFO - sglang running req: 1 queue req: 0
  22340. 2025-07-20 17:30:47,751 - sglang - INFO - [2025-07-20 17:30:47 TP0] Decode batch. #running-req: 1, #token: 3620, token usage: 0.10, gen throughput (token/s): 48.16, #queue-req: 0
  22341. 2025-07-20 17:30:47,752 - __main__ - INFO - sglang running req: 1 queue req: 0
  22342. 2025-07-20 17:30:48,588 - sglang - INFO - [2025-07-20 17:30:48 TP0] Decode batch. #running-req: 1, #token: 3660, token usage: 0.10, gen throughput (token/s): 47.79, #queue-req: 0
  22343. 2025-07-20 17:30:48,589 - __main__ - INFO - sglang running req: 1 queue req: 0
  22344. 2025-07-20 17:30:49,035 - __main__ - INFO - Finished TaskGroup for worker on f89f7b1c93bc7bae613c7002942c0c65ba3a03d7
  22345. 2025-07-20 17:30:49,035 - __main__ - INFO - Got 1 docs for f89f7b1c93bc7bae613c7002942c0c65ba3a03d7
  22346. 2025-07-20 17:30:49,036 - __main__ - INFO - Worker 0 processing work item dbac13d5d8d14af821606b2b6fcec79288c911ad
  22347. 2025-07-20 17:30:49,036 - __main__ - INFO - Created all tasks for dbac13d5d8d14af821606b2b6fcec79288c911ad
  22348. 2025-07-20 17:30:49,045 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301901.pdf in worker 0
  22349. 2025-07-20 17:30:49,160 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-4
  22350. 2025-07-20 17:30:49,165 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-3
  22351. 2025-07-20 17:30:49,202 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-9
  22352. 2025-07-20 17:30:49,204 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-6
  22353. 2025-07-20 17:30:49,208 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-2
  22354. 2025-07-20 17:30:49,251 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-1
  22355. 2025-07-20 17:30:49,255 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-8
  22356. 2025-07-20 17:30:49,256 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-5
  22357. 2025-07-20 17:30:49,300 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301901.pdf-7
  22358. 2025-07-20 17:30:49,331 - sglang - INFO - [2025-07-20 17:30:49 TP0] Prefill batch. #new-seq: 1, #new-token: 2100, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22359. 2025-07-20 17:30:49,332 - __main__ - INFO - sglang running req: 0 queue req: 0
  22360. 2025-07-20 17:30:50,082 - sglang - INFO - [2025-07-20 17:30:50 TP0] Prefill batch. #new-seq: 6, #new-token: 12724, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.06, #running-req: 1, #queue-req: 2
  22361. 2025-07-20 17:30:50,082 - __main__ - INFO - sglang running req: 1 queue req: 2
  22362. 2025-07-20 17:30:54,426 - sglang - INFO - [2025-07-20 17:30:54 TP0] Decode batch. #running-req: 7, #token: 14957, token usage: 0.39, gen throughput (token/s): 26.38, #queue-req: 2
  22363. 2025-07-20 17:30:54,426 - __main__ - INFO - sglang running req: 7 queue req: 2
  22364. 2025-07-20 17:30:55,307 - sglang - INFO - [2025-07-20 17:30:55 TP0] Decode batch. #running-req: 7, #token: 15237, token usage: 0.40, gen throughput (token/s): 317.59, #queue-req: 2
  22365. 2025-07-20 17:30:55,308 - __main__ - INFO - sglang running req: 7 queue req: 2
  22366. 2025-07-20 17:30:56,197 - sglang - INFO - [2025-07-20 17:30:56 TP0] Decode batch. #running-req: 7, #token: 15517, token usage: 0.41, gen throughput (token/s): 314.76, #queue-req: 2
  22367. 2025-07-20 17:30:56,197 - __main__ - INFO - sglang running req: 7 queue req: 2
  22368. 2025-07-20 17:30:56,794 - __main__ - INFO - Queue remaining: 21
  22369. 2025-07-20 17:30:56,794 - __main__ - INFO -
  22370. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22371. ----------------------------------------------------------------------------------
  22372. finished_input_tokens 393.02 485.38
  22373. finished_output_tokens 96.07 118.65
  22374. sglang_input_tokens 398.88 492.63
  22375. sglang_output_tokens 104.17 128.65
  22376. 2025-07-20 17:30:56,794 - __main__ - INFO -
  22377. Worker ID | started
  22378. ----------+--------
  22379. 0 | 9
  22380. 2025-07-20 17:30:57,088 - sglang - INFO - [2025-07-20 17:30:57 TP0] Decode batch. #running-req: 7, #token: 15797, token usage: 0.42, gen throughput (token/s): 314.20, #queue-req: 2
  22381. 2025-07-20 17:30:57,088 - __main__ - INFO - sglang running req: 7 queue req: 2
  22382. 2025-07-20 17:30:57,978 - sglang - INFO - [2025-07-20 17:30:57 TP0] Decode batch. #running-req: 7, #token: 16077, token usage: 0.42, gen throughput (token/s): 314.51, #queue-req: 2
  22383. 2025-07-20 17:30:57,979 - __main__ - INFO - sglang running req: 7 queue req: 2
  22384. 2025-07-20 17:30:58,873 - sglang - INFO - [2025-07-20 17:30:58 TP0] Decode batch. #running-req: 7, #token: 16357, token usage: 0.43, gen throughput (token/s): 313.13, #queue-req: 2
  22385. 2025-07-20 17:30:58,873 - __main__ - INFO - sglang running req: 7 queue req: 2
  22386. 2025-07-20 17:30:59,500 - sglang - INFO - [2025-07-20 17:30:59 TP0] Prefill batch. #new-seq: 2, #new-token: 5006, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.37, #running-req: 6, #queue-req: 0
  22387. 2025-07-20 17:30:59,500 - __main__ - INFO - sglang running req: 6 queue req: 0
  22388. 2025-07-20 17:31:01,286 - sglang - INFO - [2025-07-20 17:31:01 TP0] Decode batch. #running-req: 8, #token: 19280, token usage: 0.51, gen throughput (token/s): 120.56, #queue-req: 0
  22389. 2025-07-20 17:31:01,287 - __main__ - INFO - sglang running req: 8 queue req: 0
  22390. 2025-07-20 17:31:02,177 - sglang - INFO - [2025-07-20 17:31:02 TP0] Decode batch. #running-req: 7, #token: 17568, token usage: 0.46, gen throughput (token/s): 318.72, #queue-req: 0
  22391. 2025-07-20 17:31:02,178 - __main__ - INFO - sglang running req: 7 queue req: 0
  22392. 2025-07-20 17:31:03,067 - sglang - INFO - [2025-07-20 17:31:03 TP0] Decode batch. #running-req: 7, #token: 17848, token usage: 0.47, gen throughput (token/s): 314.84, #queue-req: 0
  22393. 2025-07-20 17:31:03,067 - __main__ - INFO - sglang running req: 7 queue req: 0
  22394. 2025-07-20 17:31:03,965 - sglang - INFO - [2025-07-20 17:31:03 TP0] Decode batch. #running-req: 7, #token: 18128, token usage: 0.48, gen throughput (token/s): 311.90, #queue-req: 0
  22395. 2025-07-20 17:31:03,965 - __main__ - INFO - sglang running req: 7 queue req: 0
  22396. 2025-07-20 17:31:04,850 - sglang - INFO - [2025-07-20 17:31:04 TP0] Decode batch. #running-req: 5, #token: 13723, token usage: 0.36, gen throughput (token/s): 232.52, #queue-req: 0
  22397. 2025-07-20 17:31:04,851 - __main__ - INFO - sglang running req: 5 queue req: 0
  22398. 2025-07-20 17:31:05,730 - sglang - INFO - [2025-07-20 17:31:05 TP0] Decode batch. #running-req: 4, #token: 11236, token usage: 0.30, gen throughput (token/s): 205.80, #queue-req: 0
  22399. 2025-07-20 17:31:05,730 - __main__ - INFO - sglang running req: 4 queue req: 0
  22400. 2025-07-20 17:31:06,600 - sglang - INFO - [2025-07-20 17:31:06 TP0] Decode batch. #running-req: 4, #token: 11396, token usage: 0.30, gen throughput (token/s): 183.86, #queue-req: 0
  22401. 2025-07-20 17:31:06,601 - __main__ - INFO - sglang running req: 4 queue req: 0
  22402. 2025-07-20 17:31:06,795 - __main__ - INFO - Queue remaining: 21
  22403. 2025-07-20 17:31:06,796 - __main__ - INFO -
  22404. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22405. ----------------------------------------------------------------------------------
  22406. finished_input_tokens 382.69 485.38
  22407. finished_output_tokens 93.55 118.65
  22408. sglang_input_tokens 414.51 525.75
  22409. sglang_output_tokens 105.95 134.38
  22410. 2025-07-20 17:31:06,796 - __main__ - INFO -
  22411. Worker ID | finished | started
  22412. ----------+----------+--------
  22413. 0 | 5 | 9
  22414. 2025-07-20 17:31:07,470 - sglang - INFO - [2025-07-20 17:31:07 TP0] Decode batch. #running-req: 4, #token: 11556, token usage: 0.30, gen throughput (token/s): 183.93, #queue-req: 0
  22415. 2025-07-20 17:31:07,470 - __main__ - INFO - sglang running req: 4 queue req: 0
  22416. 2025-07-20 17:31:08,340 - sglang - INFO - [2025-07-20 17:31:08 TP0] Decode batch. #running-req: 4, #token: 11716, token usage: 0.31, gen throughput (token/s): 183.93, #queue-req: 0
  22417. 2025-07-20 17:31:08,340 - __main__ - INFO - sglang running req: 4 queue req: 0
  22418. 2025-07-20 17:31:09,206 - sglang - INFO - [2025-07-20 17:31:09 TP0] Decode batch. #running-req: 4, #token: 11876, token usage: 0.31, gen throughput (token/s): 184.63, #queue-req: 0
  22419. 2025-07-20 17:31:09,207 - __main__ - INFO - sglang running req: 4 queue req: 0
  22420. 2025-07-20 17:31:10,075 - sglang - INFO - [2025-07-20 17:31:10 TP0] Decode batch. #running-req: 4, #token: 12036, token usage: 0.32, gen throughput (token/s): 184.35, #queue-req: 0
  22421. 2025-07-20 17:31:10,075 - __main__ - INFO - sglang running req: 4 queue req: 0
  22422. 2025-07-20 17:31:10,933 - sglang - INFO - [2025-07-20 17:31:10 TP0] Decode batch. #running-req: 3, #token: 8891, token usage: 0.23, gen throughput (token/s): 142.15, #queue-req: 0
  22423. 2025-07-20 17:31:10,933 - __main__ - INFO - sglang running req: 3 queue req: 0
  22424. 2025-07-20 17:31:11,786 - sglang - INFO - [2025-07-20 17:31:11 TP0] Decode batch. #running-req: 2, #token: 5990, token usage: 0.16, gen throughput (token/s): 116.02, #queue-req: 0
  22425. 2025-07-20 17:31:11,786 - __main__ - INFO - sglang running req: 2 queue req: 0
  22426. 2025-07-20 17:31:12,630 - sglang - INFO - [2025-07-20 17:31:12 TP0] Decode batch. #running-req: 2, #token: 6070, token usage: 0.16, gen throughput (token/s): 94.73, #queue-req: 0
  22427. 2025-07-20 17:31:12,631 - __main__ - INFO - sglang running req: 2 queue req: 0
  22428. 2025-07-20 17:31:13,471 - sglang - INFO - [2025-07-20 17:31:13 TP0] Decode batch. #running-req: 2, #token: 6150, token usage: 0.16, gen throughput (token/s): 95.19, #queue-req: 0
  22429. 2025-07-20 17:31:13,471 - __main__ - INFO - sglang running req: 2 queue req: 0
  22430. 2025-07-20 17:31:14,306 - sglang - INFO - [2025-07-20 17:31:14 TP0] Decode batch. #running-req: 1, #token: 2929, token usage: 0.08, gen throughput (token/s): 73.04, #queue-req: 0
  22431. 2025-07-20 17:31:14,307 - __main__ - INFO - sglang running req: 1 queue req: 0
  22432. 2025-07-20 17:31:15,136 - sglang - INFO - [2025-07-20 17:31:15 TP0] Decode batch. #running-req: 1, #token: 2969, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
  22433. 2025-07-20 17:31:15,136 - __main__ - INFO - sglang running req: 1 queue req: 0
  22434. 2025-07-20 17:31:15,965 - sglang - INFO - [2025-07-20 17:31:15 TP0] Decode batch. #running-req: 1, #token: 3009, token usage: 0.08, gen throughput (token/s): 48.23, #queue-req: 0
  22435. 2025-07-20 17:31:15,966 - __main__ - INFO - sglang running req: 1 queue req: 0
  22436. 2025-07-20 17:31:16,794 - sglang - INFO - [2025-07-20 17:31:16 TP0] Decode batch. #running-req: 1, #token: 3049, token usage: 0.08, gen throughput (token/s): 48.24, #queue-req: 0
  22437. 2025-07-20 17:31:16,795 - __main__ - INFO - sglang running req: 1 queue req: 0
  22438. 2025-07-20 17:31:16,797 - __main__ - INFO - Queue remaining: 21
  22439. 2025-07-20 17:31:16,797 - __main__ - INFO -
  22440. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22441. ----------------------------------------------------------------------------------
  22442. finished_input_tokens 372.89 485.38
  22443. finished_output_tokens 91.15 118.65
  22444. sglang_input_tokens 423.30 551.00
  22445. sglang_output_tokens 108.29 140.96
  22446. 2025-07-20 17:31:16,798 - __main__ - INFO -
  22447. Worker ID | finished | started
  22448. ----------+----------+--------
  22449. 0 | 8 | 9
  22450. 2025-07-20 17:31:17,624 - sglang - INFO - [2025-07-20 17:31:17 TP0] Decode batch. #running-req: 1, #token: 3089, token usage: 0.08, gen throughput (token/s): 48.20, #queue-req: 0
  22451. 2025-07-20 17:31:17,625 - __main__ - INFO - sglang running req: 1 queue req: 0
  22452. 2025-07-20 17:31:18,455 - sglang - INFO - [2025-07-20 17:31:18 TP0] Decode batch. #running-req: 1, #token: 3129, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
  22453. 2025-07-20 17:31:18,455 - __main__ - INFO - sglang running req: 1 queue req: 0
  22454. 2025-07-20 17:31:19,284 - sglang - INFO - [2025-07-20 17:31:19 TP0] Decode batch. #running-req: 1, #token: 3169, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
  22455. 2025-07-20 17:31:19,285 - __main__ - INFO - sglang running req: 1 queue req: 0
  22456. 2025-07-20 17:31:20,098 - __main__ - INFO - Finished TaskGroup for worker on dbac13d5d8d14af821606b2b6fcec79288c911ad
  22457. 2025-07-20 17:31:20,098 - __main__ - INFO - Got 1 docs for dbac13d5d8d14af821606b2b6fcec79288c911ad
  22458. 2025-07-20 17:31:20,099 - __main__ - INFO - Worker 0 processing work item a516ff5c967066055babccbea12ff6a88bdfe9b5
  22459. 2025-07-20 17:31:20,099 - __main__ - INFO - Created all tasks for a516ff5c967066055babccbea12ff6a88bdfe9b5
  22460. 2025-07-20 17:31:20,102 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900901.pdf in worker 0
  22461. 2025-07-20 17:31:20,226 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-6
  22462. 2025-07-20 17:31:20,246 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-1
  22463. 2025-07-20 17:31:20,261 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-3
  22464. 2025-07-20 17:31:20,268 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-2
  22465. 2025-07-20 17:31:20,306 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-4
  22466. 2025-07-20 17:31:20,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900901.pdf-5
  22467. 2025-07-20 17:31:20,377 - sglang - INFO - [2025-07-20 17:31:20 TP0] Prefill batch. #new-seq: 1, #new-token: 2595, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22468. 2025-07-20 17:31:20,377 - __main__ - INFO - sglang running req: 0 queue req: 0
  22469. 2025-07-20 17:31:21,176 - sglang - INFO - [2025-07-20 17:31:21 TP0] Prefill batch. #new-seq: 5, #new-token: 10243, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.07, #running-req: 1, #queue-req: 0
  22470. 2025-07-20 17:31:21,176 - __main__ - INFO - sglang running req: 1 queue req: 0
  22471. 2025-07-20 17:31:24,384 - sglang - INFO - [2025-07-20 17:31:24 TP0] Decode batch. #running-req: 6, #token: 12844, token usage: 0.34, gen throughput (token/s): 8.82, #queue-req: 0
  22472. 2025-07-20 17:31:24,384 - __main__ - INFO - sglang running req: 6 queue req: 0
  22473. 2025-07-20 17:31:25,254 - sglang - INFO - [2025-07-20 17:31:25 TP0] Decode batch. #running-req: 6, #token: 13084, token usage: 0.34, gen throughput (token/s): 275.75, #queue-req: 0
  22474. 2025-07-20 17:31:25,254 - __main__ - INFO - sglang running req: 6 queue req: 0
  22475. 2025-07-20 17:31:26,127 - sglang - INFO - [2025-07-20 17:31:26 TP0] Decode batch. #running-req: 6, #token: 13324, token usage: 0.35, gen throughput (token/s): 274.84, #queue-req: 0
  22476. 2025-07-20 17:31:26,128 - __main__ - INFO - sglang running req: 6 queue req: 0
  22477. 2025-07-20 17:31:26,800 - __main__ - INFO - Queue remaining: 20
  22478. 2025-07-20 17:31:26,800 - __main__ - INFO -
  22479. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22480. ----------------------------------------------------------------------------------
  22481. finished_input_tokens 413.08 551.48
  22482. finished_output_tokens 100.32 133.93
  22483. sglang_input_tokens 418.51 558.73
  22484. sglang_output_tokens 107.81 143.93
  22485. 2025-07-20 17:31:26,800 - __main__ - INFO -
  22486. Worker ID | started
  22487. ----------+--------
  22488. 0 | 6
  22489. 2025-07-20 17:31:27,003 - sglang - INFO - [2025-07-20 17:31:27 TP0] Decode batch. #running-req: 6, #token: 13564, token usage: 0.36, gen throughput (token/s): 274.19, #queue-req: 0
  22490. 2025-07-20 17:31:27,003 - __main__ - INFO - sglang running req: 6 queue req: 0
  22491. 2025-07-20 17:31:27,877 - sglang - INFO - [2025-07-20 17:31:27 TP0] Decode batch. #running-req: 6, #token: 13804, token usage: 0.36, gen throughput (token/s): 274.54, #queue-req: 0
  22492. 2025-07-20 17:31:27,877 - __main__ - INFO - sglang running req: 6 queue req: 0
  22493. 2025-07-20 17:31:28,752 - sglang - INFO - [2025-07-20 17:31:28 TP0] Decode batch. #running-req: 6, #token: 14044, token usage: 0.37, gen throughput (token/s): 274.35, #queue-req: 0
  22494. 2025-07-20 17:31:28,752 - __main__ - INFO - sglang running req: 6 queue req: 0
  22495. 2025-07-20 17:31:29,626 - sglang - INFO - [2025-07-20 17:31:29 TP0] Decode batch. #running-req: 6, #token: 14284, token usage: 0.38, gen throughput (token/s): 274.36, #queue-req: 0
  22496. 2025-07-20 17:31:29,627 - __main__ - INFO - sglang running req: 6 queue req: 0
  22497. 2025-07-20 17:31:30,502 - sglang - INFO - [2025-07-20 17:31:30 TP0] Decode batch. #running-req: 5, #token: 12651, token usage: 0.33, gen throughput (token/s): 262.59, #queue-req: 0
  22498. 2025-07-20 17:31:30,502 - __main__ - INFO - sglang running req: 5 queue req: 0
  22499. 2025-07-20 17:31:31,374 - sglang - INFO - [2025-07-20 17:31:31 TP0] Decode batch. #running-req: 5, #token: 12851, token usage: 0.34, gen throughput (token/s): 229.39, #queue-req: 0
  22500. 2025-07-20 17:31:31,374 - __main__ - INFO - sglang running req: 5 queue req: 0
  22501. 2025-07-20 17:31:32,246 - sglang - INFO - [2025-07-20 17:31:32 TP0] Decode batch. #running-req: 5, #token: 13051, token usage: 0.34, gen throughput (token/s): 229.50, #queue-req: 0
  22502. 2025-07-20 17:31:32,246 - __main__ - INFO - sglang running req: 5 queue req: 0
  22503. 2025-07-20 17:31:33,117 - sglang - INFO - [2025-07-20 17:31:33 TP0] Decode batch. #running-req: 5, #token: 13251, token usage: 0.35, gen throughput (token/s): 229.53, #queue-req: 0
  22504. 2025-07-20 17:31:33,117 - __main__ - INFO - sglang running req: 5 queue req: 0
  22505. 2025-07-20 17:31:33,989 - sglang - INFO - [2025-07-20 17:31:33 TP0] Decode batch. #running-req: 5, #token: 13451, token usage: 0.35, gen throughput (token/s): 229.43, #queue-req: 0
  22506. 2025-07-20 17:31:33,989 - __main__ - INFO - sglang running req: 5 queue req: 0
  22507. 2025-07-20 17:31:34,860 - sglang - INFO - [2025-07-20 17:31:34 TP0] Decode batch. #running-req: 4, #token: 11363, token usage: 0.30, gen throughput (token/s): 216.96, #queue-req: 0
  22508. 2025-07-20 17:31:34,860 - __main__ - INFO - sglang running req: 4 queue req: 0
  22509. 2025-07-20 17:31:35,720 - sglang - INFO - [2025-07-20 17:31:35 TP0] Decode batch. #running-req: 4, #token: 11523, token usage: 0.30, gen throughput (token/s): 185.90, #queue-req: 0
  22510. 2025-07-20 17:31:35,721 - __main__ - INFO - sglang running req: 4 queue req: 0
  22511. 2025-07-20 17:31:36,581 - sglang - INFO - [2025-07-20 17:31:36 TP0] Decode batch. #running-req: 4, #token: 11683, token usage: 0.31, gen throughput (token/s): 186.01, #queue-req: 0
  22512. 2025-07-20 17:31:36,581 - __main__ - INFO - sglang running req: 4 queue req: 0
  22513. 2025-07-20 17:31:36,801 - __main__ - INFO - Queue remaining: 20
  22514. 2025-07-20 17:31:36,802 - __main__ - INFO -
  22515. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22516. ----------------------------------------------------------------------------------
  22517. finished_input_tokens 403.02 551.48
  22518. finished_output_tokens 97.88 133.93
  22519. sglang_input_tokens 416.59 514.40
  22520. sglang_output_tokens 107.00 135.11
  22521. 2025-07-20 17:31:36,802 - __main__ - INFO -
  22522. Worker ID | finished | started
  22523. ----------+----------+--------
  22524. 0 | 2 | 6
  22525. 2025-07-20 17:31:37,441 - sglang - INFO - [2025-07-20 17:31:37 TP0] Decode batch. #running-req: 4, #token: 11843, token usage: 0.31, gen throughput (token/s): 185.96, #queue-req: 0
  22526. 2025-07-20 17:31:37,441 - __main__ - INFO - sglang running req: 4 queue req: 0
  22527. 2025-07-20 17:31:38,298 - sglang - INFO - [2025-07-20 17:31:38 TP0] Decode batch. #running-req: 3, #token: 9299, token usage: 0.24, gen throughput (token/s): 152.91, #queue-req: 0
  22528. 2025-07-20 17:31:38,298 - __main__ - INFO - sglang running req: 3 queue req: 0
  22529. 2025-07-20 17:31:39,147 - sglang - INFO - [2025-07-20 17:31:39 TP0] Decode batch. #running-req: 2, #token: 6143, token usage: 0.16, gen throughput (token/s): 125.94, #queue-req: 0
  22530. 2025-07-20 17:31:39,148 - __main__ - INFO - sglang running req: 2 queue req: 0
  22531. 2025-07-20 17:31:39,987 - sglang - INFO - [2025-07-20 17:31:39 TP0] Decode batch. #running-req: 2, #token: 6223, token usage: 0.16, gen throughput (token/s): 95.32, #queue-req: 0
  22532. 2025-07-20 17:31:39,987 - __main__ - INFO - sglang running req: 2 queue req: 0
  22533. 2025-07-20 17:31:40,826 - sglang - INFO - [2025-07-20 17:31:40 TP0] Decode batch. #running-req: 2, #token: 6303, token usage: 0.17, gen throughput (token/s): 95.34, #queue-req: 0
  22534. 2025-07-20 17:31:40,826 - __main__ - INFO - sglang running req: 2 queue req: 0
  22535. 2025-07-20 17:31:41,661 - sglang - INFO - [2025-07-20 17:31:41 TP0] Decode batch. #running-req: 1, #token: 3225, token usage: 0.08, gen throughput (token/s): 70.67, #queue-req: 0
  22536. 2025-07-20 17:31:41,661 - __main__ - INFO - sglang running req: 1 queue req: 0
  22537. 2025-07-20 17:31:42,491 - sglang - INFO - [2025-07-20 17:31:42 TP0] Decode batch. #running-req: 1, #token: 3265, token usage: 0.09, gen throughput (token/s): 48.19, #queue-req: 0
  22538. 2025-07-20 17:31:42,491 - __main__ - INFO - sglang running req: 1 queue req: 0
  22539. 2025-07-20 17:31:43,320 - sglang - INFO - [2025-07-20 17:31:43 TP0] Decode batch. #running-req: 1, #token: 3305, token usage: 0.09, gen throughput (token/s): 48.21, #queue-req: 0
  22540. 2025-07-20 17:31:43,321 - __main__ - INFO - sglang running req: 1 queue req: 0
  22541. 2025-07-20 17:31:44,150 - sglang - INFO - [2025-07-20 17:31:44 TP0] Decode batch. #running-req: 1, #token: 3345, token usage: 0.09, gen throughput (token/s): 48.19, #queue-req: 0
  22542. 2025-07-20 17:31:44,151 - __main__ - INFO - sglang running req: 1 queue req: 0
  22543. 2025-07-20 17:31:44,981 - sglang - INFO - [2025-07-20 17:31:44 TP0] Decode batch. #running-req: 1, #token: 3385, token usage: 0.09, gen throughput (token/s): 48.14, #queue-req: 0
  22544. 2025-07-20 17:31:44,982 - __main__ - INFO - sglang running req: 1 queue req: 0
  22545. 2025-07-20 17:31:45,812 - sglang - INFO - [2025-07-20 17:31:45 TP0] Decode batch. #running-req: 1, #token: 3425, token usage: 0.09, gen throughput (token/s): 48.18, #queue-req: 0
  22546. 2025-07-20 17:31:45,812 - __main__ - INFO - sglang running req: 1 queue req: 0
  22547. 2025-07-20 17:31:46,628 - __main__ - INFO - Finished TaskGroup for worker on a516ff5c967066055babccbea12ff6a88bdfe9b5
  22548. 2025-07-20 17:31:46,628 - __main__ - INFO - Got 1 docs for a516ff5c967066055babccbea12ff6a88bdfe9b5
  22549. 2025-07-20 17:31:46,629 - __main__ - INFO - Worker 0 processing work item a7cda58bb6cdd49b7ffd2f6d48a871b4e1da7e62
  22550. 2025-07-20 17:31:46,630 - __main__ - INFO - Created all tasks for a7cda58bb6cdd49b7ffd2f6d48a871b4e1da7e62
  22551. 2025-07-20 17:31:46,635 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013003.pdf in worker 0
  22552. 2025-07-20 17:31:46,733 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-6
  22553. 2025-07-20 17:31:46,761 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-4
  22554. 2025-07-20 17:31:46,772 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-3
  22555. 2025-07-20 17:31:46,776 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-2
  22556. 2025-07-20 17:31:46,802 - __main__ - INFO - Queue remaining: 19
  22557. 2025-07-20 17:31:46,803 - __main__ - INFO -
  22558. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22559. ----------------------------------------------------------------------------------
  22560. finished_input_tokens 423.96 594.28
  22561. finished_output_tokens 104.70 146.76
  22562. sglang_input_tokens 429.13 529.13
  22563. sglang_output_tokens 111.83 139.47
  22564. 2025-07-20 17:31:46,803 - __main__ - INFO -
  22565. Worker ID | started
  22566. ----------+--------
  22567. 0 | 6
  22568. 2025-07-20 17:31:46,813 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-1
  22569. 2025-07-20 17:31:46,896 - sglang - INFO - [2025-07-20 17:31:46 TP0] Prefill batch. #new-seq: 1, #new-token: 1353, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22570. 2025-07-20 17:31:46,896 - __main__ - INFO - sglang running req: 0 queue req: 0
  22571. 2025-07-20 17:31:46,903 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013003.pdf-5
  22572. 2025-07-20 17:31:47,428 - sglang - INFO - [2025-07-20 17:31:47 TP0] Prefill batch. #new-seq: 5, #new-token: 10578, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  22573. 2025-07-20 17:31:47,428 - __main__ - INFO - sglang running req: 1 queue req: 0
  22574. 2025-07-20 17:31:50,714 - sglang - INFO - [2025-07-20 17:31:50 TP0] Decode batch. #running-req: 6, #token: 11937, token usage: 0.31, gen throughput (token/s): 9.18, #queue-req: 0
  22575. 2025-07-20 17:31:50,714 - __main__ - INFO - sglang running req: 6 queue req: 0
  22576. 2025-07-20 17:31:51,583 - sglang - INFO - [2025-07-20 17:31:51 TP0] Decode batch. #running-req: 6, #token: 12177, token usage: 0.32, gen throughput (token/s): 276.28, #queue-req: 0
  22577. 2025-07-20 17:31:51,583 - __main__ - INFO - sglang running req: 6 queue req: 0
  22578. 2025-07-20 17:31:52,455 - sglang - INFO - [2025-07-20 17:31:52 TP0] Decode batch. #running-req: 6, #token: 12417, token usage: 0.33, gen throughput (token/s): 275.21, #queue-req: 0
  22579. 2025-07-20 17:31:52,455 - __main__ - INFO - sglang running req: 6 queue req: 0
  22580. 2025-07-20 17:31:53,326 - sglang - INFO - [2025-07-20 17:31:53 TP0] Decode batch. #running-req: 6, #token: 12657, token usage: 0.33, gen throughput (token/s): 275.27, #queue-req: 0
  22581. 2025-07-20 17:31:53,327 - __main__ - INFO - sglang running req: 6 queue req: 0
  22582. 2025-07-20 17:31:54,199 - sglang - INFO - [2025-07-20 17:31:54 TP0] Decode batch. #running-req: 6, #token: 12897, token usage: 0.34, gen throughput (token/s): 275.21, #queue-req: 0
  22583. 2025-07-20 17:31:54,199 - __main__ - INFO - sglang running req: 6 queue req: 0
  22584. 2025-07-20 17:31:55,071 - sglang - INFO - [2025-07-20 17:31:55 TP0] Decode batch. #running-req: 6, #token: 13137, token usage: 0.35, gen throughput (token/s): 275.10, #queue-req: 0
  22585. 2025-07-20 17:31:55,071 - __main__ - INFO - sglang running req: 6 queue req: 0
  22586. 2025-07-20 17:31:55,942 - sglang - INFO - [2025-07-20 17:31:55 TP0] Decode batch. #running-req: 5, #token: 11783, token usage: 0.31, gen throughput (token/s): 259.32, #queue-req: 0
  22587. 2025-07-20 17:31:55,943 - __main__ - INFO - sglang running req: 5 queue req: 0
  22588. 2025-07-20 17:31:56,804 - __main__ - INFO - Queue remaining: 19
  22589. 2025-07-20 17:31:56,805 - __main__ - INFO -
  22590. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22591. ----------------------------------------------------------------------------------
  22592. finished_input_tokens 414.11 594.28
  22593. finished_output_tokens 102.27 146.76
  22594. sglang_input_tokens 426.37 539.47
  22595. sglang_output_tokens 110.40 141.14
  22596. 2025-07-20 17:31:56,805 - __main__ - INFO -
  22597. Worker ID | finished | started
  22598. ----------+----------+--------
  22599. 0 | 2 | 6
  22600. 2025-07-20 17:31:56,809 - sglang - INFO - [2025-07-20 17:31:56 TP0] Decode batch. #running-req: 4, #token: 9953, token usage: 0.26, gen throughput (token/s): 220.46, #queue-req: 0
  22601. 2025-07-20 17:31:56,809 - __main__ - INFO - sglang running req: 4 queue req: 0
  22602. 2025-07-20 17:31:57,665 - sglang - INFO - [2025-07-20 17:31:57 TP0] Decode batch. #running-req: 3, #token: 7607, token usage: 0.20, gen throughput (token/s): 177.50, #queue-req: 0
  22603. 2025-07-20 17:31:57,665 - __main__ - INFO - sglang running req: 3 queue req: 0
  22604. 2025-07-20 17:31:58,516 - sglang - INFO - [2025-07-20 17:31:58 TP0] Decode batch. #running-req: 2, #token: 5645, token usage: 0.15, gen throughput (token/s): 138.62, #queue-req: 0
  22605. 2025-07-20 17:31:58,517 - __main__ - INFO - sglang running req: 2 queue req: 0
  22606. 2025-07-20 17:31:59,355 - sglang - INFO - [2025-07-20 17:31:59 TP0] Decode batch. #running-req: 2, #token: 5725, token usage: 0.15, gen throughput (token/s): 95.40, #queue-req: 0
  22607. 2025-07-20 17:31:59,355 - __main__ - INFO - sglang running req: 2 queue req: 0
  22608. 2025-07-20 17:32:00,193 - sglang - INFO - [2025-07-20 17:32:00 TP0] Decode batch. #running-req: 2, #token: 5805, token usage: 0.15, gen throughput (token/s): 95.46, #queue-req: 0
  22609. 2025-07-20 17:32:00,193 - __main__ - INFO - sglang running req: 2 queue req: 0
  22610. 2025-07-20 17:32:01,033 - sglang - INFO - [2025-07-20 17:32:01 TP0] Decode batch. #running-req: 2, #token: 5885, token usage: 0.15, gen throughput (token/s): 95.29, #queue-req: 0
  22611. 2025-07-20 17:32:01,033 - __main__ - INFO - sglang running req: 2 queue req: 0
  22612. 2025-07-20 17:32:01,873 - sglang - INFO - [2025-07-20 17:32:01 TP0] Decode batch. #running-req: 2, #token: 5965, token usage: 0.16, gen throughput (token/s): 95.23, #queue-req: 0
  22613. 2025-07-20 17:32:01,873 - __main__ - INFO - sglang running req: 2 queue req: 0
  22614. 2025-07-20 17:32:02,712 - sglang - INFO - [2025-07-20 17:32:02 TP0] Decode batch. #running-req: 2, #token: 6045, token usage: 0.16, gen throughput (token/s): 95.32, #queue-req: 0
  22615. 2025-07-20 17:32:02,712 - __main__ - INFO - sglang running req: 2 queue req: 0
  22616. 2025-07-20 17:32:03,552 - sglang - INFO - [2025-07-20 17:32:03 TP0] Decode batch. #running-req: 2, #token: 6125, token usage: 0.16, gen throughput (token/s): 95.19, #queue-req: 0
  22617. 2025-07-20 17:32:03,553 - __main__ - INFO - sglang running req: 2 queue req: 0
  22618. 2025-07-20 17:32:04,389 - sglang - INFO - [2025-07-20 17:32:04 TP0] Decode batch. #running-req: 1, #token: 3058, token usage: 0.08, gen throughput (token/s): 75.30, #queue-req: 0
  22619. 2025-07-20 17:32:04,389 - __main__ - INFO - sglang running req: 1 queue req: 0
  22620. 2025-07-20 17:32:05,219 - sglang - INFO - [2025-07-20 17:32:05 TP0] Decode batch. #running-req: 1, #token: 3098, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
  22621. 2025-07-20 17:32:05,219 - __main__ - INFO - sglang running req: 1 queue req: 0
  22622. 2025-07-20 17:32:06,048 - sglang - INFO - [2025-07-20 17:32:06 TP0] Decode batch. #running-req: 1, #token: 3138, token usage: 0.08, gen throughput (token/s): 48.24, #queue-req: 0
  22623. 2025-07-20 17:32:06,048 - __main__ - INFO - sglang running req: 1 queue req: 0
  22624. 2025-07-20 17:32:06,806 - __main__ - INFO - Queue remaining: 19
  22625. 2025-07-20 17:32:06,806 - __main__ - INFO -
  22626. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22627. ----------------------------------------------------------------------------------
  22628. finished_input_tokens 404.71 594.28
  22629. finished_output_tokens 99.94 146.76
  22630. sglang_input_tokens 431.24 560.84
  22631. sglang_output_tokens 110.84 145.47
  22632. 2025-07-20 17:32:06,806 - __main__ - INFO -
  22633. Worker ID | finished | started
  22634. ----------+----------+--------
  22635. 0 | 5 | 6
  22636. 2025-07-20 17:32:06,878 - sglang - INFO - [2025-07-20 17:32:06 TP0] Decode batch. #running-req: 1, #token: 3178, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
  22637. 2025-07-20 17:32:06,878 - __main__ - INFO - sglang running req: 1 queue req: 0
  22638. 2025-07-20 17:32:07,257 - __main__ - INFO - Finished TaskGroup for worker on a7cda58bb6cdd49b7ffd2f6d48a871b4e1da7e62
  22639. 2025-07-20 17:32:07,257 - __main__ - INFO - Got 1 docs for a7cda58bb6cdd49b7ffd2f6d48a871b4e1da7e62
  22640. 2025-07-20 17:32:07,259 - __main__ - INFO - Worker 0 processing work item e4811c9442eb8e0a3b6177e544c95e0299d41166
  22641. 2025-07-20 17:32:07,259 - __main__ - INFO - Created all tasks for e4811c9442eb8e0a3b6177e544c95e0299d41166
  22642. 2025-07-20 17:32:07,265 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013004.pdf in worker 0
  22643. 2025-07-20 17:32:07,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-6
  22644. 2025-07-20 17:32:07,388 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-4
  22645. 2025-07-20 17:32:07,407 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-3
  22646. 2025-07-20 17:32:07,411 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-2
  22647. 2025-07-20 17:32:07,446 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-1
  22648. 2025-07-20 17:32:07,506 - sglang - INFO - [2025-07-20 17:32:07 TP0] Prefill batch. #new-seq: 1, #new-token: 1353, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22649. 2025-07-20 17:32:07,506 - __main__ - INFO - sglang running req: 0 queue req: 0
  22650. 2025-07-20 17:32:07,526 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013004.pdf-5
  22651. 2025-07-20 17:32:08,038 - sglang - INFO - [2025-07-20 17:32:08 TP0] Prefill batch. #new-seq: 5, #new-token: 10553, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  22652. 2025-07-20 17:32:08,039 - __main__ - INFO - sglang running req: 1 queue req: 0
  22653. 2025-07-20 17:32:11,779 - sglang - INFO - [2025-07-20 17:32:11 TP0] Decode batch. #running-req: 6, #token: 12038, token usage: 0.32, gen throughput (token/s): 30.61, #queue-req: 0
  22654. 2025-07-20 17:32:11,779 - __main__ - INFO - sglang running req: 6 queue req: 0
  22655. 2025-07-20 17:32:12,650 - sglang - INFO - [2025-07-20 17:32:12 TP0] Decode batch. #running-req: 6, #token: 12278, token usage: 0.32, gen throughput (token/s): 275.43, #queue-req: 0
  22656. 2025-07-20 17:32:12,650 - __main__ - INFO - sglang running req: 6 queue req: 0
  22657. 2025-07-20 17:32:13,521 - sglang - INFO - [2025-07-20 17:32:13 TP0] Decode batch. #running-req: 6, #token: 12518, token usage: 0.33, gen throughput (token/s): 275.44, #queue-req: 0
  22658. 2025-07-20 17:32:13,522 - __main__ - INFO - sglang running req: 6 queue req: 0
  22659. 2025-07-20 17:32:14,392 - sglang - INFO - [2025-07-20 17:32:14 TP0] Decode batch. #running-req: 6, #token: 12758, token usage: 0.34, gen throughput (token/s): 275.70, #queue-req: 0
  22660. 2025-07-20 17:32:14,392 - __main__ - INFO - sglang running req: 6 queue req: 0
  22661. 2025-07-20 17:32:15,263 - sglang - INFO - [2025-07-20 17:32:15 TP0] Decode batch. #running-req: 6, #token: 12998, token usage: 0.34, gen throughput (token/s): 275.36, #queue-req: 0
  22662. 2025-07-20 17:32:15,264 - __main__ - INFO - sglang running req: 6 queue req: 0
  22663. 2025-07-20 17:32:16,137 - sglang - INFO - [2025-07-20 17:32:16 TP0] Decode batch. #running-req: 6, #token: 13238, token usage: 0.35, gen throughput (token/s): 274.71, #queue-req: 0
  22664. 2025-07-20 17:32:16,137 - __main__ - INFO - sglang running req: 6 queue req: 0
  22665. 2025-07-20 17:32:16,809 - __main__ - INFO - Queue remaining: 18
  22666. 2025-07-20 17:32:16,809 - __main__ - INFO -
  22667. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22668. ----------------------------------------------------------------------------------
  22669. finished_input_tokens 422.21 634.05
  22670. finished_output_tokens 103.45 155.36
  22671. sglang_input_tokens 430.04 573.41
  22672. sglang_output_tokens 110.62 148.84
  22673. 2025-07-20 17:32:16,809 - __main__ - INFO -
  22674. Worker ID | finished | started
  22675. ----------+----------+--------
  22676. 0 | 1 | 6
  22677. 2025-07-20 17:32:17,008 - sglang - INFO - [2025-07-20 17:32:17 TP0] Decode batch. #running-req: 5, #token: 11863, token usage: 0.31, gen throughput (token/s): 238.82, #queue-req: 0
  22678. 2025-07-20 17:32:17,008 - __main__ - INFO - sglang running req: 5 queue req: 0
  22679. 2025-07-20 17:32:17,866 - sglang - INFO - [2025-07-20 17:32:17 TP0] Decode batch. #running-req: 3, #token: 7537, token usage: 0.20, gen throughput (token/s): 192.24, #queue-req: 0
  22680. 2025-07-20 17:32:17,866 - __main__ - INFO - sglang running req: 3 queue req: 0
  22681. 2025-07-20 17:32:18,717 - sglang - INFO - [2025-07-20 17:32:18 TP0] Decode batch. #running-req: 3, #token: 7657, token usage: 0.20, gen throughput (token/s): 141.11, #queue-req: 0
  22682. 2025-07-20 17:32:18,717 - __main__ - INFO - sglang running req: 3 queue req: 0
  22683. 2025-07-20 17:32:19,563 - sglang - INFO - [2025-07-20 17:32:19 TP0] Decode batch. #running-req: 2, #token: 5691, token usage: 0.15, gen throughput (token/s): 118.20, #queue-req: 0
  22684. 2025-07-20 17:32:19,563 - __main__ - INFO - sglang running req: 2 queue req: 0
  22685. 2025-07-20 17:32:20,401 - sglang - INFO - [2025-07-20 17:32:20 TP0] Decode batch. #running-req: 2, #token: 5771, token usage: 0.15, gen throughput (token/s): 95.39, #queue-req: 0
  22686. 2025-07-20 17:32:20,402 - __main__ - INFO - sglang running req: 2 queue req: 0
  22687. 2025-07-20 17:32:21,240 - sglang - INFO - [2025-07-20 17:32:21 TP0] Decode batch. #running-req: 2, #token: 5851, token usage: 0.15, gen throughput (token/s): 95.43, #queue-req: 0
  22688. 2025-07-20 17:32:21,240 - __main__ - INFO - sglang running req: 2 queue req: 0
  22689. 2025-07-20 17:32:22,078 - sglang - INFO - [2025-07-20 17:32:22 TP0] Decode batch. #running-req: 2, #token: 5931, token usage: 0.16, gen throughput (token/s): 95.38, #queue-req: 0
  22690. 2025-07-20 17:32:22,079 - __main__ - INFO - sglang running req: 2 queue req: 0
  22691. 2025-07-20 17:32:22,918 - sglang - INFO - [2025-07-20 17:32:22 TP0] Decode batch. #running-req: 2, #token: 6011, token usage: 0.16, gen throughput (token/s): 95.24, #queue-req: 0
  22692. 2025-07-20 17:32:22,919 - __main__ - INFO - sglang running req: 2 queue req: 0
  22693. 2025-07-20 17:32:23,758 - sglang - INFO - [2025-07-20 17:32:23 TP0] Decode batch. #running-req: 2, #token: 6091, token usage: 0.16, gen throughput (token/s): 95.33, #queue-req: 0
  22694. 2025-07-20 17:32:23,758 - __main__ - INFO - sglang running req: 2 queue req: 0
  22695. 2025-07-20 17:32:24,597 - sglang - INFO - [2025-07-20 17:32:24 TP0] Decode batch. #running-req: 2, #token: 6171, token usage: 0.16, gen throughput (token/s): 95.35, #queue-req: 0
  22696. 2025-07-20 17:32:24,597 - __main__ - INFO - sglang running req: 2 queue req: 0
  22697. 2025-07-20 17:32:25,427 - sglang - INFO - [2025-07-20 17:32:25 TP0] Decode batch. #running-req: 1, #token: 3081, token usage: 0.08, gen throughput (token/s): 51.77, #queue-req: 0
  22698. 2025-07-20 17:32:25,427 - __main__ - INFO - sglang running req: 1 queue req: 0
  22699. 2025-07-20 17:32:26,257 - sglang - INFO - [2025-07-20 17:32:26 TP0] Decode batch. #running-req: 1, #token: 3121, token usage: 0.08, gen throughput (token/s): 48.20, #queue-req: 0
  22700. 2025-07-20 17:32:26,257 - __main__ - INFO - sglang running req: 1 queue req: 0
  22701. 2025-07-20 17:32:26,810 - __main__ - INFO - Queue remaining: 18
  22702. 2025-07-20 17:32:26,811 - __main__ - INFO -
  22703. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22704. ----------------------------------------------------------------------------------
  22705. finished_input_tokens 413.04 634.05
  22706. finished_output_tokens 101.20 155.36
  22707. sglang_input_tokens 438.36 593.28
  22708. sglang_output_tokens 111.61 144.04
  22709. 2025-07-20 17:32:26,811 - __main__ - INFO -
  22710. Worker ID | finished | started
  22711. ----------+----------+--------
  22712. 0 | 5 | 6
  22713. 2025-07-20 17:32:27,086 - sglang - INFO - [2025-07-20 17:32:27 TP0] Decode batch. #running-req: 1, #token: 3161, token usage: 0.08, gen throughput (token/s): 48.25, #queue-req: 0
  22714. 2025-07-20 17:32:27,086 - __main__ - INFO - sglang running req: 1 queue req: 0
  22715. 2025-07-20 17:32:27,859 - __main__ - INFO - Finished TaskGroup for worker on e4811c9442eb8e0a3b6177e544c95e0299d41166
  22716. 2025-07-20 17:32:27,859 - __main__ - INFO - Got 1 docs for e4811c9442eb8e0a3b6177e544c95e0299d41166
  22717. 2025-07-20 17:32:27,860 - __main__ - INFO - Worker 0 processing work item 95eb6113ad117cc5bc5c734f7ca31625e117229d
  22718. 2025-07-20 17:32:27,861 - __main__ - INFO - Created all tasks for 95eb6113ad117cc5bc5c734f7ca31625e117229d
  22719. 2025-07-20 17:32:27,867 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901002.pdf in worker 0
  22720. 2025-07-20 17:32:27,973 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-6
  22721. 2025-07-20 17:32:28,026 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-3
  22722. 2025-07-20 17:32:28,042 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-2
  22723. 2025-07-20 17:32:28,048 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-1
  22724. 2025-07-20 17:32:28,109 - sglang - INFO - [2025-07-20 17:32:28 TP0] Prefill batch. #new-seq: 1, #new-token: 1457, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22725. 2025-07-20 17:32:28,109 - __main__ - INFO - sglang running req: 0 queue req: 0
  22726. 2025-07-20 17:32:28,110 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-5
  22727. 2025-07-20 17:32:28,118 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901002.pdf-4
  22728. 2025-07-20 17:32:28,684 - sglang - INFO - [2025-07-20 17:32:28 TP0] Prefill batch. #new-seq: 5, #new-token: 11322, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  22729. 2025-07-20 17:32:28,684 - __main__ - INFO - sglang running req: 1 queue req: 0
  22730. 2025-07-20 17:32:32,162 - sglang - INFO - [2025-07-20 17:32:32 TP0] Decode batch. #running-req: 6, #token: 12797, token usage: 0.34, gen throughput (token/s): 10.84, #queue-req: 0
  22731. 2025-07-20 17:32:32,162 - __main__ - INFO - sglang running req: 6 queue req: 0
  22732. 2025-07-20 17:32:33,033 - sglang - INFO - [2025-07-20 17:32:33 TP0] Decode batch. #running-req: 6, #token: 13037, token usage: 0.34, gen throughput (token/s): 275.55, #queue-req: 0
  22733. 2025-07-20 17:32:33,033 - __main__ - INFO - sglang running req: 6 queue req: 0
  22734. 2025-07-20 17:32:33,906 - sglang - INFO - [2025-07-20 17:32:33 TP0] Decode batch. #running-req: 6, #token: 13277, token usage: 0.35, gen throughput (token/s): 274.78, #queue-req: 0
  22735. 2025-07-20 17:32:33,906 - __main__ - INFO - sglang running req: 6 queue req: 0
  22736. 2025-07-20 17:32:34,780 - sglang - INFO - [2025-07-20 17:32:34 TP0] Decode batch. #running-req: 6, #token: 13517, token usage: 0.36, gen throughput (token/s): 274.42, #queue-req: 0
  22737. 2025-07-20 17:32:34,781 - __main__ - INFO - sglang running req: 6 queue req: 0
  22738. 2025-07-20 17:32:35,656 - sglang - INFO - [2025-07-20 17:32:35 TP0] Decode batch. #running-req: 6, #token: 13757, token usage: 0.36, gen throughput (token/s): 274.22, #queue-req: 0
  22739. 2025-07-20 17:32:35,656 - __main__ - INFO - sglang running req: 6 queue req: 0
  22740. 2025-07-20 17:32:36,532 - sglang - INFO - [2025-07-20 17:32:36 TP0] Decode batch. #running-req: 6, #token: 13997, token usage: 0.37, gen throughput (token/s): 274.01, #queue-req: 0
  22741. 2025-07-20 17:32:36,532 - __main__ - INFO - sglang running req: 6 queue req: 0
  22742. 2025-07-20 17:32:36,812 - __main__ - INFO - Queue remaining: 17
  22743. 2025-07-20 17:32:36,812 - __main__ - INFO -
  22744. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22745. ----------------------------------------------------------------------------------
  22746. finished_input_tokens 429.56 673.73
  22747. finished_output_tokens 104.52 163.93
  22748. sglang_input_tokens 434.18 601.34
  22749. sglang_output_tokens 110.89 146.64
  22750. 2025-07-20 17:32:36,812 - __main__ - INFO -
  22751. Worker ID | started
  22752. ----------+--------
  22753. 0 | 6
  22754. 2025-07-20 17:32:37,408 - sglang - INFO - [2025-07-20 17:32:37 TP0] Decode batch. #running-req: 6, #token: 14237, token usage: 0.37, gen throughput (token/s): 273.87, #queue-req: 0
  22755. 2025-07-20 17:32:37,408 - __main__ - INFO - sglang running req: 6 queue req: 0
  22756. 2025-07-20 17:32:38,284 - sglang - INFO - [2025-07-20 17:32:38 TP0] Decode batch. #running-req: 5, #token: 12737, token usage: 0.34, gen throughput (token/s): 255.77, #queue-req: 0
  22757. 2025-07-20 17:32:38,284 - __main__ - INFO - sglang running req: 5 queue req: 0
  22758. 2025-07-20 17:32:39,156 - sglang - INFO - [2025-07-20 17:32:39 TP0] Decode batch. #running-req: 5, #token: 12937, token usage: 0.34, gen throughput (token/s): 229.36, #queue-req: 0
  22759. 2025-07-20 17:32:39,156 - __main__ - INFO - sglang running req: 5 queue req: 0
  22760. 2025-07-20 17:32:40,027 - sglang - INFO - [2025-07-20 17:32:40 TP0] Decode batch. #running-req: 5, #token: 13137, token usage: 0.35, gen throughput (token/s): 229.43, #queue-req: 0
  22761. 2025-07-20 17:32:40,028 - __main__ - INFO - sglang running req: 5 queue req: 0
  22762. 2025-07-20 17:32:40,899 - sglang - INFO - [2025-07-20 17:32:40 TP0] Decode batch. #running-req: 5, #token: 13337, token usage: 0.35, gen throughput (token/s): 229.41, #queue-req: 0
  22763. 2025-07-20 17:32:40,899 - __main__ - INFO - sglang running req: 5 queue req: 0
  22764. 2025-07-20 17:32:41,772 - sglang - INFO - [2025-07-20 17:32:41 TP0] Decode batch. #running-req: 5, #token: 13537, token usage: 0.36, gen throughput (token/s): 229.15, #queue-req: 0
  22765. 2025-07-20 17:32:41,772 - __main__ - INFO - sglang running req: 5 queue req: 0
  22766. 2025-07-20 17:32:42,645 - sglang - INFO - [2025-07-20 17:32:42 TP0] Decode batch. #running-req: 5, #token: 13737, token usage: 0.36, gen throughput (token/s): 228.96, #queue-req: 0
  22767. 2025-07-20 17:32:42,646 - __main__ - INFO - sglang running req: 5 queue req: 0
  22768. 2025-07-20 17:32:43,521 - sglang - INFO - [2025-07-20 17:32:43 TP0] Decode batch. #running-req: 5, #token: 13937, token usage: 0.37, gen throughput (token/s): 228.48, #queue-req: 0
  22769. 2025-07-20 17:32:43,521 - __main__ - INFO - sglang running req: 5 queue req: 0
  22770. 2025-07-20 17:32:44,393 - sglang - INFO - [2025-07-20 17:32:44 TP0] Decode batch. #running-req: 4, #token: 11695, token usage: 0.31, gen throughput (token/s): 218.86, #queue-req: 0
  22771. 2025-07-20 17:32:44,394 - __main__ - INFO - sglang running req: 4 queue req: 0
  22772. 2025-07-20 17:32:45,252 - sglang - INFO - [2025-07-20 17:32:45 TP0] Decode batch. #running-req: 3, #token: 9152, token usage: 0.24, gen throughput (token/s): 173.49, #queue-req: 0
  22773. 2025-07-20 17:32:45,253 - __main__ - INFO - sglang running req: 3 queue req: 0
  22774. 2025-07-20 17:32:46,106 - sglang - INFO - [2025-07-20 17:32:46 TP0] Decode batch. #running-req: 3, #token: 9272, token usage: 0.24, gen throughput (token/s): 140.60, #queue-req: 0
  22775. 2025-07-20 17:32:46,106 - __main__ - INFO - sglang running req: 3 queue req: 0
  22776. 2025-07-20 17:32:46,814 - __main__ - INFO - Queue remaining: 17
  22777. 2025-07-20 17:32:46,815 - __main__ - INFO -
  22778. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22779. ----------------------------------------------------------------------------------
  22780. finished_input_tokens 420.62 594.10
  22781. finished_output_tokens 102.34 144.93
  22782. sglang_input_tokens 441.81 614.34
  22783. sglang_output_tokens 112.88 150.94
  22784. 2025-07-20 17:32:46,815 - __main__ - INFO -
  22785. Worker ID | finished | started
  22786. ----------+----------+--------
  22787. 0 | 4 | 6
  22788. 2025-07-20 17:32:46,948 - sglang - INFO - [2025-07-20 17:32:46 TP0] Decode batch. #running-req: 2, #token: 6137, token usage: 0.16, gen throughput (token/s): 100.89, #queue-req: 0
  22789. 2025-07-20 17:32:46,949 - __main__ - INFO - sglang running req: 2 queue req: 0
  22790. 2025-07-20 17:32:47,788 - sglang - INFO - [2025-07-20 17:32:47 TP0] Decode batch. #running-req: 2, #token: 6217, token usage: 0.16, gen throughput (token/s): 95.26, #queue-req: 0
  22791. 2025-07-20 17:32:47,788 - __main__ - INFO - sglang running req: 2 queue req: 0
  22792. 2025-07-20 17:32:48,627 - sglang - INFO - [2025-07-20 17:32:48 TP0] Decode batch. #running-req: 2, #token: 6297, token usage: 0.17, gen throughput (token/s): 95.31, #queue-req: 0
  22793. 2025-07-20 17:32:48,628 - __main__ - INFO - sglang running req: 2 queue req: 0
  22794. 2025-07-20 17:32:49,467 - sglang - INFO - [2025-07-20 17:32:49 TP0] Decode batch. #running-req: 2, #token: 6377, token usage: 0.17, gen throughput (token/s): 95.27, #queue-req: 0
  22795. 2025-07-20 17:32:49,467 - __main__ - INFO - sglang running req: 2 queue req: 0
  22796. 2025-07-20 17:32:50,308 - sglang - INFO - [2025-07-20 17:32:50 TP0] Decode batch. #running-req: 2, #token: 6457, token usage: 0.17, gen throughput (token/s): 95.11, #queue-req: 0
  22797. 2025-07-20 17:32:50,309 - __main__ - INFO - sglang running req: 2 queue req: 0
  22798. 2025-07-20 17:32:51,143 - sglang - INFO - [2025-07-20 17:32:51 TP0] Decode batch. #running-req: 1, #token: 3307, token usage: 0.09, gen throughput (token/s): 63.52, #queue-req: 0
  22799. 2025-07-20 17:32:51,143 - __main__ - INFO - sglang running req: 1 queue req: 0
  22800. 2025-07-20 17:32:51,973 - sglang - INFO - [2025-07-20 17:32:51 TP0] Decode batch. #running-req: 1, #token: 3347, token usage: 0.09, gen throughput (token/s): 48.18, #queue-req: 0
  22801. 2025-07-20 17:32:51,973 - __main__ - INFO - sglang running req: 1 queue req: 0
  22802. 2025-07-20 17:32:52,804 - sglang - INFO - [2025-07-20 17:32:52 TP0] Decode batch. #running-req: 1, #token: 3387, token usage: 0.09, gen throughput (token/s): 48.14, #queue-req: 0
  22803. 2025-07-20 17:32:52,804 - __main__ - INFO - sglang running req: 1 queue req: 0
  22804. 2025-07-20 17:32:53,620 - __main__ - INFO - Finished TaskGroup for worker on 95eb6113ad117cc5bc5c734f7ca31625e117229d
  22805. 2025-07-20 17:32:53,620 - __main__ - INFO - Got 1 docs for 95eb6113ad117cc5bc5c734f7ca31625e117229d
  22806. 2025-07-20 17:32:53,622 - __main__ - INFO - Worker 0 processing work item f5bd195da84dc4c9a132080ffb1a40239bb6d12b
  22807. 2025-07-20 17:32:53,622 - __main__ - INFO - Created all tasks for f5bd195da84dc4c9a132080ffb1a40239bb6d12b
  22808. 2025-07-20 17:32:53,628 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106013002.pdf in worker 0
  22809. 2025-07-20 17:32:53,724 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-6
  22810. 2025-07-20 17:32:53,751 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-4
  22811. 2025-07-20 17:32:53,764 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-2
  22812. 2025-07-20 17:32:53,769 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-3
  22813. 2025-07-20 17:32:53,805 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-1
  22814. 2025-07-20 17:32:53,884 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013002.pdf-5
  22815. 2025-07-20 17:32:53,890 - sglang - INFO - [2025-07-20 17:32:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1749, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22816. 2025-07-20 17:32:53,890 - __main__ - INFO - sglang running req: 0 queue req: 0
  22817. 2025-07-20 17:32:54,517 - sglang - INFO - [2025-07-20 17:32:54 TP0] Prefill batch. #new-seq: 5, #new-token: 10159, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  22818. 2025-07-20 17:32:54,517 - __main__ - INFO - sglang running req: 1 queue req: 0
  22819. 2025-07-20 17:32:56,816 - __main__ - INFO - Queue remaining: 16
  22820. 2025-07-20 17:32:56,816 - __main__ - INFO -
  22821. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22822. ----------------------------------------------------------------------------------
  22823. finished_input_tokens 438.10 636.69
  22824. finished_output_tokens 108.26 158.01
  22825. sglang_input_tokens 442.53 600.97
  22826. sglang_output_tokens 114.37 151.97
  22827. 2025-07-20 17:32:56,816 - __main__ - INFO -
  22828. Worker ID | started
  22829. ----------+--------
  22830. 0 | 6
  22831. 2025-07-20 17:32:57,709 - sglang - INFO - [2025-07-20 17:32:57 TP0] Decode batch. #running-req: 6, #token: 11914, token usage: 0.31, gen throughput (token/s): 9.17, #queue-req: 0
  22832. 2025-07-20 17:32:57,709 - __main__ - INFO - sglang running req: 6 queue req: 0
  22833. 2025-07-20 17:32:58,578 - sglang - INFO - [2025-07-20 17:32:58 TP0] Decode batch. #running-req: 6, #token: 12154, token usage: 0.32, gen throughput (token/s): 276.27, #queue-req: 0
  22834. 2025-07-20 17:32:58,578 - __main__ - INFO - sglang running req: 6 queue req: 0
  22835. 2025-07-20 17:32:59,452 - sglang - INFO - [2025-07-20 17:32:59 TP0] Decode batch. #running-req: 6, #token: 12394, token usage: 0.33, gen throughput (token/s): 274.68, #queue-req: 0
  22836. 2025-07-20 17:32:59,452 - __main__ - INFO - sglang running req: 6 queue req: 0
  22837. 2025-07-20 17:33:00,324 - sglang - INFO - [2025-07-20 17:33:00 TP0] Decode batch. #running-req: 6, #token: 12634, token usage: 0.33, gen throughput (token/s): 275.24, #queue-req: 0
  22838. 2025-07-20 17:33:00,324 - __main__ - INFO - sglang running req: 6 queue req: 0
  22839. 2025-07-20 17:33:01,196 - sglang - INFO - [2025-07-20 17:33:01 TP0] Decode batch. #running-req: 6, #token: 12874, token usage: 0.34, gen throughput (token/s): 275.08, #queue-req: 0
  22840. 2025-07-20 17:33:01,196 - __main__ - INFO - sglang running req: 6 queue req: 0
  22841. 2025-07-20 17:33:02,069 - sglang - INFO - [2025-07-20 17:33:02 TP0] Decode batch. #running-req: 6, #token: 13114, token usage: 0.35, gen throughput (token/s): 275.10, #queue-req: 0
  22842. 2025-07-20 17:33:02,069 - __main__ - INFO - sglang running req: 6 queue req: 0
  22843. 2025-07-20 17:33:02,940 - sglang - INFO - [2025-07-20 17:33:02 TP0] Decode batch. #running-req: 5, #token: 11760, token usage: 0.31, gen throughput (token/s): 262.67, #queue-req: 0
  22844. 2025-07-20 17:33:02,941 - __main__ - INFO - sglang running req: 5 queue req: 0
  22845. 2025-07-20 17:33:03,810 - sglang - INFO - [2025-07-20 17:33:03 TP0] Decode batch. #running-req: 5, #token: 11960, token usage: 0.31, gen throughput (token/s): 230.04, #queue-req: 0
  22846. 2025-07-20 17:33:03,810 - __main__ - INFO - sglang running req: 5 queue req: 0
  22847. 2025-07-20 17:33:04,666 - sglang - INFO - [2025-07-20 17:33:04 TP0] Decode batch. #running-req: 3, #token: 7608, token usage: 0.20, gen throughput (token/s): 163.60, #queue-req: 0
  22848. 2025-07-20 17:33:04,666 - __main__ - INFO - sglang running req: 3 queue req: 0
  22849. 2025-07-20 17:33:05,517 - sglang - INFO - [2025-07-20 17:33:05 TP0] Decode batch. #running-req: 2, #token: 5646, token usage: 0.15, gen throughput (token/s): 139.67, #queue-req: 0
  22850. 2025-07-20 17:33:05,518 - __main__ - INFO - sglang running req: 2 queue req: 0
  22851. 2025-07-20 17:33:06,356 - sglang - INFO - [2025-07-20 17:33:06 TP0] Decode batch. #running-req: 2, #token: 5726, token usage: 0.15, gen throughput (token/s): 95.40, #queue-req: 0
  22852. 2025-07-20 17:33:06,356 - __main__ - INFO - sglang running req: 2 queue req: 0
  22853. 2025-07-20 17:33:06,817 - __main__ - INFO - Queue remaining: 16
  22854. 2025-07-20 17:33:06,818 - __main__ - INFO -
  22855. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22856. ----------------------------------------------------------------------------------
  22857. finished_input_tokens 429.34 636.69
  22858. finished_output_tokens 106.09 158.01
  22859. sglang_input_tokens 447.64 599.13
  22860. sglang_output_tokens 114.44 148.91
  22861. 2025-07-20 17:33:06,818 - __main__ - INFO -
  22862. Worker ID | finished | started
  22863. ----------+----------+--------
  22864. 0 | 4 | 6
  22865. 2025-07-20 17:33:07,195 - sglang - INFO - [2025-07-20 17:33:07 TP0] Decode batch. #running-req: 2, #token: 5806, token usage: 0.15, gen throughput (token/s): 95.41, #queue-req: 0
  22866. 2025-07-20 17:33:07,195 - __main__ - INFO - sglang running req: 2 queue req: 0
  22867. 2025-07-20 17:33:08,034 - sglang - INFO - [2025-07-20 17:33:08 TP0] Decode batch. #running-req: 2, #token: 5886, token usage: 0.15, gen throughput (token/s): 95.26, #queue-req: 0
  22868. 2025-07-20 17:33:08,035 - __main__ - INFO - sglang running req: 2 queue req: 0
  22869. 2025-07-20 17:33:08,874 - sglang - INFO - [2025-07-20 17:33:08 TP0] Decode batch. #running-req: 2, #token: 5966, token usage: 0.16, gen throughput (token/s): 95.25, #queue-req: 0
  22870. 2025-07-20 17:33:08,875 - __main__ - INFO - sglang running req: 2 queue req: 0
  22871. 2025-07-20 17:33:09,714 - sglang - INFO - [2025-07-20 17:33:09 TP0] Decode batch. #running-req: 2, #token: 6046, token usage: 0.16, gen throughput (token/s): 95.26, #queue-req: 0
  22872. 2025-07-20 17:33:09,714 - __main__ - INFO - sglang running req: 2 queue req: 0
  22873. 2025-07-20 17:33:10,553 - sglang - INFO - [2025-07-20 17:33:10 TP0] Decode batch. #running-req: 2, #token: 6126, token usage: 0.16, gen throughput (token/s): 95.32, #queue-req: 0
  22874. 2025-07-20 17:33:10,553 - __main__ - INFO - sglang running req: 2 queue req: 0
  22875. 2025-07-20 17:33:11,390 - sglang - INFO - [2025-07-20 17:33:11 TP0] Decode batch. #running-req: 1, #token: 3058, token usage: 0.08, gen throughput (token/s): 78.85, #queue-req: 0
  22876. 2025-07-20 17:33:11,391 - __main__ - INFO - sglang running req: 1 queue req: 0
  22877. 2025-07-20 17:33:12,221 - sglang - INFO - [2025-07-20 17:33:12 TP0] Decode batch. #running-req: 1, #token: 3098, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
  22878. 2025-07-20 17:33:12,221 - __main__ - INFO - sglang running req: 1 queue req: 0
  22879. 2025-07-20 17:33:13,051 - sglang - INFO - [2025-07-20 17:33:13 TP0] Decode batch. #running-req: 1, #token: 3138, token usage: 0.08, gen throughput (token/s): 48.19, #queue-req: 0
  22880. 2025-07-20 17:33:13,051 - __main__ - INFO - sglang running req: 1 queue req: 0
  22881. 2025-07-20 17:33:13,881 - sglang - INFO - [2025-07-20 17:33:13 TP0] Decode batch. #running-req: 1, #token: 3178, token usage: 0.08, gen throughput (token/s): 48.20, #queue-req: 0
  22882. 2025-07-20 17:33:13,881 - __main__ - INFO - sglang running req: 1 queue req: 0
  22883. 2025-07-20 17:33:14,260 - __main__ - INFO - Finished TaskGroup for worker on f5bd195da84dc4c9a132080ffb1a40239bb6d12b
  22884. 2025-07-20 17:33:14,260 - __main__ - INFO - Got 1 docs for f5bd195da84dc4c9a132080ffb1a40239bb6d12b
  22885. 2025-07-20 17:33:14,262 - __main__ - INFO - Worker 0 processing work item 7815bd6305410d3cbbea8287ed60dae1462e6e65
  22886. 2025-07-20 17:33:14,262 - __main__ - INFO - Created all tasks for 7815bd6305410d3cbbea8287ed60dae1462e6e65
  22887. 2025-07-20 17:33:14,267 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106013001.pdf in worker 0
  22888. 2025-07-20 17:33:14,383 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-5
  22889. 2025-07-20 17:33:14,411 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-2
  22890. 2025-07-20 17:33:14,417 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-3
  22891. 2025-07-20 17:33:14,445 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-1
  22892. 2025-07-20 17:33:14,530 - sglang - INFO - [2025-07-20 17:33:14 TP0] Prefill batch. #new-seq: 1, #new-token: 1537, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22893. 2025-07-20 17:33:14,531 - __main__ - INFO - sglang running req: 0 queue req: 0
  22894. 2025-07-20 17:33:14,536 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106013001.pdf-4
  22895. 2025-07-20 17:33:15,118 - sglang - INFO - [2025-07-20 17:33:15 TP0] Prefill batch. #new-seq: 4, #new-token: 8980, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  22896. 2025-07-20 17:33:15,118 - __main__ - INFO - sglang running req: 1 queue req: 0
  22897. 2025-07-20 17:33:16,819 - __main__ - INFO - Queue remaining: 15
  22898. 2025-07-20 17:33:16,819 - __main__ - INFO -
  22899. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22900. ----------------------------------------------------------------------------------
  22901. finished_input_tokens 444.26 607.82
  22902. finished_output_tokens 109.08 150.63
  22903. sglang_input_tokens 448.51 607.82
  22904. sglang_output_tokens 114.95 150.63
  22905. 2025-07-20 17:33:16,819 - __main__ - INFO -
  22906. Worker ID | started
  22907. ----------+--------
  22908. 0 | 5
  22909. 2025-07-20 17:33:18,331 - sglang - INFO - [2025-07-20 17:33:18 TP0] Decode batch. #running-req: 5, #token: 10627, token usage: 0.28, gen throughput (token/s): 28.76, #queue-req: 0
  22910. 2025-07-20 17:33:18,331 - __main__ - INFO - sglang running req: 5 queue req: 0
  22911. 2025-07-20 17:33:19,198 - sglang - INFO - [2025-07-20 17:33:19 TP0] Decode batch. #running-req: 5, #token: 10827, token usage: 0.29, gen throughput (token/s): 230.59, #queue-req: 0
  22912. 2025-07-20 17:33:19,199 - __main__ - INFO - sglang running req: 5 queue req: 0
  22913. 2025-07-20 17:33:20,065 - sglang - INFO - [2025-07-20 17:33:20 TP0] Decode batch. #running-req: 5, #token: 11027, token usage: 0.29, gen throughput (token/s): 230.83, #queue-req: 0
  22914. 2025-07-20 17:33:20,065 - __main__ - INFO - sglang running req: 5 queue req: 0
  22915. 2025-07-20 17:33:20,931 - sglang - INFO - [2025-07-20 17:33:20 TP0] Decode batch. #running-req: 5, #token: 11227, token usage: 0.30, gen throughput (token/s): 230.80, #queue-req: 0
  22916. 2025-07-20 17:33:20,931 - __main__ - INFO - sglang running req: 5 queue req: 0
  22917. 2025-07-20 17:33:21,799 - sglang - INFO - [2025-07-20 17:33:21 TP0] Decode batch. #running-req: 5, #token: 11427, token usage: 0.30, gen throughput (token/s): 230.41, #queue-req: 0
  22918. 2025-07-20 17:33:21,800 - __main__ - INFO - sglang running req: 5 queue req: 0
  22919. 2025-07-20 17:33:22,670 - sglang - INFO - [2025-07-20 17:33:22 TP0] Decode batch. #running-req: 5, #token: 11627, token usage: 0.31, gen throughput (token/s): 229.64, #queue-req: 0
  22920. 2025-07-20 17:33:22,670 - __main__ - INFO - sglang running req: 5 queue req: 0
  22921. 2025-07-20 17:33:23,541 - sglang - INFO - [2025-07-20 17:33:23 TP0] Decode batch. #running-req: 5, #token: 11827, token usage: 0.31, gen throughput (token/s): 229.61, #queue-req: 0
  22922. 2025-07-20 17:33:23,542 - __main__ - INFO - sglang running req: 5 queue req: 0
  22923. 2025-07-20 17:33:24,402 - sglang - INFO - [2025-07-20 17:33:24 TP0] Decode batch. #running-req: 4, #token: 10188, token usage: 0.27, gen throughput (token/s): 197.42, #queue-req: 0
  22924. 2025-07-20 17:33:24,403 - __main__ - INFO - sglang running req: 4 queue req: 0
  22925. 2025-07-20 17:33:25,260 - sglang - INFO - [2025-07-20 17:33:25 TP0] Decode batch. #running-req: 3, #token: 7677, token usage: 0.20, gen throughput (token/s): 175.99, #queue-req: 0
  22926. 2025-07-20 17:33:25,261 - __main__ - INFO - sglang running req: 3 queue req: 0
  22927. 2025-07-20 17:33:26,110 - sglang - INFO - [2025-07-20 17:33:26 TP0] Decode batch. #running-req: 2, #token: 5641, token usage: 0.15, gen throughput (token/s): 129.51, #queue-req: 0
  22928. 2025-07-20 17:33:26,110 - __main__ - INFO - sglang running req: 2 queue req: 0
  22929. 2025-07-20 17:33:26,820 - __main__ - INFO - Queue remaining: 15
  22930. 2025-07-20 17:33:26,821 - __main__ - INFO -
  22931. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22932. ----------------------------------------------------------------------------------
  22933. finished_input_tokens 435.72 607.82
  22934. finished_output_tokens 106.98 150.63
  22935. sglang_input_tokens 450.73 602.50
  22936. sglang_output_tokens 114.63 149.60
  22937. 2025-07-20 17:33:26,821 - __main__ - INFO -
  22938. Worker ID | finished | started
  22939. ----------+----------+--------
  22940. 0 | 3 | 5
  22941. 2025-07-20 17:33:26,949 - sglang - INFO - [2025-07-20 17:33:26 TP0] Decode batch. #running-req: 2, #token: 5721, token usage: 0.15, gen throughput (token/s): 95.32, #queue-req: 0
  22942. 2025-07-20 17:33:26,949 - __main__ - INFO - sglang running req: 2 queue req: 0
  22943. 2025-07-20 17:33:27,788 - sglang - INFO - [2025-07-20 17:33:27 TP0] Decode batch. #running-req: 2, #token: 5801, token usage: 0.15, gen throughput (token/s): 95.35, #queue-req: 0
  22944. 2025-07-20 17:33:27,788 - __main__ - INFO - sglang running req: 2 queue req: 0
  22945. 2025-07-20 17:33:28,627 - sglang - INFO - [2025-07-20 17:33:28 TP0] Decode batch. #running-req: 2, #token: 5881, token usage: 0.15, gen throughput (token/s): 95.31, #queue-req: 0
  22946. 2025-07-20 17:33:28,628 - __main__ - INFO - sglang running req: 2 queue req: 0
  22947. 2025-07-20 17:33:29,467 - sglang - INFO - [2025-07-20 17:33:29 TP0] Decode batch. #running-req: 2, #token: 5961, token usage: 0.16, gen throughput (token/s): 95.26, #queue-req: 0
  22948. 2025-07-20 17:33:29,467 - __main__ - INFO - sglang running req: 2 queue req: 0
  22949. 2025-07-20 17:33:30,308 - sglang - INFO - [2025-07-20 17:33:30 TP0] Decode batch. #running-req: 2, #token: 6041, token usage: 0.16, gen throughput (token/s): 95.20, #queue-req: 0
  22950. 2025-07-20 17:33:30,308 - __main__ - INFO - sglang running req: 2 queue req: 0
  22951. 2025-07-20 17:33:31,147 - sglang - INFO - [2025-07-20 17:33:31 TP0] Decode batch. #running-req: 2, #token: 6121, token usage: 0.16, gen throughput (token/s): 95.30, #queue-req: 0
  22952. 2025-07-20 17:33:31,147 - __main__ - INFO - sglang running req: 2 queue req: 0
  22953. 2025-07-20 17:33:31,979 - sglang - INFO - [2025-07-20 17:33:31 TP0] Decode batch. #running-req: 1, #token: 3032, token usage: 0.08, gen throughput (token/s): 55.29, #queue-req: 0
  22954. 2025-07-20 17:33:31,979 - __main__ - INFO - sglang running req: 1 queue req: 0
  22955. 2025-07-20 17:33:32,809 - sglang - INFO - [2025-07-20 17:33:32 TP0] Decode batch. #running-req: 1, #token: 3072, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
  22956. 2025-07-20 17:33:32,809 - __main__ - INFO - sglang running req: 1 queue req: 0
  22957. 2025-07-20 17:33:33,640 - sglang - INFO - [2025-07-20 17:33:33 TP0] Decode batch. #running-req: 1, #token: 3112, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
  22958. 2025-07-20 17:33:33,640 - __main__ - INFO - sglang running req: 1 queue req: 0
  22959. 2025-07-20 17:33:34,471 - sglang - INFO - [2025-07-20 17:33:34 TP0] Decode batch. #running-req: 1, #token: 3152, token usage: 0.08, gen throughput (token/s): 48.14, #queue-req: 0
  22960. 2025-07-20 17:33:34,471 - __main__ - INFO - sglang running req: 1 queue req: 0
  22961. 2025-07-20 17:33:34,870 - __main__ - INFO - Finished TaskGroup for worker on 7815bd6305410d3cbbea8287ed60dae1462e6e65
  22962. 2025-07-20 17:33:34,871 - __main__ - INFO - Got 1 docs for 7815bd6305410d3cbbea8287ed60dae1462e6e65
  22963. 2025-07-20 17:33:34,872 - __main__ - INFO - Worker 0 processing work item 1cbf4da516b0dca0de138db476a8a65d2dbc5aab
  22964. 2025-07-20 17:33:34,872 - __main__ - INFO - Created all tasks for 1cbf4da516b0dca0de138db476a8a65d2dbc5aab
  22965. 2025-07-20 17:33:34,878 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106004000.pdf in worker 0
  22966. 2025-07-20 17:33:35,014 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-4
  22967. 2025-07-20 17:33:35,050 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-3
  22968. 2025-07-20 17:33:35,061 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-5
  22969. 2025-07-20 17:33:35,064 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-1
  22970. 2025-07-20 17:33:35,071 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-2
  22971. 2025-07-20 17:33:35,080 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-7
  22972. 2025-07-20 17:33:35,089 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106004000.pdf-6
  22973. 2025-07-20 17:33:35,185 - sglang - INFO - [2025-07-20 17:33:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2027, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  22974. 2025-07-20 17:33:35,186 - __main__ - INFO - sglang running req: 0 queue req: 0
  22975. 2025-07-20 17:33:35,858 - sglang - INFO - [2025-07-20 17:33:35 TP0] Prefill batch. #new-seq: 6, #new-token: 13399, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  22976. 2025-07-20 17:33:35,859 - __main__ - INFO - sglang running req: 1 queue req: 0
  22977. 2025-07-20 17:33:36,822 - __main__ - INFO - Queue remaining: 14
  22978. 2025-07-20 17:33:36,823 - __main__ - INFO -
  22979. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  22980. ----------------------------------------------------------------------------------
  22981. finished_input_tokens 447.33 642.87
  22982. finished_output_tokens 109.51 158.66
  22983. sglang_input_tokens 451.43 579.61
  22984. sglang_output_tokens 115.16 146.10
  22985. 2025-07-20 17:33:36,823 - __main__ - INFO -
  22986. Worker ID | started
  22987. ----------+--------
  22988. 0 | 7
  22989. 2025-07-20 17:33:40,386 - sglang - INFO - [2025-07-20 17:33:40 TP0] Decode batch. #running-req: 7, #token: 15573, token usage: 0.41, gen throughput (token/s): 28.06, #queue-req: 0
  22990. 2025-07-20 17:33:40,386 - __main__ - INFO - sglang running req: 7 queue req: 0
  22991. 2025-07-20 17:33:41,267 - sglang - INFO - [2025-07-20 17:33:41 TP0] Decode batch. #running-req: 7, #token: 15853, token usage: 0.42, gen throughput (token/s): 317.60, #queue-req: 0
  22992. 2025-07-20 17:33:41,268 - __main__ - INFO - sglang running req: 7 queue req: 0
  22993. 2025-07-20 17:33:42,150 - sglang - INFO - [2025-07-20 17:33:42 TP0] Decode batch. #running-req: 7, #token: 16133, token usage: 0.42, gen throughput (token/s): 317.42, #queue-req: 0
  22994. 2025-07-20 17:33:42,150 - __main__ - INFO - sglang running req: 7 queue req: 0
  22995. 2025-07-20 17:33:43,031 - sglang - INFO - [2025-07-20 17:33:43 TP0] Decode batch. #running-req: 7, #token: 16413, token usage: 0.43, gen throughput (token/s): 317.57, #queue-req: 0
  22996. 2025-07-20 17:33:43,032 - __main__ - INFO - sglang running req: 7 queue req: 0
  22997. 2025-07-20 17:33:43,915 - sglang - INFO - [2025-07-20 17:33:43 TP0] Decode batch. #running-req: 7, #token: 16693, token usage: 0.44, gen throughput (token/s): 316.84, #queue-req: 0
  22998. 2025-07-20 17:33:43,915 - __main__ - INFO - sglang running req: 7 queue req: 0
  22999. 2025-07-20 17:33:44,801 - sglang - INFO - [2025-07-20 17:33:44 TP0] Decode batch. #running-req: 7, #token: 16973, token usage: 0.45, gen throughput (token/s): 315.96, #queue-req: 0
  23000. 2025-07-20 17:33:44,801 - __main__ - INFO - sglang running req: 7 queue req: 0
  23001. 2025-07-20 17:33:45,687 - sglang - INFO - [2025-07-20 17:33:45 TP0] Decode batch. #running-req: 7, #token: 17253, token usage: 0.45, gen throughput (token/s): 316.16, #queue-req: 0
  23002. 2025-07-20 17:33:45,687 - __main__ - INFO - sglang running req: 7 queue req: 0
  23003. 2025-07-20 17:33:46,572 - sglang - INFO - [2025-07-20 17:33:46 TP0] Decode batch. #running-req: 7, #token: 17533, token usage: 0.46, gen throughput (token/s): 316.17, #queue-req: 0
  23004. 2025-07-20 17:33:46,573 - __main__ - INFO - sglang running req: 7 queue req: 0
  23005. 2025-07-20 17:33:46,823 - __main__ - INFO - Queue remaining: 14
  23006. 2025-07-20 17:33:46,824 - __main__ - INFO -
  23007. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23008. ----------------------------------------------------------------------------------
  23009. finished_input_tokens 439.05 642.87
  23010. finished_output_tokens 107.48 158.66
  23011. sglang_input_tokens 443.07 579.61
  23012. sglang_output_tokens 113.03 146.10
  23013. 2025-07-20 17:33:46,824 - __main__ - INFO -
  23014. Worker ID | started
  23015. ----------+--------
  23016. 0 | 7
  23017. 2025-07-20 17:33:47,458 - sglang - INFO - [2025-07-20 17:33:47 TP0] Decode batch. #running-req: 7, #token: 17813, token usage: 0.47, gen throughput (token/s): 316.31, #queue-req: 0
  23018. 2025-07-20 17:33:47,458 - __main__ - INFO - sglang running req: 7 queue req: 0
  23019. 2025-07-20 17:33:48,342 - sglang - INFO - [2025-07-20 17:33:48 TP0] Decode batch. #running-req: 6, #token: 15685, token usage: 0.41, gen throughput (token/s): 301.99, #queue-req: 0
  23020. 2025-07-20 17:33:48,342 - __main__ - INFO - sglang running req: 6 queue req: 0
  23021. 2025-07-20 17:33:49,223 - sglang - INFO - [2025-07-20 17:33:49 TP0] Decode batch. #running-req: 6, #token: 15925, token usage: 0.42, gen throughput (token/s): 272.47, #queue-req: 0
  23022. 2025-07-20 17:33:49,223 - __main__ - INFO - sglang running req: 6 queue req: 0
  23023. 2025-07-20 17:33:50,103 - sglang - INFO - [2025-07-20 17:33:50 TP0] Decode batch. #running-req: 5, #token: 13191, token usage: 0.35, gen throughput (token/s): 262.42, #queue-req: 0
  23024. 2025-07-20 17:33:50,103 - __main__ - INFO - sglang running req: 5 queue req: 0
  23025. 2025-07-20 17:33:50,976 - sglang - INFO - [2025-07-20 17:33:50 TP0] Decode batch. #running-req: 5, #token: 13391, token usage: 0.35, gen throughput (token/s): 229.06, #queue-req: 0
  23026. 2025-07-20 17:33:50,976 - __main__ - INFO - sglang running req: 5 queue req: 0
  23027. 2025-07-20 17:33:51,841 - sglang - INFO - [2025-07-20 17:33:51 TP0] Decode batch. #running-req: 3, #token: 8173, token usage: 0.22, gen throughput (token/s): 198.80, #queue-req: 0
  23028. 2025-07-20 17:33:51,841 - __main__ - INFO - sglang running req: 3 queue req: 0
  23029. 2025-07-20 17:33:52,684 - sglang - INFO - [2025-07-20 17:33:52 TP0] Decode batch. #running-req: 2, #token: 5725, token usage: 0.15, gen throughput (token/s): 105.60, #queue-req: 0
  23030. 2025-07-20 17:33:52,684 - __main__ - INFO - sglang running req: 2 queue req: 0
  23031. 2025-07-20 17:33:53,503 - __main__ - INFO - Finished TaskGroup for worker on 1cbf4da516b0dca0de138db476a8a65d2dbc5aab
  23032. 2025-07-20 17:33:53,503 - __main__ - INFO - Got 1 docs for 1cbf4da516b0dca0de138db476a8a65d2dbc5aab
  23033. 2025-07-20 17:33:53,505 - __main__ - INFO - Worker 0 processing work item 03f19a67ca1619f854740bd806a32d7112c3c315
  23034. 2025-07-20 17:33:53,505 - __main__ - INFO - Created all tasks for 03f19a67ca1619f854740bd806a32d7112c3c315
  23035. 2025-07-20 17:33:53,512 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG3440106018000.pdf in worker 0
  23036. 2025-07-20 17:33:53,639 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-6
  23037. 2025-07-20 17:33:53,655 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-4
  23038. 2025-07-20 17:33:53,664 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-5
  23039. 2025-07-20 17:33:53,668 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-7
  23040. 2025-07-20 17:33:53,691 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-2
  23041. 2025-07-20 17:33:53,693 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-3
  23042. 2025-07-20 17:33:53,697 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-1
  23043. 2025-07-20 17:33:53,701 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-9
  23044. 2025-07-20 17:33:53,720 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106018000.pdf-8
  23045. 2025-07-20 17:33:53,788 - sglang - INFO - [2025-07-20 17:33:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1928, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  23046. 2025-07-20 17:33:53,788 - __main__ - INFO - sglang running req: 0 queue req: 0
  23047. 2025-07-20 17:33:54,473 - sglang - INFO - [2025-07-20 17:33:54 TP0] Prefill batch. #new-seq: 6, #new-token: 13089, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 2
  23048. 2025-07-20 17:33:54,473 - __main__ - INFO - sglang running req: 1 queue req: 2
  23049. 2025-07-20 17:33:56,825 - __main__ - INFO - Queue remaining: 13
  23050. 2025-07-20 17:33:56,825 - __main__ - INFO -
  23051. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23052. ----------------------------------------------------------------------------------
  23053. finished_input_tokens 459.10 605.68
  23054. finished_output_tokens 112.15 150.76
  23055. sglang_input_tokens 463.04 592.35
  23056. sglang_output_tokens 117.60 148.92
  23057. 2025-07-20 17:33:56,826 - __main__ - INFO -
  23058. Worker ID | started
  23059. ----------+--------
  23060. 0 | 9
  23061. 2025-07-20 17:33:58,471 - sglang - INFO - [2025-07-20 17:33:58 TP0] Decode batch. #running-req: 7, #token: 15024, token usage: 0.40, gen throughput (token/s): 10.54, #queue-req: 2
  23062. 2025-07-20 17:33:58,471 - __main__ - INFO - sglang running req: 7 queue req: 2
  23063. 2025-07-20 17:33:59,347 - sglang - INFO - [2025-07-20 17:33:59 TP0] Decode batch. #running-req: 7, #token: 15304, token usage: 0.40, gen throughput (token/s): 319.30, #queue-req: 2
  23064. 2025-07-20 17:33:59,348 - __main__ - INFO - sglang running req: 7 queue req: 2
  23065. 2025-07-20 17:34:00,228 - sglang - INFO - [2025-07-20 17:34:00 TP0] Decode batch. #running-req: 7, #token: 15584, token usage: 0.41, gen throughput (token/s): 317.85, #queue-req: 2
  23066. 2025-07-20 17:34:00,229 - __main__ - INFO - sglang running req: 7 queue req: 2
  23067. 2025-07-20 17:34:01,109 - sglang - INFO - [2025-07-20 17:34:01 TP0] Decode batch. #running-req: 7, #token: 15864, token usage: 0.42, gen throughput (token/s): 317.85, #queue-req: 2
  23068. 2025-07-20 17:34:01,109 - __main__ - INFO - sglang running req: 7 queue req: 2
  23069. 2025-07-20 17:34:01,990 - sglang - INFO - [2025-07-20 17:34:01 TP0] Decode batch. #running-req: 7, #token: 16144, token usage: 0.42, gen throughput (token/s): 317.83, #queue-req: 2
  23070. 2025-07-20 17:34:01,990 - __main__ - INFO - sglang running req: 7 queue req: 2
  23071. 2025-07-20 17:34:02,871 - sglang - INFO - [2025-07-20 17:34:02 TP0] Decode batch. #running-req: 7, #token: 16424, token usage: 0.43, gen throughput (token/s): 317.77, #queue-req: 2
  23072. 2025-07-20 17:34:02,872 - __main__ - INFO - sglang running req: 7 queue req: 2
  23073. 2025-07-20 17:34:03,753 - sglang - INFO - [2025-07-20 17:34:03 TP0] Decode batch. #running-req: 7, #token: 16704, token usage: 0.44, gen throughput (token/s): 317.49, #queue-req: 2
  23074. 2025-07-20 17:34:03,753 - __main__ - INFO - sglang running req: 7 queue req: 2
  23075. 2025-07-20 17:34:04,573 - sglang - INFO - [2025-07-20 17:34:04 TP0] Prefill batch. #new-seq: 2, #new-token: 4265, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.38, #running-req: 6, #queue-req: 0
  23076. 2025-07-20 17:34:04,573 - __main__ - INFO - sglang running req: 6 queue req: 0
  23077. 2025-07-20 17:34:05,970 - sglang - INFO - [2025-07-20 17:34:05 TP0] Decode batch. #running-req: 8, #token: 18762, token usage: 0.49, gen throughput (token/s): 127.19, #queue-req: 0
  23078. 2025-07-20 17:34:05,971 - __main__ - INFO - sglang running req: 8 queue req: 0
  23079. 2025-07-20 17:34:06,826 - __main__ - INFO - Queue remaining: 13
  23080. 2025-07-20 17:34:06,827 - __main__ - INFO -
  23081. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23082. ----------------------------------------------------------------------------------
  23083. finished_input_tokens 450.91 546.64
  23084. finished_output_tokens 110.15 138.33
  23085. sglang_input_tokens 466.07 567.73
  23086. sglang_output_tokens 117.10 141.32
  23087. 2025-07-20 17:34:06,827 - __main__ - INFO -
  23088. Worker ID | finished | started
  23089. ----------+----------+--------
  23090. 0 | 3 | 9
  23091. 2025-07-20 17:34:06,855 - sglang - INFO - [2025-07-20 17:34:06 TP0] Decode batch. #running-req: 6, #token: 14326, token usage: 0.38, gen throughput (token/s): 332.22, #queue-req: 0
  23092. 2025-07-20 17:34:06,855 - __main__ - INFO - sglang running req: 6 queue req: 0
  23093. 2025-07-20 17:34:07,733 - sglang - INFO - [2025-07-20 17:34:07 TP0] Decode batch. #running-req: 6, #token: 14566, token usage: 0.38, gen throughput (token/s): 273.46, #queue-req: 0
  23094. 2025-07-20 17:34:07,733 - __main__ - INFO - sglang running req: 6 queue req: 0
  23095. 2025-07-20 17:34:08,612 - sglang - INFO - [2025-07-20 17:34:08 TP0] Decode batch. #running-req: 6, #token: 14806, token usage: 0.39, gen throughput (token/s): 273.18, #queue-req: 0
  23096. 2025-07-20 17:34:08,612 - __main__ - INFO - sglang running req: 6 queue req: 0
  23097. 2025-07-20 17:34:09,491 - sglang - INFO - [2025-07-20 17:34:09 TP0] Decode batch. #running-req: 6, #token: 15046, token usage: 0.40, gen throughput (token/s): 272.84, #queue-req: 0
  23098. 2025-07-20 17:34:09,491 - __main__ - INFO - sglang running req: 6 queue req: 0
  23099. 2025-07-20 17:34:10,373 - sglang - INFO - [2025-07-20 17:34:10 TP0] Decode batch. #running-req: 6, #token: 15286, token usage: 0.40, gen throughput (token/s): 272.06, #queue-req: 0
  23100. 2025-07-20 17:34:10,374 - __main__ - INFO - sglang running req: 6 queue req: 0
  23101. 2025-07-20 17:34:11,256 - sglang - INFO - [2025-07-20 17:34:11 TP0] Decode batch. #running-req: 5, #token: 12947, token usage: 0.34, gen throughput (token/s): 258.40, #queue-req: 0
  23102. 2025-07-20 17:34:11,256 - __main__ - INFO - sglang running req: 5 queue req: 0
  23103. 2025-07-20 17:34:12,125 - sglang - INFO - [2025-07-20 17:34:12 TP0] Decode batch. #running-req: 4, #token: 10132, token usage: 0.27, gen throughput (token/s): 205.84, #queue-req: 0
  23104. 2025-07-20 17:34:12,125 - __main__ - INFO - sglang running req: 4 queue req: 0
  23105. 2025-07-20 17:34:12,986 - sglang - INFO - [2025-07-20 17:34:12 TP0] Decode batch. #running-req: 3, #token: 7479, token usage: 0.20, gen throughput (token/s): 170.85, #queue-req: 0
  23106. 2025-07-20 17:34:12,986 - __main__ - INFO - sglang running req: 3 queue req: 0
  23107. 2025-07-20 17:34:13,838 - sglang - INFO - [2025-07-20 17:34:13 TP0] Decode batch. #running-req: 3, #token: 7599, token usage: 0.20, gen throughput (token/s): 140.82, #queue-req: 0
  23108. 2025-07-20 17:34:13,838 - __main__ - INFO - sglang running req: 3 queue req: 0
  23109. 2025-07-20 17:34:14,677 - sglang - INFO - [2025-07-20 17:34:14 TP0] Decode batch. #running-req: 2, #token: 5071, token usage: 0.13, gen throughput (token/s): 103.62, #queue-req: 0
  23110. 2025-07-20 17:34:14,678 - __main__ - INFO - sglang running req: 2 queue req: 0
  23111. 2025-07-20 17:34:15,514 - sglang - INFO - [2025-07-20 17:34:15 TP0] Decode batch. #running-req: 2, #token: 5151, token usage: 0.14, gen throughput (token/s): 95.64, #queue-req: 0
  23112. 2025-07-20 17:34:15,514 - __main__ - INFO - sglang running req: 2 queue req: 0
  23113. 2025-07-20 17:34:16,353 - sglang - INFO - [2025-07-20 17:34:16 TP0] Decode batch. #running-req: 2, #token: 5231, token usage: 0.14, gen throughput (token/s): 95.34, #queue-req: 0
  23114. 2025-07-20 17:34:16,353 - __main__ - INFO - sglang running req: 2 queue req: 0
  23115. 2025-07-20 17:34:16,827 - __main__ - INFO - Queue remaining: 13
  23116. 2025-07-20 17:34:16,828 - __main__ - INFO -
  23117. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23118. ----------------------------------------------------------------------------------
  23119. finished_input_tokens 443.00 546.64
  23120. finished_output_tokens 108.22 138.33
  23121. sglang_input_tokens 473.13 593.02
  23122. sglang_output_tokens 119.06 148.84
  23123. 2025-07-20 17:34:16,828 - __main__ - INFO -
  23124. Worker ID | finished | started
  23125. ----------+----------+--------
  23126. 0 | 7 | 9
  23127. 2025-07-20 17:34:17,193 - sglang - INFO - [2025-07-20 17:34:17 TP0] Decode batch. #running-req: 2, #token: 5311, token usage: 0.14, gen throughput (token/s): 95.26, #queue-req: 0
  23128. 2025-07-20 17:34:17,193 - __main__ - INFO - sglang running req: 2 queue req: 0
  23129. 2025-07-20 17:34:18,031 - sglang - INFO - [2025-07-20 17:34:18 TP0] Decode batch. #running-req: 2, #token: 5391, token usage: 0.14, gen throughput (token/s): 95.38, #queue-req: 0
  23130. 2025-07-20 17:34:18,032 - __main__ - INFO - sglang running req: 2 queue req: 0
  23131. 2025-07-20 17:34:18,864 - sglang - INFO - [2025-07-20 17:34:18 TP0] Decode batch. #running-req: 1, #token: 2838, token usage: 0.07, gen throughput (token/s): 63.66, #queue-req: 0
  23132. 2025-07-20 17:34:18,864 - __main__ - INFO - sglang running req: 1 queue req: 0
  23133. 2025-07-20 17:34:19,098 - __main__ - INFO - Finished TaskGroup for worker on 03f19a67ca1619f854740bd806a32d7112c3c315
  23134. 2025-07-20 17:34:19,098 - __main__ - INFO - Got 1 docs for 03f19a67ca1619f854740bd806a32d7112c3c315
  23135. 2025-07-20 17:34:19,100 - __main__ - INFO - Worker 0 processing work item 2b4bbfbba141c9173ab5abba31f4a4c140a0fd85
  23136. 2025-07-20 17:34:19,100 - __main__ - INFO - Created all tasks for 2b4bbfbba141c9173ab5abba31f4a4c140a0fd85
  23137. 2025-07-20 17:34:19,106 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106041000.pdf in worker 0
  23138. 2025-07-20 17:34:19,202 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-6
  23139. 2025-07-20 17:34:19,233 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-3
  23140. 2025-07-20 17:34:19,239 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-2
  23141. 2025-07-20 17:34:19,247 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-4
  23142. 2025-07-20 17:34:19,292 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-1
  23143. 2025-07-20 17:34:19,316 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106041000.pdf-5
  23144. 2025-07-20 17:34:19,357 - sglang - INFO - [2025-07-20 17:34:19 TP0] Prefill batch. #new-seq: 1, #new-token: 1350, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  23145. 2025-07-20 17:34:19,357 - __main__ - INFO - sglang running req: 0 queue req: 0
  23146. 2025-07-20 17:34:19,888 - sglang - INFO - [2025-07-20 17:34:19 TP0] Prefill batch. #new-seq: 5, #new-token: 10360, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  23147. 2025-07-20 17:34:19,888 - __main__ - INFO - sglang running req: 1 queue req: 0
  23148. 2025-07-20 17:34:23,706 - sglang - INFO - [2025-07-20 17:34:23 TP0] Decode batch. #running-req: 6, #token: 11884, token usage: 0.31, gen throughput (token/s): 38.21, #queue-req: 0
  23149. 2025-07-20 17:34:23,706 - __main__ - INFO - sglang running req: 6 queue req: 0
  23150. 2025-07-20 17:34:24,580 - sglang - INFO - [2025-07-20 17:34:24 TP0] Decode batch. #running-req: 6, #token: 12124, token usage: 0.32, gen throughput (token/s): 274.65, #queue-req: 0
  23151. 2025-07-20 17:34:24,580 - __main__ - INFO - sglang running req: 6 queue req: 0
  23152. 2025-07-20 17:34:25,453 - sglang - INFO - [2025-07-20 17:34:25 TP0] Decode batch. #running-req: 6, #token: 12364, token usage: 0.33, gen throughput (token/s): 274.75, #queue-req: 0
  23153. 2025-07-20 17:34:25,454 - __main__ - INFO - sglang running req: 6 queue req: 0
  23154. 2025-07-20 17:34:26,327 - sglang - INFO - [2025-07-20 17:34:26 TP0] Decode batch. #running-req: 6, #token: 12604, token usage: 0.33, gen throughput (token/s): 274.72, #queue-req: 0
  23155. 2025-07-20 17:34:26,327 - __main__ - INFO - sglang running req: 6 queue req: 0
  23156. 2025-07-20 17:34:26,829 - __main__ - INFO - Queue remaining: 12
  23157. 2025-07-20 17:34:26,829 - __main__ - INFO -
  23158. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23159. ----------------------------------------------------------------------------------
  23160. finished_input_tokens 468.58 610.91
  23161. finished_output_tokens 113.90 152.92
  23162. sglang_input_tokens 472.33 599.56
  23163. sglang_output_tokens 119.06 151.26
  23164. 2025-07-20 17:34:26,830 - __main__ - INFO -
  23165. Worker ID | started
  23166. ----------+--------
  23167. 0 | 6
  23168. 2025-07-20 17:34:27,203 - sglang - INFO - [2025-07-20 17:34:27 TP0] Decode batch. #running-req: 6, #token: 12844, token usage: 0.34, gen throughput (token/s): 273.85, #queue-req: 0
  23169. 2025-07-20 17:34:27,203 - __main__ - INFO - sglang running req: 6 queue req: 0
  23170. 2025-07-20 17:34:28,077 - sglang - INFO - [2025-07-20 17:34:28 TP0] Decode batch. #running-req: 5, #token: 11505, token usage: 0.30, gen throughput (token/s): 271.08, #queue-req: 0
  23171. 2025-07-20 17:34:28,077 - __main__ - INFO - sglang running req: 5 queue req: 0
  23172. 2025-07-20 17:34:28,941 - sglang - INFO - [2025-07-20 17:34:28 TP0] Decode batch. #running-req: 4, #token: 9568, token usage: 0.25, gen throughput (token/s): 210.83, #queue-req: 0
  23173. 2025-07-20 17:34:28,941 - __main__ - INFO - sglang running req: 4 queue req: 0
  23174. 2025-07-20 17:34:29,797 - sglang - INFO - [2025-07-20 17:34:29 TP0] Decode batch. #running-req: 4, #token: 9728, token usage: 0.26, gen throughput (token/s): 186.85, #queue-req: 0
  23175. 2025-07-20 17:34:29,797 - __main__ - INFO - sglang running req: 4 queue req: 0
  23176. 2025-07-20 17:34:30,650 - sglang - INFO - [2025-07-20 17:34:30 TP0] Decode batch. #running-req: 3, #token: 7580, token usage: 0.20, gen throughput (token/s): 168.78, #queue-req: 0
  23177. 2025-07-20 17:34:30,650 - __main__ - INFO - sglang running req: 3 queue req: 0
  23178. 2025-07-20 17:34:31,500 - sglang - INFO - [2025-07-20 17:34:31 TP0] Decode batch. #running-req: 3, #token: 7700, token usage: 0.20, gen throughput (token/s): 141.18, #queue-req: 0
  23179. 2025-07-20 17:34:31,500 - __main__ - INFO - sglang running req: 3 queue req: 0
  23180. 2025-07-20 17:34:32,348 - sglang - INFO - [2025-07-20 17:34:32 TP0] Decode batch. #running-req: 2, #token: 5534, token usage: 0.15, gen throughput (token/s): 123.86, #queue-req: 0
  23181. 2025-07-20 17:34:32,348 - __main__ - INFO - sglang running req: 2 queue req: 0
  23182. 2025-07-20 17:34:33,188 - sglang - INFO - [2025-07-20 17:34:33 TP0] Decode batch. #running-req: 2, #token: 5614, token usage: 0.15, gen throughput (token/s): 95.19, #queue-req: 0
  23183. 2025-07-20 17:34:33,189 - __main__ - INFO - sglang running req: 2 queue req: 0
  23184. 2025-07-20 17:34:34,027 - sglang - INFO - [2025-07-20 17:34:34 TP0] Decode batch. #running-req: 2, #token: 5694, token usage: 0.15, gen throughput (token/s): 95.43, #queue-req: 0
  23185. 2025-07-20 17:34:34,027 - __main__ - INFO - sglang running req: 2 queue req: 0
  23186. 2025-07-20 17:34:34,865 - sglang - INFO - [2025-07-20 17:34:34 TP0] Decode batch. #running-req: 2, #token: 5774, token usage: 0.15, gen throughput (token/s): 95.39, #queue-req: 0
  23187. 2025-07-20 17:34:34,866 - __main__ - INFO - sglang running req: 2 queue req: 0
  23188. 2025-07-20 17:34:35,705 - sglang - INFO - [2025-07-20 17:34:35 TP0] Decode batch. #running-req: 2, #token: 5854, token usage: 0.15, gen throughput (token/s): 95.30, #queue-req: 0
  23189. 2025-07-20 17:34:35,705 - __main__ - INFO - sglang running req: 2 queue req: 0
  23190. 2025-07-20 17:34:36,542 - sglang - INFO - [2025-07-20 17:34:36 TP0] Decode batch. #running-req: 1, #token: 2781, token usage: 0.07, gen throughput (token/s): 81.26, #queue-req: 0
  23191. 2025-07-20 17:34:36,542 - __main__ - INFO - sglang running req: 1 queue req: 0
  23192. 2025-07-20 17:34:36,831 - __main__ - INFO - Queue remaining: 12
  23193. 2025-07-20 17:34:36,831 - __main__ - INFO -
  23194. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23195. ----------------------------------------------------------------------------------
  23196. finished_input_tokens 460.65 577.38
  23197. finished_output_tokens 111.97 145.62
  23198. sglang_input_tokens 480.51 590.70
  23199. sglang_output_tokens 120.17 148.85
  23200. 2025-07-20 17:34:36,831 - __main__ - INFO -
  23201. Worker ID | finished | started
  23202. ----------+----------+--------
  23203. 0 | 5 | 6
  23204. 2025-07-20 17:34:37,370 - sglang - INFO - [2025-07-20 17:34:37 TP0] Decode batch. #running-req: 1, #token: 2821, token usage: 0.07, gen throughput (token/s): 48.25, #queue-req: 0
  23205. 2025-07-20 17:34:37,371 - __main__ - INFO - sglang running req: 1 queue req: 0
  23206. 2025-07-20 17:34:37,874 - __main__ - INFO - Finished TaskGroup for worker on 2b4bbfbba141c9173ab5abba31f4a4c140a0fd85
  23207. 2025-07-20 17:34:37,874 - __main__ - INFO - Got 1 docs for 2b4bbfbba141c9173ab5abba31f4a4c140a0fd85
  23208. 2025-07-20 17:34:37,875 - __main__ - INFO - Worker 0 processing work item 225426c1e59a9bf843a4d1088c3c98aa0321642c
  23209. 2025-07-20 17:34:37,875 - __main__ - INFO - Created all tasks for 225426c1e59a9bf843a4d1088c3c98aa0321642c
  23210. 2025-07-20 17:34:37,881 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900701.pdf in worker 0
  23211. 2025-07-20 17:34:37,995 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-6
  23212. 2025-07-20 17:34:38,030 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-2
  23213. 2025-07-20 17:34:38,074 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-1
  23214. 2025-07-20 17:34:38,079 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-3
  23215. 2025-07-20 17:34:38,135 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-4
  23216. 2025-07-20 17:34:38,140 - sglang - INFO - [2025-07-20 17:34:38 TP0] Prefill batch. #new-seq: 1, #new-token: 1496, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  23217. 2025-07-20 17:34:38,140 - __main__ - INFO - sglang running req: 0 queue req: 0
  23218. 2025-07-20 17:34:38,147 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900701.pdf-5
  23219. 2025-07-20 17:34:38,720 - sglang - INFO - [2025-07-20 17:34:38 TP0] Prefill batch. #new-seq: 5, #new-token: 11856, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  23220. 2025-07-20 17:34:38,721 - __main__ - INFO - sglang running req: 1 queue req: 0
  23221. 2025-07-20 17:34:42,572 - sglang - INFO - [2025-07-20 17:34:42 TP0] Decode batch. #running-req: 6, #token: 13448, token usage: 0.35, gen throughput (token/s): 23.07, #queue-req: 0
  23222. 2025-07-20 17:34:42,572 - __main__ - INFO - sglang running req: 6 queue req: 0
  23223. 2025-07-20 17:34:43,449 - sglang - INFO - [2025-07-20 17:34:43 TP0] Decode batch. #running-req: 6, #token: 13688, token usage: 0.36, gen throughput (token/s): 273.76, #queue-req: 0
  23224. 2025-07-20 17:34:43,449 - __main__ - INFO - sglang running req: 6 queue req: 0
  23225. 2025-07-20 17:34:44,324 - sglang - INFO - [2025-07-20 17:34:44 TP0] Decode batch. #running-req: 6, #token: 13928, token usage: 0.37, gen throughput (token/s): 274.12, #queue-req: 0
  23226. 2025-07-20 17:34:44,325 - __main__ - INFO - sglang running req: 6 queue req: 0
  23227. 2025-07-20 17:34:45,201 - sglang - INFO - [2025-07-20 17:34:45 TP0] Decode batch. #running-req: 6, #token: 14168, token usage: 0.37, gen throughput (token/s): 273.73, #queue-req: 0
  23228. 2025-07-20 17:34:45,201 - __main__ - INFO - sglang running req: 6 queue req: 0
  23229. 2025-07-20 17:34:46,080 - sglang - INFO - [2025-07-20 17:34:46 TP0] Decode batch. #running-req: 6, #token: 14408, token usage: 0.38, gen throughput (token/s): 273.10, #queue-req: 0
  23230. 2025-07-20 17:34:46,080 - __main__ - INFO - sglang running req: 6 queue req: 0
  23231. 2025-07-20 17:34:46,833 - __main__ - INFO - Queue remaining: 11
  23232. 2025-07-20 17:34:46,834 - __main__ - INFO -
  23233. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23234. ----------------------------------------------------------------------------------
  23235. finished_input_tokens 472.47 616.41
  23236. finished_output_tokens 114.33 154.09
  23237. sglang_input_tokens 476.09 597.88
  23238. sglang_output_tokens 119.33 151.16
  23239. 2025-07-20 17:34:46,834 - __main__ - INFO -
  23240. Worker ID | started
  23241. ----------+--------
  23242. 0 | 6
  23243. 2025-07-20 17:34:46,958 - sglang - INFO - [2025-07-20 17:34:46 TP0] Decode batch. #running-req: 6, #token: 14648, token usage: 0.39, gen throughput (token/s): 273.23, #queue-req: 0
  23244. 2025-07-20 17:34:46,958 - __main__ - INFO - sglang running req: 6 queue req: 0
  23245. 2025-07-20 17:34:47,832 - sglang - INFO - [2025-07-20 17:34:47 TP0] Decode batch. #running-req: 5, #token: 13136, token usage: 0.35, gen throughput (token/s): 239.11, #queue-req: 0
  23246. 2025-07-20 17:34:47,832 - __main__ - INFO - sglang running req: 5 queue req: 0
  23247. 2025-07-20 17:34:48,700 - sglang - INFO - [2025-07-20 17:34:48 TP0] Decode batch. #running-req: 4, #token: 10664, token usage: 0.28, gen throughput (token/s): 214.36, #queue-req: 0
  23248. 2025-07-20 17:34:48,700 - __main__ - INFO - sglang running req: 4 queue req: 0
  23249. 2025-07-20 17:34:49,559 - sglang - INFO - [2025-07-20 17:34:49 TP0] Decode batch. #running-req: 4, #token: 10824, token usage: 0.28, gen throughput (token/s): 186.35, #queue-req: 0
  23250. 2025-07-20 17:34:49,559 - __main__ - INFO - sglang running req: 4 queue req: 0
  23251. 2025-07-20 17:34:50,418 - sglang - INFO - [2025-07-20 17:34:50 TP0] Decode batch. #running-req: 3, #token: 8920, token usage: 0.23, gen throughput (token/s): 173.40, #queue-req: 0
  23252. 2025-07-20 17:34:50,418 - __main__ - INFO - sglang running req: 3 queue req: 0
  23253. 2025-07-20 17:34:51,272 - sglang - INFO - [2025-07-20 17:34:51 TP0] Decode batch. #running-req: 3, #token: 9040, token usage: 0.24, gen throughput (token/s): 140.49, #queue-req: 0
  23254. 2025-07-20 17:34:51,272 - __main__ - INFO - sglang running req: 3 queue req: 0
  23255. 2025-07-20 17:34:52,126 - sglang - INFO - [2025-07-20 17:34:52 TP0] Decode batch. #running-req: 3, #token: 9160, token usage: 0.24, gen throughput (token/s): 140.50, #queue-req: 0
  23256. 2025-07-20 17:34:52,126 - __main__ - INFO - sglang running req: 3 queue req: 0
  23257. 2025-07-20 17:34:52,979 - sglang - INFO - [2025-07-20 17:34:52 TP0] Decode batch. #running-req: 3, #token: 9280, token usage: 0.24, gen throughput (token/s): 140.61, #queue-req: 0
  23258. 2025-07-20 17:34:52,980 - __main__ - INFO - sglang running req: 3 queue req: 0
  23259. 2025-07-20 17:34:53,834 - sglang - INFO - [2025-07-20 17:34:53 TP0] Decode batch. #running-req: 3, #token: 9400, token usage: 0.25, gen throughput (token/s): 140.39, #queue-req: 0
  23260. 2025-07-20 17:34:53,834 - __main__ - INFO - sglang running req: 3 queue req: 0
  23261. 2025-07-20 17:34:54,690 - sglang - INFO - [2025-07-20 17:34:54 TP0] Decode batch. #running-req: 3, #token: 9520, token usage: 0.25, gen throughput (token/s): 140.16, #queue-req: 0
  23262. 2025-07-20 17:34:54,691 - __main__ - INFO - sglang running req: 3 queue req: 0
  23263. 2025-07-20 17:34:55,545 - sglang - INFO - [2025-07-20 17:34:55 TP0] Decode batch. #running-req: 3, #token: 9640, token usage: 0.25, gen throughput (token/s): 140.44, #queue-req: 0
  23264. 2025-07-20 17:34:55,545 - __main__ - INFO - sglang running req: 3 queue req: 0
  23265. 2025-07-20 17:34:56,399 - sglang - INFO - [2025-07-20 17:34:56 TP0] Decode batch. #running-req: 1, #token: 3342, token usage: 0.09, gen throughput (token/s): 135.77, #queue-req: 0
  23266. 2025-07-20 17:34:56,399 - __main__ - INFO - sglang running req: 1 queue req: 0
  23267. 2025-07-20 17:34:56,834 - __main__ - INFO - Queue remaining: 11
  23268. 2025-07-20 17:34:56,834 - __main__ - INFO -
  23269. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23270. ----------------------------------------------------------------------------------
  23271. finished_input_tokens 464.73 571.90
  23272. finished_output_tokens 112.46 143.42
  23273. sglang_input_tokens 485.76 607.46
  23274. sglang_output_tokens 120.95 150.70
  23275. 2025-07-20 17:34:56,834 - __main__ - INFO -
  23276. Worker ID | finished | started
  23277. ----------+----------+--------
  23278. 0 | 5 | 6
  23279. 2025-07-20 17:34:57,230 - sglang - INFO - [2025-07-20 17:34:57 TP0] Decode batch. #running-req: 1, #token: 3382, token usage: 0.09, gen throughput (token/s): 48.12, #queue-req: 0
  23280. 2025-07-20 17:34:57,231 - __main__ - INFO - sglang running req: 1 queue req: 0
  23281. 2025-07-20 17:34:58,062 - sglang - INFO - [2025-07-20 17:34:58 TP0] Decode batch. #running-req: 1, #token: 3422, token usage: 0.09, gen throughput (token/s): 48.10, #queue-req: 0
  23282. 2025-07-20 17:34:58,062 - __main__ - INFO - sglang running req: 1 queue req: 0
  23283. 2025-07-20 17:34:58,893 - sglang - INFO - [2025-07-20 17:34:58 TP0] Decode batch. #running-req: 1, #token: 3462, token usage: 0.09, gen throughput (token/s): 48.14, #queue-req: 0
  23284. 2025-07-20 17:34:58,893 - __main__ - INFO - sglang running req: 1 queue req: 0
  23285. 2025-07-20 17:34:59,724 - sglang - INFO - [2025-07-20 17:34:59 TP0] Decode batch. #running-req: 1, #token: 3502, token usage: 0.09, gen throughput (token/s): 48.15, #queue-req: 0
  23286. 2025-07-20 17:34:59,724 - __main__ - INFO - sglang running req: 1 queue req: 0
  23287. 2025-07-20 17:35:00,556 - sglang - INFO - [2025-07-20 17:35:00 TP0] Decode batch. #running-req: 1, #token: 3542, token usage: 0.09, gen throughput (token/s): 48.08, #queue-req: 0
  23288. 2025-07-20 17:35:00,556 - __main__ - INFO - sglang running req: 1 queue req: 0
  23289. 2025-07-20 17:35:00,958 - __main__ - INFO - Finished TaskGroup for worker on 225426c1e59a9bf843a4d1088c3c98aa0321642c
  23290. 2025-07-20 17:35:00,958 - __main__ - INFO - Got 1 docs for 225426c1e59a9bf843a4d1088c3c98aa0321642c
  23291. 2025-07-20 17:35:00,959 - __main__ - INFO - Worker 0 processing work item 398aeb9cc239880a7222603994af5c4016796381
  23292. 2025-07-20 17:35:00,959 - __main__ - INFO - Created all tasks for 398aeb9cc239880a7222603994af5c4016796381
  23293. 2025-07-20 17:35:00,964 - __main__ - INFO - Got 5 pages to do for test_pdf/1144520000702630XG3440106028002.pdf in worker 0
  23294. 2025-07-20 17:35:01,021 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-5
  23295. 2025-07-20 17:35:01,076 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-2
  23296. 2025-07-20 17:35:01,103 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-3
  23297. 2025-07-20 17:35:01,151 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-1
  23298. 2025-07-20 17:35:01,158 - sglang - INFO - [2025-07-20 17:35:01 TP0] Prefill batch. #new-seq: 1, #new-token: 1102, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  23299. 2025-07-20 17:35:01,158 - __main__ - INFO - sglang running req: 0 queue req: 0
  23300. 2025-07-20 17:35:01,186 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106028002.pdf-4
  23301. 2025-07-20 17:35:01,653 - sglang - INFO - [2025-07-20 17:35:01 TP0] Prefill batch. #new-seq: 4, #new-token: 8057, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.03, #running-req: 1, #queue-req: 0
  23302. 2025-07-20 17:35:01,653 - __main__ - INFO - sglang running req: 1 queue req: 0
  23303. 2025-07-20 17:35:04,619 - sglang - INFO - [2025-07-20 17:35:04 TP0] Decode batch. #running-req: 5, #token: 9264, token usage: 0.24, gen throughput (token/s): 30.52, #queue-req: 0
  23304. 2025-07-20 17:35:04,619 - __main__ - INFO - sglang running req: 5 queue req: 0
  23305. 2025-07-20 17:35:05,477 - sglang - INFO - [2025-07-20 17:35:05 TP0] Decode batch. #running-req: 4, #token: 8301, token usage: 0.22, gen throughput (token/s): 200.47, #queue-req: 0
  23306. 2025-07-20 17:35:05,477 - __main__ - INFO - sglang running req: 4 queue req: 0
  23307. 2025-07-20 17:35:06,329 - sglang - INFO - [2025-07-20 17:35:06 TP0] Decode batch. #running-req: 4, #token: 8461, token usage: 0.22, gen throughput (token/s): 187.63, #queue-req: 0
  23308. 2025-07-20 17:35:06,329 - __main__ - INFO - sglang running req: 4 queue req: 0
  23309. 2025-07-20 17:35:06,835 - __main__ - INFO - Queue remaining: 10
  23310. 2025-07-20 17:35:06,836 - __main__ - INFO -
  23311. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23312. ----------------------------------------------------------------------------------
  23313. finished_input_tokens 478.76 616.41
  23314. finished_output_tokens 115.58 153.62
  23315. sglang_input_tokens 484.04 614.13
  23316. sglang_output_tokens 120.47 152.28
  23317. 2025-07-20 17:35:06,836 - __main__ - INFO -
  23318. Worker ID | finished | started
  23319. ----------+----------+--------
  23320. 0 | 1 | 5
  23321. 2025-07-20 17:35:07,182 - sglang - INFO - [2025-07-20 17:35:07 TP0] Decode batch. #running-req: 4, #token: 8621, token usage: 0.23, gen throughput (token/s): 187.71, #queue-req: 0
  23322. 2025-07-20 17:35:07,182 - __main__ - INFO - sglang running req: 4 queue req: 0
  23323. 2025-07-20 17:35:08,035 - sglang - INFO - [2025-07-20 17:35:08 TP0] Decode batch. #running-req: 3, #token: 7222, token usage: 0.19, gen throughput (token/s): 170.04, #queue-req: 0
  23324. 2025-07-20 17:35:08,035 - __main__ - INFO - sglang running req: 3 queue req: 0
  23325. 2025-07-20 17:35:08,885 - sglang - INFO - [2025-07-20 17:35:08 TP0] Decode batch. #running-req: 3, #token: 7342, token usage: 0.19, gen throughput (token/s): 141.06, #queue-req: 0
  23326. 2025-07-20 17:35:08,885 - __main__ - INFO - sglang running req: 3 queue req: 0
  23327. 2025-07-20 17:35:09,735 - sglang - INFO - [2025-07-20 17:35:09 TP0] Decode batch. #running-req: 3, #token: 7462, token usage: 0.20, gen throughput (token/s): 141.23, #queue-req: 0
  23328. 2025-07-20 17:35:09,735 - __main__ - INFO - sglang running req: 3 queue req: 0
  23329. 2025-07-20 17:35:10,584 - sglang - INFO - [2025-07-20 17:35:10 TP0] Decode batch. #running-req: 3, #token: 7582, token usage: 0.20, gen throughput (token/s): 141.31, #queue-req: 0
  23330. 2025-07-20 17:35:10,584 - __main__ - INFO - sglang running req: 3 queue req: 0
  23331. 2025-07-20 17:35:11,435 - sglang - INFO - [2025-07-20 17:35:11 TP0] Decode batch. #running-req: 3, #token: 7702, token usage: 0.20, gen throughput (token/s): 140.98, #queue-req: 0
  23332. 2025-07-20 17:35:11,435 - __main__ - INFO - sglang running req: 3 queue req: 0
  23333. 2025-07-20 17:35:12,288 - sglang - INFO - [2025-07-20 17:35:12 TP0] Decode batch. #running-req: 3, #token: 7822, token usage: 0.21, gen throughput (token/s): 140.76, #queue-req: 0
  23334. 2025-07-20 17:35:12,288 - __main__ - INFO - sglang running req: 3 queue req: 0
  23335. 2025-07-20 17:35:13,139 - sglang - INFO - [2025-07-20 17:35:13 TP0] Decode batch. #running-req: 3, #token: 7942, token usage: 0.21, gen throughput (token/s): 140.97, #queue-req: 0
  23336. 2025-07-20 17:35:13,139 - __main__ - INFO - sglang running req: 3 queue req: 0
  23337. 2025-07-20 17:35:13,990 - sglang - INFO - [2025-07-20 17:35:13 TP0] Decode batch. #running-req: 3, #token: 8062, token usage: 0.21, gen throughput (token/s): 141.02, #queue-req: 0
  23338. 2025-07-20 17:35:13,990 - __main__ - INFO - sglang running req: 3 queue req: 0
  23339. 2025-07-20 17:35:14,843 - sglang - INFO - [2025-07-20 17:35:14 TP0] Decode batch. #running-req: 3, #token: 8182, token usage: 0.22, gen throughput (token/s): 140.76, #queue-req: 0
  23340. 2025-07-20 17:35:14,843 - __main__ - INFO - sglang running req: 3 queue req: 0
  23341. 2025-07-20 17:35:15,695 - sglang - INFO - [2025-07-20 17:35:15 TP0] Decode batch. #running-req: 3, #token: 8302, token usage: 0.22, gen throughput (token/s): 140.80, #queue-req: 0
  23342. 2025-07-20 17:35:15,695 - __main__ - INFO - sglang running req: 3 queue req: 0
  23343. 2025-07-20 17:35:16,547 - sglang - INFO - [2025-07-20 17:35:16 TP0] Decode batch. #running-req: 3, #token: 8422, token usage: 0.22, gen throughput (token/s): 140.78, #queue-req: 0
  23344. 2025-07-20 17:35:16,547 - __main__ - INFO - sglang running req: 3 queue req: 0
  23345. 2025-07-20 17:35:16,837 - __main__ - INFO - Queue remaining: 10
  23346. 2025-07-20 17:35:16,837 - __main__ - INFO -
  23347. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23348. ----------------------------------------------------------------------------------
  23349. finished_input_tokens 471.17 616.41
  23350. finished_output_tokens 113.75 153.62
  23351. sglang_input_tokens 478.55 595.10
  23352. sglang_output_tokens 118.82 146.39
  23353. 2025-07-20 17:35:16,838 - __main__ - INFO -
  23354. Worker ID | finished | started
  23355. ----------+----------+--------
  23356. 0 | 2 | 5
  23357. 2025-07-20 17:35:17,396 - sglang - INFO - [2025-07-20 17:35:17 TP0] Decode batch. #running-req: 2, #token: 5339, token usage: 0.14, gen throughput (token/s): 124.85, #queue-req: 0
  23358. 2025-07-20 17:35:17,396 - __main__ - INFO - sglang running req: 2 queue req: 0
  23359. 2025-07-20 17:35:18,228 - sglang - INFO - [2025-07-20 17:35:18 TP0] Decode batch. #running-req: 1, #token: 2858, token usage: 0.08, gen throughput (token/s): 54.10, #queue-req: 0
  23360. 2025-07-20 17:35:18,228 - __main__ - INFO - sglang running req: 1 queue req: 0
  23361. 2025-07-20 17:35:19,058 - sglang - INFO - [2025-07-20 17:35:19 TP0] Decode batch. #running-req: 1, #token: 2898, token usage: 0.08, gen throughput (token/s): 48.22, #queue-req: 0
  23362. 2025-07-20 17:35:19,058 - __main__ - INFO - sglang running req: 1 queue req: 0
  23363. 2025-07-20 17:35:19,886 - sglang - INFO - [2025-07-20 17:35:19 TP0] Decode batch. #running-req: 1, #token: 2938, token usage: 0.08, gen throughput (token/s): 48.27, #queue-req: 0
  23364. 2025-07-20 17:35:19,886 - __main__ - INFO - sglang running req: 1 queue req: 0
  23365. 2025-07-20 17:35:20,716 - sglang - INFO - [2025-07-20 17:35:20 TP0] Decode batch. #running-req: 1, #token: 2978, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
  23366. 2025-07-20 17:35:20,716 - __main__ - INFO - sglang running req: 1 queue req: 0
  23367. 2025-07-20 17:35:21,114 - __main__ - INFO - Finished TaskGroup for worker on 398aeb9cc239880a7222603994af5c4016796381
  23368. 2025-07-20 17:35:21,114 - __main__ - INFO - Got 1 docs for 398aeb9cc239880a7222603994af5c4016796381
  23369. 2025-07-20 17:35:21,115 - __main__ - INFO - Worker 0 processing work item 06798e8f7cc26525f138f26354ffab7c63074f2c
  23370. 2025-07-20 17:35:21,115 - __main__ - INFO - Created all tasks for 06798e8f7cc26525f138f26354ffab7c63074f2c
  23371. 2025-07-20 17:35:21,121 - __main__ - INFO - Got 14 pages to do for test_pdf/1144520000702630XG344010604301201.pdf in worker 0
  23372. 2025-07-20 17:35:21,301 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-14
  23373. 2025-07-20 17:35:21,305 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
  23374. 2025-07-20 17:35:21,316 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-6
  23375. 2025-07-20 17:35:21,338 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-9
  23376. 2025-07-20 17:35:21,341 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-7
  23377. 2025-07-20 17:35:21,347 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-2
  23378. 2025-07-20 17:35:21,350 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-8
  23379. 2025-07-20 17:35:21,357 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-3
  23380. 2025-07-20 17:35:21,360 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-5
  23381. 2025-07-20 17:35:21,364 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-4
  23382. 2025-07-20 17:35:21,374 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-11
  23383. 2025-07-20 17:35:21,380 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-1
  23384. 2025-07-20 17:35:21,433 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-12
  23385. 2025-07-20 17:35:21,451 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-13
  23386. 2025-07-20 17:35:21,476 - sglang - INFO - [2025-07-20 17:35:21 TP0] Prefill batch. #new-seq: 1, #new-token: 2043, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  23387. 2025-07-20 17:35:21,476 - __main__ - INFO - sglang running req: 0 queue req: 0
  23388. 2025-07-20 17:35:22,178 - sglang - INFO - [2025-07-20 17:35:22 TP0] Prefill batch. #new-seq: 6, #new-token: 12919, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 7
  23389. 2025-07-20 17:35:22,178 - __main__ - INFO - sglang running req: 1 queue req: 7
  23390. 2025-07-20 17:35:26,676 - sglang - INFO - [2025-07-20 17:35:26 TP0] Decode batch. #running-req: 7, #token: 15109, token usage: 0.40, gen throughput (token/s): 27.85, #queue-req: 7
  23391. 2025-07-20 17:35:26,676 - __main__ - INFO - sglang running req: 7 queue req: 7
  23392. 2025-07-20 17:35:26,838 - __main__ - INFO - Queue remaining: 9
  23393. 2025-07-20 17:35:26,838 - __main__ - INFO -
  23394. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23395. ----------------------------------------------------------------------------------
  23396. finished_input_tokens 478.11 592.65
  23397. finished_output_tokens 115.46 143.38
  23398. sglang_input_tokens 481.50 592.65
  23399. sglang_output_tokens 120.15 143.38
  23400. 2025-07-20 17:35:26,839 - __main__ - INFO -
  23401. Worker ID | started
  23402. ----------+--------
  23403. 0 | 14
  23404. 2025-07-20 17:35:27,557 - sglang - INFO - [2025-07-20 17:35:27 TP0] Decode batch. #running-req: 7, #token: 15389, token usage: 0.41, gen throughput (token/s): 317.87, #queue-req: 7
  23405. 2025-07-20 17:35:27,557 - __main__ - INFO - sglang running req: 7 queue req: 7
  23406. 2025-07-20 17:35:28,438 - sglang - INFO - [2025-07-20 17:35:28 TP0] Decode batch. #running-req: 7, #token: 15669, token usage: 0.41, gen throughput (token/s): 317.80, #queue-req: 7
  23407. 2025-07-20 17:35:28,438 - __main__ - INFO - sglang running req: 7 queue req: 7
  23408. 2025-07-20 17:35:29,320 - sglang - INFO - [2025-07-20 17:35:29 TP0] Decode batch. #running-req: 7, #token: 15949, token usage: 0.42, gen throughput (token/s): 317.45, #queue-req: 7
  23409. 2025-07-20 17:35:29,320 - __main__ - INFO - sglang running req: 7 queue req: 7
  23410. 2025-07-20 17:35:30,213 - sglang - INFO - [2025-07-20 17:35:30 TP0] Decode batch. #running-req: 7, #token: 16229, token usage: 0.43, gen throughput (token/s): 313.54, #queue-req: 7
  23411. 2025-07-20 17:35:30,213 - __main__ - INFO - sglang running req: 7 queue req: 7
  23412. 2025-07-20 17:35:31,106 - sglang - INFO - [2025-07-20 17:35:31 TP0] Decode batch. #running-req: 7, #token: 16509, token usage: 0.43, gen throughput (token/s): 313.70, #queue-req: 7
  23413. 2025-07-20 17:35:31,106 - __main__ - INFO - sglang running req: 7 queue req: 7
  23414. 2025-07-20 17:35:31,996 - sglang - INFO - [2025-07-20 17:35:31 TP0] Decode batch. #running-req: 7, #token: 16789, token usage: 0.44, gen throughput (token/s): 314.42, #queue-req: 7
  23415. 2025-07-20 17:35:31,996 - __main__ - INFO - sglang running req: 7 queue req: 7
  23416. 2025-07-20 17:35:32,063 - sglang - INFO - [2025-07-20 17:35:32 TP0] Prefill batch. #new-seq: 2, #new-token: 4940, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 5
  23417. 2025-07-20 17:35:32,064 - __main__ - INFO - sglang running req: 6 queue req: 5
  23418. 2025-07-20 17:35:34,348 - sglang - INFO - [2025-07-20 17:35:34 TP0] Decode batch. #running-req: 8, #token: 19967, token usage: 0.53, gen throughput (token/s): 134.34, #queue-req: 5
  23419. 2025-07-20 17:35:34,349 - __main__ - INFO - sglang running req: 8 queue req: 5
  23420. 2025-07-20 17:35:35,130 - sglang - INFO - [2025-07-20 17:35:35 TP0] Prefill batch. #new-seq: 2, #new-token: 4920, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.47, #running-req: 7, #queue-req: 3
  23421. 2025-07-20 17:35:35,131 - __main__ - INFO - sglang running req: 7 queue req: 3
  23422. 2025-07-20 17:35:36,712 - sglang - INFO - [2025-07-20 17:35:36 TP0] Decode batch. #running-req: 9, #token: 22833, token usage: 0.60, gen throughput (token/s): 137.09, #queue-req: 3
  23423. 2025-07-20 17:35:36,712 - __main__ - INFO - sglang running req: 9 queue req: 3
  23424. 2025-07-20 17:35:36,841 - __main__ - INFO - Queue remaining: 9
  23425. 2025-07-20 17:35:36,841 - __main__ - INFO -
  23426. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23427. ----------------------------------------------------------------------------------
  23428. finished_input_tokens 470.76 592.65
  23429. finished_output_tokens 113.69 143.38
  23430. sglang_input_tokens 480.03 571.24
  23431. sglang_output_tokens 119.22 138.01
  23432. 2025-07-20 17:35:36,841 - __main__ - INFO -
  23433. Worker ID | finished | started
  23434. ----------+----------+--------
  23435. 0 | 2 | 14
  23436. 2025-07-20 17:35:37,662 - sglang - INFO - [2025-07-20 17:35:37 TP0] Decode batch. #running-req: 9, #token: 23193, token usage: 0.61, gen throughput (token/s): 378.99, #queue-req: 3
  23437. 2025-07-20 17:35:37,662 - __main__ - INFO - sglang running req: 9 queue req: 3
  23438. 2025-07-20 17:35:37,851 - sglang - INFO - [2025-07-20 17:35:37 TP0] Prefill batch. #new-seq: 1, #new-token: 2576, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.54, #running-req: 8, #queue-req: 2
  23439. 2025-07-20 17:35:37,851 - __main__ - INFO - sglang running req: 8 queue req: 2
  23440. 2025-07-20 17:35:39,390 - sglang - INFO - [2025-07-20 17:35:39 TP0] Decode batch. #running-req: 9, #token: 23266, token usage: 0.61, gen throughput (token/s): 207.69, #queue-req: 2
  23441. 2025-07-20 17:35:39,390 - __main__ - INFO - sglang running req: 9 queue req: 2
  23442. 2025-07-20 17:35:40,104 - sglang - INFO - [2025-07-20 17:35:40 TP0] Prefill batch. #new-seq: 2, #new-token: 4634, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.55, #running-req: 8, #queue-req: 0
  23443. 2025-07-20 17:35:40,104 - __main__ - INFO - sglang running req: 8 queue req: 0
  23444. 2025-07-20 17:35:41,764 - sglang - INFO - [2025-07-20 17:35:41 TP0] Decode batch. #running-req: 10, #token: 25674, token usage: 0.68, gen throughput (token/s): 155.45, #queue-req: 0
  23445. 2025-07-20 17:35:41,764 - __main__ - INFO - sglang running req: 10 queue req: 0
  23446. 2025-07-20 17:35:42,724 - sglang - INFO - [2025-07-20 17:35:42 TP0] Decode batch. #running-req: 10, #token: 26074, token usage: 0.69, gen throughput (token/s): 416.59, #queue-req: 0
  23447. 2025-07-20 17:35:42,724 - __main__ - INFO - sglang running req: 10 queue req: 0
  23448. 2025-07-20 17:35:43,686 - sglang - INFO - [2025-07-20 17:35:43 TP0] Decode batch. #running-req: 10, #token: 26474, token usage: 0.70, gen throughput (token/s): 415.76, #queue-req: 0
  23449. 2025-07-20 17:35:43,687 - __main__ - INFO - sglang running req: 10 queue req: 0
  23450. 2025-07-20 17:35:44,646 - sglang - INFO - [2025-07-20 17:35:44 TP0] Decode batch. #running-req: 9, #token: 24245, token usage: 0.64, gen throughput (token/s): 405.16, #queue-req: 0
  23451. 2025-07-20 17:35:44,647 - __main__ - INFO - sglang running req: 9 queue req: 0
  23452. 2025-07-20 17:35:45,555 - sglang - INFO - [2025-07-20 17:35:45 TP0] Decode batch. #running-req: 8, #token: 21644, token usage: 0.57, gen throughput (token/s): 363.08, #queue-req: 0
  23453. 2025-07-20 17:35:45,556 - __main__ - INFO - sglang running req: 8 queue req: 0
  23454. 2025-07-20 17:35:46,451 - sglang - INFO - [2025-07-20 17:35:46 TP0] Decode batch. #running-req: 8, #token: 21964, token usage: 0.58, gen throughput (token/s): 357.28, #queue-req: 0
  23455. 2025-07-20 17:35:46,451 - __main__ - INFO - sglang running req: 8 queue req: 0
  23456. 2025-07-20 17:35:46,842 - __main__ - INFO - Queue remaining: 9
  23457. 2025-07-20 17:35:46,842 - __main__ - INFO -
  23458. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23459. ----------------------------------------------------------------------------------
  23460. finished_input_tokens 463.63 592.65
  23461. finished_output_tokens 111.97 143.38
  23462. sglang_input_tokens 486.40 587.09
  23463. sglang_output_tokens 120.45 140.09
  23464. 2025-07-20 17:35:46,843 - __main__ - INFO -
  23465. Worker ID | finished | started
  23466. ----------+----------+--------
  23467. 0 | 6 | 14
  23468. 2025-07-20 17:35:47,349 - sglang - INFO - [2025-07-20 17:35:47 TP0] Decode batch. #running-req: 8, #token: 22284, token usage: 0.59, gen throughput (token/s): 356.29, #queue-req: 0
  23469. 2025-07-20 17:35:47,349 - __main__ - INFO - sglang running req: 8 queue req: 0
  23470. 2025-07-20 17:35:48,249 - sglang - INFO - [2025-07-20 17:35:48 TP0] Decode batch. #running-req: 8, #token: 22604, token usage: 0.60, gen throughput (token/s): 355.46, #queue-req: 0
  23471. 2025-07-20 17:35:48,250 - __main__ - INFO - sglang running req: 8 queue req: 0
  23472. 2025-07-20 17:35:49,150 - sglang - INFO - [2025-07-20 17:35:49 TP0] Decode batch. #running-req: 8, #token: 22924, token usage: 0.60, gen throughput (token/s): 355.28, #queue-req: 0
  23473. 2025-07-20 17:35:49,150 - __main__ - INFO - sglang running req: 8 queue req: 0
  23474. 2025-07-20 17:35:50,052 - sglang - INFO - [2025-07-20 17:35:50 TP0] Decode batch. #running-req: 7, #token: 20558, token usage: 0.54, gen throughput (token/s): 352.64, #queue-req: 0
  23475. 2025-07-20 17:35:50,052 - __main__ - INFO - sglang running req: 7 queue req: 0
  23476. 2025-07-20 17:35:50,939 - sglang - INFO - [2025-07-20 17:35:50 TP0] Decode batch. #running-req: 6, #token: 17866, token usage: 0.47, gen throughput (token/s): 279.44, #queue-req: 0
  23477. 2025-07-20 17:35:50,940 - __main__ - INFO - sglang running req: 6 queue req: 0
  23478. 2025-07-20 17:35:51,826 - sglang - INFO - [2025-07-20 17:35:51 TP0] Decode batch. #running-req: 6, #token: 18106, token usage: 0.48, gen throughput (token/s): 270.53, #queue-req: 0
  23479. 2025-07-20 17:35:51,827 - __main__ - INFO - sglang running req: 6 queue req: 0
  23480. 2025-07-20 17:35:52,701 - sglang - INFO - [2025-07-20 17:35:52 TP0] Decode batch. #running-req: 3, #token: 8974, token usage: 0.24, gen throughput (token/s): 184.05, #queue-req: 0
  23481. 2025-07-20 17:35:52,701 - __main__ - INFO - sglang running req: 3 queue req: 0
  23482. 2025-07-20 17:35:53,563 - sglang - INFO - [2025-07-20 17:35:53 TP0] Decode batch. #running-req: 3, #token: 9094, token usage: 0.24, gen throughput (token/s): 139.29, #queue-req: 0
  23483. 2025-07-20 17:35:53,563 - __main__ - INFO - sglang running req: 3 queue req: 0
  23484. 2025-07-20 17:35:54,421 - sglang - INFO - [2025-07-20 17:35:54 TP0] Decode batch. #running-req: 3, #token: 9214, token usage: 0.24, gen throughput (token/s): 139.75, #queue-req: 0
  23485. 2025-07-20 17:35:54,422 - __main__ - INFO - sglang running req: 3 queue req: 0
  23486. 2025-07-20 17:35:55,275 - sglang - INFO - [2025-07-20 17:35:55 TP0] Decode batch. #running-req: 3, #token: 9334, token usage: 0.25, gen throughput (token/s): 140.55, #queue-req: 0
  23487. 2025-07-20 17:35:55,275 - __main__ - INFO - sglang running req: 3 queue req: 0
  23488. 2025-07-20 17:35:56,120 - sglang - INFO - [2025-07-20 17:35:56 TP0] Decode batch. #running-req: 2, #token: 6166, token usage: 0.16, gen throughput (token/s): 108.85, #queue-req: 0
  23489. 2025-07-20 17:35:56,120 - __main__ - INFO - sglang running req: 2 queue req: 0
  23490. 2025-07-20 17:35:56,844 - __main__ - INFO - Queue remaining: 9
  23491. 2025-07-20 17:35:56,844 - __main__ - INFO -
  23492. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23493. ----------------------------------------------------------------------------------
  23494. finished_input_tokens 456.72 535.46
  23495. finished_output_tokens 110.30 127.88
  23496. sglang_input_tokens 501.14 627.52
  23497. sglang_output_tokens 123.77 148.00
  23498. 2025-07-20 17:35:56,844 - __main__ - INFO -
  23499. Worker ID | finished | started
  23500. ----------+----------+--------
  23501. 0 | 12 | 14
  23502. 2025-07-20 17:35:56,961 - sglang - INFO - [2025-07-20 17:35:56 TP0] Decode batch. #running-req: 2, #token: 6246, token usage: 0.16, gen throughput (token/s): 95.20, #queue-req: 0
  23503. 2025-07-20 17:35:56,961 - __main__ - INFO - sglang running req: 2 queue req: 0
  23504. 2025-07-20 17:35:57,801 - sglang - INFO - [2025-07-20 17:35:57 TP0] Decode batch. #running-req: 2, #token: 6326, token usage: 0.17, gen throughput (token/s): 95.15, #queue-req: 0
  23505. 2025-07-20 17:35:57,802 - __main__ - INFO - sglang running req: 2 queue req: 0
  23506. 2025-07-20 17:35:58,643 - sglang - INFO - [2025-07-20 17:35:58 TP0] Decode batch. #running-req: 2, #token: 6406, token usage: 0.17, gen throughput (token/s): 95.10, #queue-req: 0
  23507. 2025-07-20 17:35:58,643 - __main__ - INFO - sglang running req: 2 queue req: 0
  23508. 2025-07-20 17:35:59,485 - sglang - INFO - [2025-07-20 17:35:59 TP0] Decode batch. #running-req: 2, #token: 6486, token usage: 0.17, gen throughput (token/s): 94.96, #queue-req: 0
  23509. 2025-07-20 17:35:59,485 - __main__ - INFO - sglang running req: 2 queue req: 0
  23510. 2025-07-20 17:36:00,326 - sglang - INFO - [2025-07-20 17:36:00 TP0] Decode batch. #running-req: 2, #token: 6566, token usage: 0.17, gen throughput (token/s): 95.13, #queue-req: 0
  23511. 2025-07-20 17:36:00,326 - __main__ - INFO - sglang running req: 2 queue req: 0
  23512. 2025-07-20 17:36:01,167 - sglang - INFO - [2025-07-20 17:36:01 TP0] Decode batch. #running-req: 2, #token: 6646, token usage: 0.17, gen throughput (token/s): 95.12, #queue-req: 0
  23513. 2025-07-20 17:36:01,167 - __main__ - INFO - sglang running req: 2 queue req: 0
  23514. 2025-07-20 17:36:01,997 - sglang - INFO - [2025-07-20 17:36:01 TP0] Decode batch. #running-req: 1, #token: 3478, token usage: 0.09, gen throughput (token/s): 49.37, #queue-req: 0
  23515. 2025-07-20 17:36:01,998 - __main__ - INFO - sglang running req: 1 queue req: 0
  23516. 2025-07-20 17:36:02,828 - sglang - INFO - [2025-07-20 17:36:02 TP0] Decode batch. #running-req: 1, #token: 3518, token usage: 0.09, gen throughput (token/s): 48.14, #queue-req: 0
  23517. 2025-07-20 17:36:02,829 - __main__ - INFO - sglang running req: 1 queue req: 0
  23518. 2025-07-20 17:36:03,660 - sglang - INFO - [2025-07-20 17:36:03 TP0] Decode batch. #running-req: 1, #token: 3558, token usage: 0.09, gen throughput (token/s): 48.10, #queue-req: 0
  23519. 2025-07-20 17:36:03,660 - __main__ - INFO - sglang running req: 1 queue req: 0
  23520. 2025-07-20 17:36:04,491 - sglang - INFO - [2025-07-20 17:36:04 TP0] Decode batch. #running-req: 1, #token: 3598, token usage: 0.09, gen throughput (token/s): 48.15, #queue-req: 0
  23521. 2025-07-20 17:36:04,491 - __main__ - INFO - sglang running req: 1 queue req: 0
  23522. 2025-07-20 17:36:05,321 - sglang - INFO - [2025-07-20 17:36:05 TP0] Decode batch. #running-req: 1, #token: 3638, token usage: 0.10, gen throughput (token/s): 48.17, #queue-req: 0
  23523. 2025-07-20 17:36:05,321 - __main__ - INFO - sglang running req: 1 queue req: 0
  23524. 2025-07-20 17:36:06,153 - sglang - INFO - [2025-07-20 17:36:06 TP0] Decode batch. #running-req: 1, #token: 3678, token usage: 0.10, gen throughput (token/s): 48.10, #queue-req: 0
  23525. 2025-07-20 17:36:06,153 - __main__ - INFO - sglang running req: 1 queue req: 0
  23526. 2025-07-20 17:36:06,845 - __main__ - INFO - Queue remaining: 9
  23527. 2025-07-20 17:36:06,845 - __main__ - INFO -
  23528. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23529. ----------------------------------------------------------------------------------
  23530. finished_input_tokens 450.00 535.46
  23531. finished_output_tokens 108.68 127.88
  23532. sglang_input_tokens 497.18 628.15
  23533. sglang_output_tokens 123.26 150.15
  23534. 2025-07-20 17:36:06,845 - __main__ - INFO -
  23535. Worker ID | finished | started
  23536. ----------+----------+--------
  23537. 0 | 13 | 14
  23538. 2025-07-20 17:36:06,985 - sglang - INFO - [2025-07-20 17:36:06 TP0] Decode batch. #running-req: 1, #token: 3718, token usage: 0.10, gen throughput (token/s): 48.09, #queue-req: 0
  23539. 2025-07-20 17:36:06,985 - __main__ - INFO - sglang running req: 1 queue req: 0
  23540. 2025-07-20 17:36:07,816 - sglang - INFO - [2025-07-20 17:36:07 TP0] Decode batch. #running-req: 1, #token: 3758, token usage: 0.10, gen throughput (token/s): 48.12, #queue-req: 0
  23541. 2025-07-20 17:36:07,816 - __main__ - INFO - sglang running req: 1 queue req: 0
  23542. 2025-07-20 17:36:08,647 - sglang - INFO - [2025-07-20 17:36:08 TP0] Decode batch. #running-req: 1, #token: 3798, token usage: 0.10, gen throughput (token/s): 48.13, #queue-req: 0
  23543. 2025-07-20 17:36:08,647 - __main__ - INFO - sglang running req: 1 queue req: 0
  23544. 2025-07-20 17:36:09,479 - sglang - INFO - [2025-07-20 17:36:09 TP0] Decode batch. #running-req: 1, #token: 3838, token usage: 0.10, gen throughput (token/s): 48.07, #queue-req: 0
  23545. 2025-07-20 17:36:09,479 - __main__ - INFO - sglang running req: 1 queue req: 0
  23546. 2025-07-20 17:36:10,311 - sglang - INFO - [2025-07-20 17:36:10 TP0] Decode batch. #running-req: 1, #token: 3878, token usage: 0.10, gen throughput (token/s): 48.11, #queue-req: 0
  23547. 2025-07-20 17:36:10,311 - __main__ - INFO - sglang running req: 1 queue req: 0
  23548. 2025-07-20 17:36:11,142 - sglang - INFO - [2025-07-20 17:36:11 TP0] Decode batch. #running-req: 1, #token: 3918, token usage: 0.10, gen throughput (token/s): 48.11, #queue-req: 0
  23549. 2025-07-20 17:36:11,142 - __main__ - INFO - sglang running req: 1 queue req: 0
  23550. 2025-07-20 17:36:11,973 - sglang - INFO - [2025-07-20 17:36:11 TP0] Decode batch. #running-req: 1, #token: 3958, token usage: 0.10, gen throughput (token/s): 48.12, #queue-req: 0
  23551. 2025-07-20 17:36:11,973 - __main__ - INFO - sglang running req: 1 queue req: 0
  23552. 2025-07-20 17:36:12,805 - sglang - INFO - [2025-07-20 17:36:12 TP0] Decode batch. #running-req: 1, #token: 3998, token usage: 0.11, gen throughput (token/s): 48.08, #queue-req: 0
  23553. 2025-07-20 17:36:12,805 - __main__ - INFO - sglang running req: 1 queue req: 0
  23554. 2025-07-20 17:36:13,637 - sglang - INFO - [2025-07-20 17:36:13 TP0] Decode batch. #running-req: 1, #token: 4038, token usage: 0.11, gen throughput (token/s): 48.08, #queue-req: 0
  23555. 2025-07-20 17:36:13,638 - __main__ - INFO - sglang running req: 1 queue req: 0
  23556. 2025-07-20 17:36:14,470 - sglang - INFO - [2025-07-20 17:36:14 TP0] Decode batch. #running-req: 1, #token: 4078, token usage: 0.11, gen throughput (token/s): 48.06, #queue-req: 0
  23557. 2025-07-20 17:36:14,470 - __main__ - INFO - sglang running req: 1 queue req: 0
  23558. 2025-07-20 17:36:15,302 - sglang - INFO - [2025-07-20 17:36:15 TP0] Decode batch. #running-req: 1, #token: 4118, token usage: 0.11, gen throughput (token/s): 48.05, #queue-req: 0
  23559. 2025-07-20 17:36:15,302 - __main__ - INFO - sglang running req: 1 queue req: 0
  23560. 2025-07-20 17:36:16,135 - sglang - INFO - [2025-07-20 17:36:16 TP0] Decode batch. #running-req: 1, #token: 4158, token usage: 0.11, gen throughput (token/s): 48.04, #queue-req: 0
  23561. 2025-07-20 17:36:16,135 - __main__ - INFO - sglang running req: 1 queue req: 0
  23562. 2025-07-20 17:36:16,847 - __main__ - INFO - Queue remaining: 9
  23563. 2025-07-20 17:36:16,847 - __main__ - INFO -
  23564. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23565. ----------------------------------------------------------------------------------
  23566. finished_input_tokens 443.49 535.46
  23567. finished_output_tokens 107.10 127.88
  23568. sglang_input_tokens 489.98 628.15
  23569. sglang_output_tokens 121.48 150.15
  23570. 2025-07-20 17:36:16,847 - __main__ - INFO -
  23571. Worker ID | finished | started
  23572. ----------+----------+--------
  23573. 0 | 13 | 14
  23574. 2025-07-20 17:36:16,967 - sglang - INFO - [2025-07-20 17:36:16 TP0] Decode batch. #running-req: 1, #token: 4198, token usage: 0.11, gen throughput (token/s): 48.03, #queue-req: 0
  23575. 2025-07-20 17:36:16,968 - __main__ - INFO - sglang running req: 1 queue req: 0
  23576. 2025-07-20 17:36:17,801 - sglang - INFO - [2025-07-20 17:36:17 TP0] Decode batch. #running-req: 1, #token: 4238, token usage: 0.11, gen throughput (token/s): 48.01, #queue-req: 0
  23577. 2025-07-20 17:36:17,801 - __main__ - INFO - sglang running req: 1 queue req: 0
  23578. 2025-07-20 17:36:18,634 - sglang - INFO - [2025-07-20 17:36:18 TP0] Decode batch. #running-req: 1, #token: 4278, token usage: 0.11, gen throughput (token/s): 48.02, #queue-req: 0
  23579. 2025-07-20 17:36:18,634 - __main__ - INFO - sglang running req: 1 queue req: 0
  23580. 2025-07-20 17:36:19,467 - sglang - INFO - [2025-07-20 17:36:19 TP0] Decode batch. #running-req: 1, #token: 4318, token usage: 0.11, gen throughput (token/s): 48.02, #queue-req: 0
  23581. 2025-07-20 17:36:19,467 - __main__ - INFO - sglang running req: 1 queue req: 0
  23582. 2025-07-20 17:36:20,300 - sglang - INFO - [2025-07-20 17:36:20 TP0] Decode batch. #running-req: 1, #token: 4358, token usage: 0.11, gen throughput (token/s): 48.02, #queue-req: 0
  23583. 2025-07-20 17:36:20,300 - __main__ - INFO - sglang running req: 1 queue req: 0
  23584. 2025-07-20 17:36:21,133 - sglang - INFO - [2025-07-20 17:36:21 TP0] Decode batch. #running-req: 1, #token: 4398, token usage: 0.12, gen throughput (token/s): 48.00, #queue-req: 0
  23585. 2025-07-20 17:36:21,133 - __main__ - INFO - sglang running req: 1 queue req: 0
  23586. 2025-07-20 17:36:21,966 - sglang - INFO - [2025-07-20 17:36:21 TP0] Decode batch. #running-req: 1, #token: 4438, token usage: 0.12, gen throughput (token/s): 47.99, #queue-req: 0
  23587. 2025-07-20 17:36:21,967 - __main__ - INFO - sglang running req: 1 queue req: 0
  23588. 2025-07-20 17:36:22,800 - sglang - INFO - [2025-07-20 17:36:22 TP0] Decode batch. #running-req: 1, #token: 4478, token usage: 0.12, gen throughput (token/s): 47.97, #queue-req: 0
  23589. 2025-07-20 17:36:22,801 - __main__ - INFO - sglang running req: 1 queue req: 0
  23590. 2025-07-20 17:36:23,634 - sglang - INFO - [2025-07-20 17:36:23 TP0] Decode batch. #running-req: 1, #token: 4518, token usage: 0.12, gen throughput (token/s): 47.98, #queue-req: 0
  23591. 2025-07-20 17:36:23,634 - __main__ - INFO - sglang running req: 1 queue req: 0
  23592. 2025-07-20 17:36:24,468 - sglang - INFO - [2025-07-20 17:36:24 TP0] Decode batch. #running-req: 1, #token: 4558, token usage: 0.12, gen throughput (token/s): 47.99, #queue-req: 0
  23593. 2025-07-20 17:36:24,468 - __main__ - INFO - sglang running req: 1 queue req: 0
  23594. 2025-07-20 17:36:25,303 - sglang - INFO - [2025-07-20 17:36:25 TP0] Decode batch. #running-req: 1, #token: 4598, token usage: 0.12, gen throughput (token/s): 47.90, #queue-req: 0
  23595. 2025-07-20 17:36:25,303 - __main__ - INFO - sglang running req: 1 queue req: 0
  23596. 2025-07-20 17:36:26,138 - sglang - INFO - [2025-07-20 17:36:26 TP0] Decode batch. #running-req: 1, #token: 4638, token usage: 0.12, gen throughput (token/s): 47.88, #queue-req: 0
  23597. 2025-07-20 17:36:26,138 - __main__ - INFO - sglang running req: 1 queue req: 0
  23598. 2025-07-20 17:36:26,849 - __main__ - INFO - Queue remaining: 9
  23599. 2025-07-20 17:36:26,850 - __main__ - INFO -
  23600. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23601. ----------------------------------------------------------------------------------
  23602. finished_input_tokens 437.15 535.46
  23603. finished_output_tokens 105.57 127.88
  23604. sglang_input_tokens 482.99 628.15
  23605. sglang_output_tokens 119.74 150.15
  23606. 2025-07-20 17:36:26,850 - __main__ - INFO -
  23607. Worker ID | finished | started
  23608. ----------+----------+--------
  23609. 0 | 13 | 14
  23610. 2025-07-20 17:36:26,973 - sglang - INFO - [2025-07-20 17:36:26 TP0] Decode batch. #running-req: 1, #token: 4678, token usage: 0.12, gen throughput (token/s): 47.91, #queue-req: 0
  23611. 2025-07-20 17:36:26,973 - __main__ - INFO - sglang running req: 1 queue req: 0
  23612. 2025-07-20 17:36:27,808 - sglang - INFO - [2025-07-20 17:36:27 TP0] Decode batch. #running-req: 1, #token: 4718, token usage: 0.12, gen throughput (token/s): 47.91, #queue-req: 0
  23613. 2025-07-20 17:36:27,808 - __main__ - INFO - sglang running req: 1 queue req: 0
  23614. 2025-07-20 17:36:28,642 - sglang - INFO - [2025-07-20 17:36:28 TP0] Decode batch. #running-req: 1, #token: 4758, token usage: 0.13, gen throughput (token/s): 47.93, #queue-req: 0
  23615. 2025-07-20 17:36:28,643 - __main__ - INFO - sglang running req: 1 queue req: 0
  23616. 2025-07-20 17:36:29,478 - sglang - INFO - [2025-07-20 17:36:29 TP0] Decode batch. #running-req: 1, #token: 4798, token usage: 0.13, gen throughput (token/s): 47.89, #queue-req: 0
  23617. 2025-07-20 17:36:29,478 - __main__ - INFO - sglang running req: 1 queue req: 0
  23618. 2025-07-20 17:36:30,313 - sglang - INFO - [2025-07-20 17:36:30 TP0] Decode batch. #running-req: 1, #token: 4838, token usage: 0.13, gen throughput (token/s): 47.90, #queue-req: 0
  23619. 2025-07-20 17:36:30,313 - __main__ - INFO - sglang running req: 1 queue req: 0
  23620. 2025-07-20 17:36:31,147 - sglang - INFO - [2025-07-20 17:36:31 TP0] Decode batch. #running-req: 1, #token: 4878, token usage: 0.13, gen throughput (token/s): 47.92, #queue-req: 0
  23621. 2025-07-20 17:36:31,148 - __main__ - INFO - sglang running req: 1 queue req: 0
  23622. 2025-07-20 17:36:31,982 - sglang - INFO - [2025-07-20 17:36:31 TP0] Decode batch. #running-req: 1, #token: 4918, token usage: 0.13, gen throughput (token/s): 47.91, #queue-req: 0
  23623. 2025-07-20 17:36:31,982 - __main__ - INFO - sglang running req: 1 queue req: 0
  23624. 2025-07-20 17:36:32,818 - sglang - INFO - [2025-07-20 17:36:32 TP0] Decode batch. #running-req: 1, #token: 4958, token usage: 0.13, gen throughput (token/s): 47.87, #queue-req: 0
  23625. 2025-07-20 17:36:32,818 - __main__ - INFO - sglang running req: 1 queue req: 0
  23626. 2025-07-20 17:36:33,653 - sglang - INFO - [2025-07-20 17:36:33 TP0] Decode batch. #running-req: 1, #token: 4998, token usage: 0.13, gen throughput (token/s): 47.88, #queue-req: 0
  23627. 2025-07-20 17:36:33,653 - __main__ - INFO - sglang running req: 1 queue req: 0
  23628. 2025-07-20 17:36:34,488 - sglang - INFO - [2025-07-20 17:36:34 TP0] Decode batch. #running-req: 1, #token: 5038, token usage: 0.13, gen throughput (token/s): 47.90, #queue-req: 0
  23629. 2025-07-20 17:36:34,489 - __main__ - INFO - sglang running req: 1 queue req: 0
  23630. 2025-07-20 17:36:35,324 - sglang - INFO - [2025-07-20 17:36:35 TP0] Decode batch. #running-req: 1, #token: 5078, token usage: 0.13, gen throughput (token/s): 47.87, #queue-req: 0
  23631. 2025-07-20 17:36:35,324 - __main__ - INFO - sglang running req: 1 queue req: 0
  23632. 2025-07-20 17:36:35,707 - __main__ - WARNING - JSON decode error on attempt 0 for test_pdf/1144520000702630XG344010604301201.pdf-10: Unterminated string starting at: line 1 column 125 (char 124)
  23633. 2025-07-20 17:36:35,843 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
  23634. 2025-07-20 17:36:36,014 - sglang - INFO - [2025-07-20 17:36:36 TP0] Prefill batch. #new-seq: 1, #new-token: 2097, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  23635. 2025-07-20 17:36:36,015 - __main__ - INFO - sglang running req: 0 queue req: 0
  23636. 2025-07-20 17:36:36,852 - __main__ - INFO - Queue remaining: 9
  23637. 2025-07-20 17:36:36,852 - __main__ - INFO -
  23638. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23639. ----------------------------------------------------------------------------------
  23640. finished_input_tokens 431.00 469.36
  23641. finished_output_tokens 104.09 112.60
  23642. sglang_input_tokens 479.14 564.80
  23643. sglang_output_tokens 122.28 143.21
  23644. 2025-07-20 17:36:36,852 - __main__ - INFO -
  23645. Worker ID | finished | started
  23646. ----------+----------+--------
  23647. 0 | 13 | 14
  23648. 2025-07-20 17:36:37,146 - sglang - INFO - [2025-07-20 17:36:37 TP0] Decode batch. #running-req: 1, #token: 2119, token usage: 0.06, gen throughput (token/s): 21.96, #queue-req: 0
  23649. 2025-07-20 17:36:37,146 - __main__ - INFO - sglang running req: 1 queue req: 0
  23650. 2025-07-20 17:36:37,972 - sglang - INFO - [2025-07-20 17:36:37 TP0] Decode batch. #running-req: 1, #token: 2159, token usage: 0.06, gen throughput (token/s): 48.37, #queue-req: 0
  23651. 2025-07-20 17:36:37,973 - __main__ - INFO - sglang running req: 1 queue req: 0
  23652. 2025-07-20 17:36:38,799 - sglang - INFO - [2025-07-20 17:36:38 TP0] Decode batch. #running-req: 1, #token: 2199, token usage: 0.06, gen throughput (token/s): 48.43, #queue-req: 0
  23653. 2025-07-20 17:36:38,799 - __main__ - INFO - sglang running req: 1 queue req: 0
  23654. 2025-07-20 17:36:39,626 - sglang - INFO - [2025-07-20 17:36:39 TP0] Decode batch. #running-req: 1, #token: 2239, token usage: 0.06, gen throughput (token/s): 48.31, #queue-req: 0
  23655. 2025-07-20 17:36:39,627 - __main__ - INFO - sglang running req: 1 queue req: 0
  23656. 2025-07-20 17:36:40,454 - sglang - INFO - [2025-07-20 17:36:40 TP0] Decode batch. #running-req: 1, #token: 2279, token usage: 0.06, gen throughput (token/s): 48.33, #queue-req: 0
  23657. 2025-07-20 17:36:40,454 - __main__ - INFO - sglang running req: 1 queue req: 0
  23658. 2025-07-20 17:36:41,281 - sglang - INFO - [2025-07-20 17:36:41 TP0] Decode batch. #running-req: 1, #token: 2319, token usage: 0.06, gen throughput (token/s): 48.41, #queue-req: 0
  23659. 2025-07-20 17:36:41,281 - __main__ - INFO - sglang running req: 1 queue req: 0
  23660. 2025-07-20 17:36:42,107 - sglang - INFO - [2025-07-20 17:36:42 TP0] Decode batch. #running-req: 1, #token: 2359, token usage: 0.06, gen throughput (token/s): 48.38, #queue-req: 0
  23661. 2025-07-20 17:36:42,108 - __main__ - INFO - sglang running req: 1 queue req: 0
  23662. 2025-07-20 17:36:42,935 - sglang - INFO - [2025-07-20 17:36:42 TP0] Decode batch. #running-req: 1, #token: 2399, token usage: 0.06, gen throughput (token/s): 48.34, #queue-req: 0
  23663. 2025-07-20 17:36:42,935 - __main__ - INFO - sglang running req: 1 queue req: 0
  23664. 2025-07-20 17:36:43,762 - sglang - INFO - [2025-07-20 17:36:43 TP0] Decode batch. #running-req: 1, #token: 2439, token usage: 0.06, gen throughput (token/s): 48.37, #queue-req: 0
  23665. 2025-07-20 17:36:43,762 - __main__ - INFO - sglang running req: 1 queue req: 0
  23666. 2025-07-20 17:36:44,589 - sglang - INFO - [2025-07-20 17:36:44 TP0] Decode batch. #running-req: 1, #token: 2479, token usage: 0.07, gen throughput (token/s): 48.38, #queue-req: 0
  23667. 2025-07-20 17:36:44,589 - __main__ - INFO - sglang running req: 1 queue req: 0
  23668. 2025-07-20 17:36:45,416 - sglang - INFO - [2025-07-20 17:36:45 TP0] Decode batch. #running-req: 1, #token: 2519, token usage: 0.07, gen throughput (token/s): 48.32, #queue-req: 0
  23669. 2025-07-20 17:36:45,417 - __main__ - INFO - sglang running req: 1 queue req: 0
  23670. 2025-07-20 17:36:46,244 - sglang - INFO - [2025-07-20 17:36:46 TP0] Decode batch. #running-req: 1, #token: 2559, token usage: 0.07, gen throughput (token/s): 48.34, #queue-req: 0
  23671. 2025-07-20 17:36:46,244 - __main__ - INFO - sglang running req: 1 queue req: 0
  23672. 2025-07-20 17:36:46,854 - __main__ - INFO - Queue remaining: 9
  23673. 2025-07-20 17:36:46,855 - __main__ - INFO -
  23674. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23675. ----------------------------------------------------------------------------------
  23676. finished_input_tokens 425.02 469.36
  23677. finished_output_tokens 102.64 112.60
  23678. sglang_input_tokens 472.49 564.80
  23679. sglang_output_tokens 120.58 143.21
  23680. 2025-07-20 17:36:46,855 - __main__ - INFO -
  23681. Worker ID | finished | started
  23682. ----------+----------+--------
  23683. 0 | 13 | 14
  23684. 2025-07-20 17:36:47,071 - sglang - INFO - [2025-07-20 17:36:47 TP0] Decode batch. #running-req: 1, #token: 2599, token usage: 0.07, gen throughput (token/s): 48.36, #queue-req: 0
  23685. 2025-07-20 17:36:47,071 - __main__ - INFO - sglang running req: 1 queue req: 0
  23686. 2025-07-20 17:36:47,899 - sglang - INFO - [2025-07-20 17:36:47 TP0] Decode batch. #running-req: 1, #token: 2639, token usage: 0.07, gen throughput (token/s): 48.31, #queue-req: 0
  23687. 2025-07-20 17:36:47,899 - __main__ - INFO - sglang running req: 1 queue req: 0
  23688. 2025-07-20 17:36:48,727 - sglang - INFO - [2025-07-20 17:36:48 TP0] Decode batch. #running-req: 1, #token: 2679, token usage: 0.07, gen throughput (token/s): 48.31, #queue-req: 0
  23689. 2025-07-20 17:36:48,727 - __main__ - INFO - sglang running req: 1 queue req: 0
  23690. 2025-07-20 17:36:49,554 - sglang - INFO - [2025-07-20 17:36:49 TP0] Decode batch. #running-req: 1, #token: 2719, token usage: 0.07, gen throughput (token/s): 48.35, #queue-req: 0
  23691. 2025-07-20 17:36:49,555 - __main__ - INFO - sglang running req: 1 queue req: 0
  23692. 2025-07-20 17:36:50,383 - sglang - INFO - [2025-07-20 17:36:50 TP0] Decode batch. #running-req: 1, #token: 2759, token usage: 0.07, gen throughput (token/s): 48.30, #queue-req: 0
  23693. 2025-07-20 17:36:50,383 - __main__ - INFO - sglang running req: 1 queue req: 0
  23694. 2025-07-20 17:36:51,211 - sglang - INFO - [2025-07-20 17:36:51 TP0] Decode batch. #running-req: 1, #token: 2799, token usage: 0.07, gen throughput (token/s): 48.28, #queue-req: 0
  23695. 2025-07-20 17:36:51,211 - __main__ - INFO - sglang running req: 1 queue req: 0
  23696. 2025-07-20 17:36:52,038 - sglang - INFO - [2025-07-20 17:36:52 TP0] Decode batch. #running-req: 1, #token: 2839, token usage: 0.07, gen throughput (token/s): 48.34, #queue-req: 0
  23697. 2025-07-20 17:36:52,039 - __main__ - INFO - sglang running req: 1 queue req: 0
  23698. 2025-07-20 17:36:52,867 - sglang - INFO - [2025-07-20 17:36:52 TP0] Decode batch. #running-req: 1, #token: 2879, token usage: 0.08, gen throughput (token/s): 48.31, #queue-req: 0
  23699. 2025-07-20 17:36:52,867 - __main__ - INFO - sglang running req: 1 queue req: 0
  23700. 2025-07-20 17:36:53,695 - sglang - INFO - [2025-07-20 17:36:53 TP0] Decode batch. #running-req: 1, #token: 2919, token usage: 0.08, gen throughput (token/s): 48.26, #queue-req: 0
  23701. 2025-07-20 17:36:53,696 - __main__ - INFO - sglang running req: 1 queue req: 0
  23702. 2025-07-20 17:36:54,524 - sglang - INFO - [2025-07-20 17:36:54 TP0] Decode batch. #running-req: 1, #token: 2959, token usage: 0.08, gen throughput (token/s): 48.28, #queue-req: 0
  23703. 2025-07-20 17:36:54,524 - __main__ - INFO - sglang running req: 1 queue req: 0
  23704. 2025-07-20 17:36:55,352 - sglang - INFO - [2025-07-20 17:36:55 TP0] Decode batch. #running-req: 1, #token: 2999, token usage: 0.08, gen throughput (token/s): 48.30, #queue-req: 0
  23705. 2025-07-20 17:36:55,352 - __main__ - INFO - sglang running req: 1 queue req: 0
  23706. 2025-07-20 17:36:56,182 - sglang - INFO - [2025-07-20 17:36:56 TP0] Decode batch. #running-req: 1, #token: 3039, token usage: 0.08, gen throughput (token/s): 48.19, #queue-req: 0
  23707. 2025-07-20 17:36:56,182 - __main__ - INFO - sglang running req: 1 queue req: 0
  23708. 2025-07-20 17:36:56,856 - __main__ - INFO - Queue remaining: 9
  23709. 2025-07-20 17:36:56,857 - __main__ - INFO -
  23710. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23711. ----------------------------------------------------------------------------------
  23712. finished_input_tokens 419.20 469.36
  23713. finished_output_tokens 101.24 112.60
  23714. sglang_input_tokens 466.02 564.80
  23715. sglang_output_tokens 118.93 143.21
  23716. 2025-07-20 17:36:56,857 - __main__ - INFO -
  23717. Worker ID | finished | started
  23718. ----------+----------+--------
  23719. 0 | 13 | 14
  23720. 2025-07-20 17:36:57,012 - sglang - INFO - [2025-07-20 17:36:57 TP0] Decode batch. #running-req: 1, #token: 3079, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
  23721. 2025-07-20 17:36:57,012 - __main__ - INFO - sglang running req: 1 queue req: 0
  23722. 2025-07-20 17:36:57,840 - sglang - INFO - [2025-07-20 17:36:57 TP0] Decode batch. #running-req: 1, #token: 3119, token usage: 0.08, gen throughput (token/s): 48.27, #queue-req: 0
  23723. 2025-07-20 17:36:57,841 - __main__ - INFO - sglang running req: 1 queue req: 0
  23724. 2025-07-20 17:36:58,670 - sglang - INFO - [2025-07-20 17:36:58 TP0] Decode batch. #running-req: 1, #token: 3159, token usage: 0.08, gen throughput (token/s): 48.24, #queue-req: 0
  23725. 2025-07-20 17:36:58,670 - __main__ - INFO - sglang running req: 1 queue req: 0
  23726. 2025-07-20 17:36:59,500 - sglang - INFO - [2025-07-20 17:36:59 TP0] Decode batch. #running-req: 1, #token: 3199, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
  23727. 2025-07-20 17:36:59,500 - __main__ - INFO - sglang running req: 1 queue req: 0
  23728. 2025-07-20 17:37:00,330 - sglang - INFO - [2025-07-20 17:37:00 TP0] Decode batch. #running-req: 1, #token: 3239, token usage: 0.09, gen throughput (token/s): 48.21, #queue-req: 0
  23729. 2025-07-20 17:37:00,330 - __main__ - INFO - sglang running req: 1 queue req: 0
  23730. 2025-07-20 17:37:01,159 - sglang - INFO - [2025-07-20 17:37:01 TP0] Decode batch. #running-req: 1, #token: 3279, token usage: 0.09, gen throughput (token/s): 48.24, #queue-req: 0
  23731. 2025-07-20 17:37:01,159 - __main__ - INFO - sglang running req: 1 queue req: 0
  23732. 2025-07-20 17:37:01,988 - sglang - INFO - [2025-07-20 17:37:01 TP0] Decode batch. #running-req: 1, #token: 3319, token usage: 0.09, gen throughput (token/s): 48.25, #queue-req: 0
  23733. 2025-07-20 17:37:01,988 - __main__ - INFO - sglang running req: 1 queue req: 0
  23734. 2025-07-20 17:37:02,819 - sglang - INFO - [2025-07-20 17:37:02 TP0] Decode batch. #running-req: 1, #token: 3359, token usage: 0.09, gen throughput (token/s): 48.15, #queue-req: 0
  23735. 2025-07-20 17:37:02,819 - __main__ - INFO - sglang running req: 1 queue req: 0
  23736. 2025-07-20 17:37:03,649 - sglang - INFO - [2025-07-20 17:37:03 TP0] Decode batch. #running-req: 1, #token: 3399, token usage: 0.09, gen throughput (token/s): 48.16, #queue-req: 0
  23737. 2025-07-20 17:37:03,649 - __main__ - INFO - sglang running req: 1 queue req: 0
  23738. 2025-07-20 17:37:04,479 - sglang - INFO - [2025-07-20 17:37:04 TP0] Decode batch. #running-req: 1, #token: 3439, token usage: 0.09, gen throughput (token/s): 48.22, #queue-req: 0
  23739. 2025-07-20 17:37:04,479 - __main__ - INFO - sglang running req: 1 queue req: 0
  23740. 2025-07-20 17:37:05,309 - sglang - INFO - [2025-07-20 17:37:05 TP0] Decode batch. #running-req: 1, #token: 3479, token usage: 0.09, gen throughput (token/s): 48.21, #queue-req: 0
  23741. 2025-07-20 17:37:05,309 - __main__ - INFO - sglang running req: 1 queue req: 0
  23742. 2025-07-20 17:37:06,140 - sglang - INFO - [2025-07-20 17:37:06 TP0] Decode batch. #running-req: 1, #token: 3519, token usage: 0.09, gen throughput (token/s): 48.11, #queue-req: 0
  23743. 2025-07-20 17:37:06,140 - __main__ - INFO - sglang running req: 1 queue req: 0
  23744. 2025-07-20 17:37:06,858 - __main__ - INFO - Queue remaining: 9
  23745. 2025-07-20 17:37:06,859 - __main__ - INFO -
  23746. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23747. ----------------------------------------------------------------------------------
  23748. finished_input_tokens 413.54 469.36
  23749. finished_output_tokens 99.87 112.60
  23750. sglang_input_tokens 459.72 564.80
  23751. sglang_output_tokens 117.32 143.21
  23752. 2025-07-20 17:37:06,859 - __main__ - INFO -
  23753. Worker ID | finished | started
  23754. ----------+----------+--------
  23755. 0 | 13 | 14
  23756. 2025-07-20 17:37:06,971 - sglang - INFO - [2025-07-20 17:37:06 TP0] Decode batch. #running-req: 1, #token: 3559, token usage: 0.09, gen throughput (token/s): 48.13, #queue-req: 0
  23757. 2025-07-20 17:37:06,971 - __main__ - INFO - sglang running req: 1 queue req: 0
  23758. 2025-07-20 17:37:07,802 - sglang - INFO - [2025-07-20 17:37:07 TP0] Decode batch. #running-req: 1, #token: 3599, token usage: 0.09, gen throughput (token/s): 48.17, #queue-req: 0
  23759. 2025-07-20 17:37:07,802 - __main__ - INFO - sglang running req: 1 queue req: 0
  23760. 2025-07-20 17:37:08,632 - sglang - INFO - [2025-07-20 17:37:08 TP0] Decode batch. #running-req: 1, #token: 3639, token usage: 0.10, gen throughput (token/s): 48.16, #queue-req: 0
  23761. 2025-07-20 17:37:08,633 - __main__ - INFO - sglang running req: 1 queue req: 0
  23762. 2025-07-20 17:37:09,464 - sglang - INFO - [2025-07-20 17:37:09 TP0] Decode batch. #running-req: 1, #token: 3679, token usage: 0.10, gen throughput (token/s): 48.07, #queue-req: 0
  23763. 2025-07-20 17:37:09,465 - __main__ - INFO - sglang running req: 1 queue req: 0
  23764. 2025-07-20 17:37:10,296 - sglang - INFO - [2025-07-20 17:37:10 TP0] Decode batch. #running-req: 1, #token: 3719, token usage: 0.10, gen throughput (token/s): 48.09, #queue-req: 0
  23765. 2025-07-20 17:37:10,296 - __main__ - INFO - sglang running req: 1 queue req: 0
  23766. 2025-07-20 17:37:11,127 - sglang - INFO - [2025-07-20 17:37:11 TP0] Decode batch. #running-req: 1, #token: 3759, token usage: 0.10, gen throughput (token/s): 48.16, #queue-req: 0
  23767. 2025-07-20 17:37:11,127 - __main__ - INFO - sglang running req: 1 queue req: 0
  23768. 2025-07-20 17:37:11,958 - sglang - INFO - [2025-07-20 17:37:11 TP0] Decode batch. #running-req: 1, #token: 3799, token usage: 0.10, gen throughput (token/s): 48.10, #queue-req: 0
  23769. 2025-07-20 17:37:11,959 - __main__ - INFO - sglang running req: 1 queue req: 0
  23770. 2025-07-20 17:37:12,791 - sglang - INFO - [2025-07-20 17:37:12 TP0] Decode batch. #running-req: 1, #token: 3839, token usage: 0.10, gen throughput (token/s): 48.06, #queue-req: 0
  23771. 2025-07-20 17:37:12,791 - __main__ - INFO - sglang running req: 1 queue req: 0
  23772. 2025-07-20 17:37:13,623 - sglang - INFO - [2025-07-20 17:37:13 TP0] Decode batch. #running-req: 1, #token: 3879, token usage: 0.10, gen throughput (token/s): 48.09, #queue-req: 0
  23773. 2025-07-20 17:37:13,623 - __main__ - INFO - sglang running req: 1 queue req: 0
  23774. 2025-07-20 17:37:14,454 - sglang - INFO - [2025-07-20 17:37:14 TP0] Decode batch. #running-req: 1, #token: 3919, token usage: 0.10, gen throughput (token/s): 48.11, #queue-req: 0
  23775. 2025-07-20 17:37:14,454 - __main__ - INFO - sglang running req: 1 queue req: 0
  23776. 2025-07-20 17:37:15,287 - sglang - INFO - [2025-07-20 17:37:15 TP0] Decode batch. #running-req: 1, #token: 3959, token usage: 0.10, gen throughput (token/s): 48.05, #queue-req: 0
  23777. 2025-07-20 17:37:15,287 - __main__ - INFO - sglang running req: 1 queue req: 0
  23778. 2025-07-20 17:37:16,119 - sglang - INFO - [2025-07-20 17:37:16 TP0] Decode batch. #running-req: 1, #token: 3999, token usage: 0.11, gen throughput (token/s): 48.05, #queue-req: 0
  23779. 2025-07-20 17:37:16,119 - __main__ - INFO - sglang running req: 1 queue req: 0
  23780. 2025-07-20 17:37:16,860 - __main__ - INFO - Queue remaining: 9
  23781. 2025-07-20 17:37:16,861 - __main__ - INFO -
  23782. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23783. ----------------------------------------------------------------------------------
  23784. finished_input_tokens 408.03 469.36
  23785. finished_output_tokens 98.54 112.60
  23786. sglang_input_tokens 453.60 564.80
  23787. sglang_output_tokens 115.76 143.21
  23788. 2025-07-20 17:37:16,861 - __main__ - INFO -
  23789. Worker ID | finished | started
  23790. ----------+----------+--------
  23791. 0 | 13 | 14
  23792. 2025-07-20 17:37:16,950 - sglang - INFO - [2025-07-20 17:37:16 TP0] Decode batch. #running-req: 1, #token: 4039, token usage: 0.11, gen throughput (token/s): 48.12, #queue-req: 0
  23793. 2025-07-20 17:37:16,951 - __main__ - INFO - sglang running req: 1 queue req: 0
  23794. 2025-07-20 17:37:17,782 - sglang - INFO - [2025-07-20 17:37:17 TP0] Decode batch. #running-req: 1, #token: 4079, token usage: 0.11, gen throughput (token/s): 48.11, #queue-req: 0
  23795. 2025-07-20 17:37:17,782 - __main__ - INFO - sglang running req: 1 queue req: 0
  23796. 2025-07-20 17:37:18,615 - sglang - INFO - [2025-07-20 17:37:18 TP0] Decode batch. #running-req: 1, #token: 4119, token usage: 0.11, gen throughput (token/s): 48.00, #queue-req: 0
  23797. 2025-07-20 17:37:18,615 - __main__ - INFO - sglang running req: 1 queue req: 0
  23798. 2025-07-20 17:37:19,449 - sglang - INFO - [2025-07-20 17:37:19 TP0] Decode batch. #running-req: 1, #token: 4159, token usage: 0.11, gen throughput (token/s): 47.99, #queue-req: 0
  23799. 2025-07-20 17:37:19,449 - __main__ - INFO - sglang running req: 1 queue req: 0
  23800. 2025-07-20 17:37:20,282 - sglang - INFO - [2025-07-20 17:37:20 TP0] Decode batch. #running-req: 1, #token: 4199, token usage: 0.11, gen throughput (token/s): 48.02, #queue-req: 0
  23801. 2025-07-20 17:37:20,282 - __main__ - INFO - sglang running req: 1 queue req: 0
  23802. 2025-07-20 17:37:21,114 - sglang - INFO - [2025-07-20 17:37:21 TP0] Decode batch. #running-req: 1, #token: 4239, token usage: 0.11, gen throughput (token/s): 48.04, #queue-req: 0
  23803. 2025-07-20 17:37:21,114 - __main__ - INFO - sglang running req: 1 queue req: 0
  23804. 2025-07-20 17:37:21,947 - sglang - INFO - [2025-07-20 17:37:21 TP0] Decode batch. #running-req: 1, #token: 4279, token usage: 0.11, gen throughput (token/s): 48.00, #queue-req: 0
  23805. 2025-07-20 17:37:21,948 - __main__ - INFO - sglang running req: 1 queue req: 0
  23806. 2025-07-20 17:37:22,781 - sglang - INFO - [2025-07-20 17:37:22 TP0] Decode batch. #running-req: 1, #token: 4319, token usage: 0.11, gen throughput (token/s): 47.99, #queue-req: 0
  23807. 2025-07-20 17:37:22,781 - __main__ - INFO - sglang running req: 1 queue req: 0
  23808. 2025-07-20 17:37:23,614 - sglang - INFO - [2025-07-20 17:37:23 TP0] Decode batch. #running-req: 1, #token: 4359, token usage: 0.11, gen throughput (token/s): 48.04, #queue-req: 0
  23809. 2025-07-20 17:37:23,614 - __main__ - INFO - sglang running req: 1 queue req: 0
  23810. 2025-07-20 17:37:24,447 - sglang - INFO - [2025-07-20 17:37:24 TP0] Decode batch. #running-req: 1, #token: 4399, token usage: 0.12, gen throughput (token/s): 47.98, #queue-req: 0
  23811. 2025-07-20 17:37:24,447 - __main__ - INFO - sglang running req: 1 queue req: 0
  23812. 2025-07-20 17:37:25,281 - sglang - INFO - [2025-07-20 17:37:25 TP0] Decode batch. #running-req: 1, #token: 4439, token usage: 0.12, gen throughput (token/s): 47.97, #queue-req: 0
  23813. 2025-07-20 17:37:25,281 - __main__ - INFO - sglang running req: 1 queue req: 0
  23814. 2025-07-20 17:37:26,116 - sglang - INFO - [2025-07-20 17:37:26 TP0] Decode batch. #running-req: 1, #token: 4479, token usage: 0.12, gen throughput (token/s): 47.93, #queue-req: 0
  23815. 2025-07-20 17:37:26,116 - __main__ - INFO - sglang running req: 1 queue req: 0
  23816. 2025-07-20 17:37:26,863 - __main__ - INFO - Queue remaining: 9
  23817. 2025-07-20 17:37:26,863 - __main__ - INFO -
  23818. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23819. ----------------------------------------------------------------------------------
  23820. finished_input_tokens 402.66 469.36
  23821. finished_output_tokens 97.24 112.60
  23822. sglang_input_tokens 447.63 564.80
  23823. sglang_output_tokens 114.24 143.21
  23824. 2025-07-20 17:37:26,864 - __main__ - INFO -
  23825. Worker ID | finished | started
  23826. ----------+----------+--------
  23827. 0 | 13 | 14
  23828. 2025-07-20 17:37:26,950 - sglang - INFO - [2025-07-20 17:37:26 TP0] Decode batch. #running-req: 1, #token: 4519, token usage: 0.12, gen throughput (token/s): 47.94, #queue-req: 0
  23829. 2025-07-20 17:37:26,950 - __main__ - INFO - sglang running req: 1 queue req: 0
  23830. 2025-07-20 17:37:27,785 - sglang - INFO - [2025-07-20 17:37:27 TP0] Decode batch. #running-req: 1, #token: 4559, token usage: 0.12, gen throughput (token/s): 47.91, #queue-req: 0
  23831. 2025-07-20 17:37:27,785 - __main__ - INFO - sglang running req: 1 queue req: 0
  23832. 2025-07-20 17:37:28,619 - sglang - INFO - [2025-07-20 17:37:28 TP0] Decode batch. #running-req: 1, #token: 4599, token usage: 0.12, gen throughput (token/s): 47.94, #queue-req: 0
  23833. 2025-07-20 17:37:28,620 - __main__ - INFO - sglang running req: 1 queue req: 0
  23834. 2025-07-20 17:37:29,453 - sglang - INFO - [2025-07-20 17:37:29 TP0] Decode batch. #running-req: 1, #token: 4639, token usage: 0.12, gen throughput (token/s): 47.96, #queue-req: 0
  23835. 2025-07-20 17:37:29,453 - __main__ - INFO - sglang running req: 1 queue req: 0
  23836. 2025-07-20 17:37:30,289 - sglang - INFO - [2025-07-20 17:37:30 TP0] Decode batch. #running-req: 1, #token: 4679, token usage: 0.12, gen throughput (token/s): 47.89, #queue-req: 0
  23837. 2025-07-20 17:37:30,289 - __main__ - INFO - sglang running req: 1 queue req: 0
  23838. 2025-07-20 17:37:31,123 - sglang - INFO - [2025-07-20 17:37:31 TP0] Decode batch. #running-req: 1, #token: 4719, token usage: 0.12, gen throughput (token/s): 47.91, #queue-req: 0
  23839. 2025-07-20 17:37:31,124 - __main__ - INFO - sglang running req: 1 queue req: 0
  23840. 2025-07-20 17:37:31,958 - sglang - INFO - [2025-07-20 17:37:31 TP0] Decode batch. #running-req: 1, #token: 4759, token usage: 0.13, gen throughput (token/s): 47.94, #queue-req: 0
  23841. 2025-07-20 17:37:31,958 - __main__ - INFO - sglang running req: 1 queue req: 0
  23842. 2025-07-20 17:37:32,794 - sglang - INFO - [2025-07-20 17:37:32 TP0] Decode batch. #running-req: 1, #token: 4799, token usage: 0.13, gen throughput (token/s): 47.87, #queue-req: 0
  23843. 2025-07-20 17:37:32,794 - __main__ - INFO - sglang running req: 1 queue req: 0
  23844. 2025-07-20 17:37:33,629 - sglang - INFO - [2025-07-20 17:37:33 TP0] Decode batch. #running-req: 1, #token: 4839, token usage: 0.13, gen throughput (token/s): 47.89, #queue-req: 0
  23845. 2025-07-20 17:37:33,629 - __main__ - INFO - sglang running req: 1 queue req: 0
  23846. 2025-07-20 17:37:34,464 - sglang - INFO - [2025-07-20 17:37:34 TP0] Decode batch. #running-req: 1, #token: 4879, token usage: 0.13, gen throughput (token/s): 47.91, #queue-req: 0
  23847. 2025-07-20 17:37:34,464 - __main__ - INFO - sglang running req: 1 queue req: 0
  23848. 2025-07-20 17:37:35,299 - sglang - INFO - [2025-07-20 17:37:35 TP0] Decode batch. #running-req: 1, #token: 4919, token usage: 0.13, gen throughput (token/s): 47.90, #queue-req: 0
  23849. 2025-07-20 17:37:35,299 - __main__ - INFO - sglang running req: 1 queue req: 0
  23850. 2025-07-20 17:37:36,135 - sglang - INFO - [2025-07-20 17:37:36 TP0] Decode batch. #running-req: 1, #token: 4959, token usage: 0.13, gen throughput (token/s): 47.82, #queue-req: 0
  23851. 2025-07-20 17:37:36,135 - __main__ - INFO - sglang running req: 1 queue req: 0
  23852. 2025-07-20 17:37:36,865 - __main__ - INFO - Queue remaining: 9
  23853. 2025-07-20 17:37:36,866 - __main__ - INFO -
  23854. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23855. ----------------------------------------------------------------------------------
  23856. finished_input_tokens 397.43 469.36
  23857. finished_output_tokens 95.98 112.60
  23858. sglang_input_tokens 441.82 564.80
  23859. sglang_output_tokens 112.75 143.21
  23860. 2025-07-20 17:37:36,866 - __main__ - INFO -
  23861. Worker ID | finished | started
  23862. ----------+----------+--------
  23863. 0 | 13 | 14
  23864. 2025-07-20 17:37:36,971 - sglang - INFO - [2025-07-20 17:37:36 TP0] Decode batch. #running-req: 1, #token: 4999, token usage: 0.13, gen throughput (token/s): 47.84, #queue-req: 0
  23865. 2025-07-20 17:37:36,971 - __main__ - INFO - sglang running req: 1 queue req: 0
  23866. 2025-07-20 17:37:37,806 - sglang - INFO - [2025-07-20 17:37:37 TP0] Decode batch. #running-req: 1, #token: 5039, token usage: 0.13, gen throughput (token/s): 47.91, #queue-req: 0
  23867. 2025-07-20 17:37:37,807 - __main__ - INFO - sglang running req: 1 queue req: 0
  23868. 2025-07-20 17:37:38,642 - sglang - INFO - [2025-07-20 17:37:38 TP0] Decode batch. #running-req: 1, #token: 5079, token usage: 0.13, gen throughput (token/s): 47.85, #queue-req: 0
  23869. 2025-07-20 17:37:38,643 - __main__ - INFO - sglang running req: 1 queue req: 0
  23870. 2025-07-20 17:37:39,005 - __main__ - WARNING - JSON decode error on attempt 1 for test_pdf/1144520000702630XG344010604301201.pdf-10: Unterminated string starting at: line 1 column 125 (char 124)
  23871. 2025-07-20 17:37:39,139 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301201.pdf-10
  23872. 2025-07-20 17:37:39,321 - sglang - INFO - [2025-07-20 17:37:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2097, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  23873. 2025-07-20 17:37:39,321 - __main__ - INFO - sglang running req: 0 queue req: 0
  23874. 2025-07-20 17:37:40,474 - sglang - INFO - [2025-07-20 17:37:40 TP0] Decode batch. #running-req: 1, #token: 2120, token usage: 0.06, gen throughput (token/s): 21.83, #queue-req: 0
  23875. 2025-07-20 17:37:40,475 - __main__ - INFO - sglang running req: 1 queue req: 0
  23876. 2025-07-20 17:37:41,301 - sglang - INFO - [2025-07-20 17:37:41 TP0] Decode batch. #running-req: 1, #token: 2160, token usage: 0.06, gen throughput (token/s): 48.39, #queue-req: 0
  23877. 2025-07-20 17:37:41,301 - __main__ - INFO - sglang running req: 1 queue req: 0
  23878. 2025-07-20 17:37:42,128 - sglang - INFO - [2025-07-20 17:37:42 TP0] Decode batch. #running-req: 1, #token: 2200, token usage: 0.06, gen throughput (token/s): 48.36, #queue-req: 0
  23879. 2025-07-20 17:37:42,128 - __main__ - INFO - sglang running req: 1 queue req: 0
  23880. 2025-07-20 17:37:42,956 - sglang - INFO - [2025-07-20 17:37:42 TP0] Decode batch. #running-req: 1, #token: 2240, token usage: 0.06, gen throughput (token/s): 48.32, #queue-req: 0
  23881. 2025-07-20 17:37:42,956 - __main__ - INFO - sglang running req: 1 queue req: 0
  23882. 2025-07-20 17:37:43,784 - sglang - INFO - [2025-07-20 17:37:43 TP0] Decode batch. #running-req: 1, #token: 2280, token usage: 0.06, gen throughput (token/s): 48.31, #queue-req: 0
  23883. 2025-07-20 17:37:43,784 - __main__ - INFO - sglang running req: 1 queue req: 0
  23884. 2025-07-20 17:37:44,611 - sglang - INFO - [2025-07-20 17:37:44 TP0] Decode batch. #running-req: 1, #token: 2320, token usage: 0.06, gen throughput (token/s): 48.36, #queue-req: 0
  23885. 2025-07-20 17:37:44,611 - __main__ - INFO - sglang running req: 1 queue req: 0
  23886. 2025-07-20 17:37:45,438 - sglang - INFO - [2025-07-20 17:37:45 TP0] Decode batch. #running-req: 1, #token: 2360, token usage: 0.06, gen throughput (token/s): 48.36, #queue-req: 0
  23887. 2025-07-20 17:37:45,438 - __main__ - INFO - sglang running req: 1 queue req: 0
  23888. 2025-07-20 17:37:46,266 - sglang - INFO - [2025-07-20 17:37:46 TP0] Decode batch. #running-req: 1, #token: 2400, token usage: 0.06, gen throughput (token/s): 48.31, #queue-req: 0
  23889. 2025-07-20 17:37:46,266 - __main__ - INFO - sglang running req: 1 queue req: 0
  23890. 2025-07-20 17:37:46,867 - __main__ - INFO - Queue remaining: 9
  23891. 2025-07-20 17:37:46,868 - __main__ - INFO -
  23892. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23893. ----------------------------------------------------------------------------------
  23894. finished_input_tokens 392.34 347.11
  23895. finished_output_tokens 94.75 82.61
  23896. sglang_input_tokens 438.85 456.02
  23897. sglang_output_tokens 115.15 124.80
  23898. 2025-07-20 17:37:46,868 - __main__ - INFO -
  23899. Worker ID | finished | started
  23900. ----------+----------+--------
  23901. 0 | 13 | 14
  23902. 2025-07-20 17:37:47,095 - sglang - INFO - [2025-07-20 17:37:47 TP0] Decode batch. #running-req: 1, #token: 2440, token usage: 0.06, gen throughput (token/s): 48.28, #queue-req: 0
  23903. 2025-07-20 17:37:47,095 - __main__ - INFO - sglang running req: 1 queue req: 0
  23904. 2025-07-20 17:37:47,576 - __main__ - INFO - Finished TaskGroup for worker on 06798e8f7cc26525f138f26354ffab7c63074f2c
  23905. 2025-07-20 17:37:47,576 - __main__ - INFO - Got 1 docs for 06798e8f7cc26525f138f26354ffab7c63074f2c
  23906. 2025-07-20 17:37:47,578 - __main__ - INFO - Worker 0 processing work item c07c41e4c78e5049d035d0059223ac0adc60be49
  23907. 2025-07-20 17:37:47,578 - __main__ - INFO - Created all tasks for c07c41e4c78e5049d035d0059223ac0adc60be49
  23908. 2025-07-20 17:37:47,584 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG3440106011000.pdf in worker 0
  23909. 2025-07-20 17:37:47,712 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-2
  23910. 2025-07-20 17:37:47,718 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-3
  23911. 2025-07-20 17:37:47,723 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-4
  23912. 2025-07-20 17:37:47,727 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-6
  23913. 2025-07-20 17:37:47,763 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-1
  23914. 2025-07-20 17:37:47,814 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106011000.pdf-5
  23915. 2025-07-20 17:37:47,888 - sglang - INFO - [2025-07-20 17:37:47 TP0] Prefill batch. #new-seq: 1, #new-token: 1735, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  23916. 2025-07-20 17:37:47,889 - __main__ - INFO - sglang running req: 0 queue req: 0
  23917. 2025-07-20 17:37:48,506 - sglang - INFO - [2025-07-20 17:37:48 TP0] Prefill batch. #new-seq: 5, #new-token: 10414, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  23918. 2025-07-20 17:37:48,506 - __main__ - INFO - sglang running req: 1 queue req: 0
  23919. 2025-07-20 17:37:52,075 - sglang - INFO - [2025-07-20 17:37:52 TP0] Decode batch. #running-req: 6, #token: 12251, token usage: 0.32, gen throughput (token/s): 25.10, #queue-req: 0
  23920. 2025-07-20 17:37:52,075 - __main__ - INFO - sglang running req: 6 queue req: 0
  23921. 2025-07-20 17:37:52,947 - sglang - INFO - [2025-07-20 17:37:52 TP0] Decode batch. #running-req: 6, #token: 12491, token usage: 0.33, gen throughput (token/s): 275.05, #queue-req: 0
  23922. 2025-07-20 17:37:52,947 - __main__ - INFO - sglang running req: 6 queue req: 0
  23923. 2025-07-20 17:37:53,819 - sglang - INFO - [2025-07-20 17:37:53 TP0] Decode batch. #running-req: 6, #token: 12731, token usage: 0.34, gen throughput (token/s): 275.14, #queue-req: 0
  23924. 2025-07-20 17:37:53,820 - __main__ - INFO - sglang running req: 6 queue req: 0
  23925. 2025-07-20 17:37:54,691 - sglang - INFO - [2025-07-20 17:37:54 TP0] Decode batch. #running-req: 6, #token: 12971, token usage: 0.34, gen throughput (token/s): 275.26, #queue-req: 0
  23926. 2025-07-20 17:37:54,691 - __main__ - INFO - sglang running req: 6 queue req: 0
  23927. 2025-07-20 17:37:55,565 - sglang - INFO - [2025-07-20 17:37:55 TP0] Decode batch. #running-req: 6, #token: 13211, token usage: 0.35, gen throughput (token/s): 274.74, #queue-req: 0
  23928. 2025-07-20 17:37:55,565 - __main__ - INFO - sglang running req: 6 queue req: 0
  23929. 2025-07-20 17:37:56,439 - sglang - INFO - [2025-07-20 17:37:56 TP0] Decode batch. #running-req: 6, #token: 13451, token usage: 0.35, gen throughput (token/s): 274.53, #queue-req: 0
  23930. 2025-07-20 17:37:56,439 - __main__ - INFO - sglang running req: 6 queue req: 0
  23931. 2025-07-20 17:37:56,869 - __main__ - INFO - Queue remaining: 8
  23932. 2025-07-20 17:37:56,869 - __main__ - INFO -
  23933. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23934. ----------------------------------------------------------------------------------
  23935. finished_input_tokens 427.89 411.29
  23936. finished_output_tokens 102.78 93.83
  23937. sglang_input_tokens 437.79 430.13
  23938. sglang_output_tokens 114.45 114.60
  23939. 2025-07-20 17:37:56,870 - __main__ - INFO -
  23940. Worker ID | finished | started
  23941. ----------+----------+--------
  23942. 0 | 1 | 6
  23943. 2025-07-20 17:37:57,311 - sglang - INFO - [2025-07-20 17:37:57 TP0] Decode batch. #running-req: 5, #token: 11975, token usage: 0.32, gen throughput (token/s): 244.21, #queue-req: 0
  23944. 2025-07-20 17:37:57,312 - __main__ - INFO - sglang running req: 5 queue req: 0
  23945. 2025-07-20 17:37:58,178 - sglang - INFO - [2025-07-20 17:37:58 TP0] Decode batch. #running-req: 4, #token: 9769, token usage: 0.26, gen throughput (token/s): 215.78, #queue-req: 0
  23946. 2025-07-20 17:37:58,178 - __main__ - INFO - sglang running req: 4 queue req: 0
  23947. 2025-07-20 17:37:59,035 - sglang - INFO - [2025-07-20 17:37:59 TP0] Decode batch. #running-req: 4, #token: 9929, token usage: 0.26, gen throughput (token/s): 186.75, #queue-req: 0
  23948. 2025-07-20 17:37:59,035 - __main__ - INFO - sglang running req: 4 queue req: 0
  23949. 2025-07-20 17:37:59,889 - sglang - INFO - [2025-07-20 17:37:59 TP0] Decode batch. #running-req: 2, #token: 5608, token usage: 0.15, gen throughput (token/s): 156.80, #queue-req: 0
  23950. 2025-07-20 17:37:59,890 - __main__ - INFO - sglang running req: 2 queue req: 0
  23951. 2025-07-20 17:38:00,729 - sglang - INFO - [2025-07-20 17:38:00 TP0] Decode batch. #running-req: 2, #token: 5688, token usage: 0.15, gen throughput (token/s): 95.28, #queue-req: 0
  23952. 2025-07-20 17:38:00,729 - __main__ - INFO - sglang running req: 2 queue req: 0
  23953. 2025-07-20 17:38:01,567 - sglang - INFO - [2025-07-20 17:38:01 TP0] Decode batch. #running-req: 2, #token: 5768, token usage: 0.15, gen throughput (token/s): 95.41, #queue-req: 0
  23954. 2025-07-20 17:38:01,568 - __main__ - INFO - sglang running req: 2 queue req: 0
  23955. 2025-07-20 17:38:02,406 - sglang - INFO - [2025-07-20 17:38:02 TP0] Decode batch. #running-req: 2, #token: 5848, token usage: 0.15, gen throughput (token/s): 95.38, #queue-req: 0
  23956. 2025-07-20 17:38:02,406 - __main__ - INFO - sglang running req: 2 queue req: 0
  23957. 2025-07-20 17:38:03,246 - sglang - INFO - [2025-07-20 17:38:03 TP0] Decode batch. #running-req: 2, #token: 5928, token usage: 0.16, gen throughput (token/s): 95.27, #queue-req: 0
  23958. 2025-07-20 17:38:03,246 - __main__ - INFO - sglang running req: 2 queue req: 0
  23959. 2025-07-20 17:38:04,086 - sglang - INFO - [2025-07-20 17:38:04 TP0] Decode batch. #running-req: 2, #token: 6008, token usage: 0.16, gen throughput (token/s): 95.17, #queue-req: 0
  23960. 2025-07-20 17:38:04,087 - __main__ - INFO - sglang running req: 2 queue req: 0
  23961. 2025-07-20 17:38:04,926 - sglang - INFO - [2025-07-20 17:38:04 TP0] Decode batch. #running-req: 2, #token: 6088, token usage: 0.16, gen throughput (token/s): 95.32, #queue-req: 0
  23962. 2025-07-20 17:38:04,926 - __main__ - INFO - sglang running req: 2 queue req: 0
  23963. 2025-07-20 17:38:05,756 - sglang - INFO - [2025-07-20 17:38:05 TP0] Decode batch. #running-req: 1, #token: 2983, token usage: 0.08, gen throughput (token/s): 50.61, #queue-req: 0
  23964. 2025-07-20 17:38:05,756 - __main__ - INFO - sglang running req: 1 queue req: 0
  23965. 2025-07-20 17:38:06,585 - sglang - INFO - [2025-07-20 17:38:06 TP0] Decode batch. #running-req: 1, #token: 3023, token usage: 0.08, gen throughput (token/s): 48.20, #queue-req: 0
  23966. 2025-07-20 17:38:06,586 - __main__ - INFO - sglang running req: 1 queue req: 0
  23967. 2025-07-20 17:38:06,872 - __main__ - INFO - Queue remaining: 8
  23968. 2025-07-20 17:38:06,872 - __main__ - INFO -
  23969. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  23970. ----------------------------------------------------------------------------------
  23971. finished_input_tokens 422.55 411.29
  23972. finished_output_tokens 101.49 93.83
  23973. sglang_input_tokens 442.77 440.47
  23974. sglang_output_tokens 115.06 117.34
  23975. 2025-07-20 17:38:06,873 - __main__ - INFO -
  23976. Worker ID | finished | started
  23977. ----------+----------+--------
  23978. 0 | 5 | 6
  23979. 2025-07-20 17:38:07,421 - sglang - INFO - [2025-07-20 17:38:07 TP0] Decode batch. #running-req: 1, #token: 3063, token usage: 0.08, gen throughput (token/s): 47.88, #queue-req: 0
  23980. 2025-07-20 17:38:07,421 - __main__ - INFO - sglang running req: 1 queue req: 0
  23981. 2025-07-20 17:38:08,254 - sglang - INFO - [2025-07-20 17:38:08 TP0] Decode batch. #running-req: 1, #token: 3103, token usage: 0.08, gen throughput (token/s): 48.04, #queue-req: 0
  23982. 2025-07-20 17:38:08,254 - __main__ - INFO - sglang running req: 1 queue req: 0
  23983. 2025-07-20 17:38:09,083 - sglang - INFO - [2025-07-20 17:38:09 TP0] Decode batch. #running-req: 1, #token: 3143, token usage: 0.08, gen throughput (token/s): 48.22, #queue-req: 0
  23984. 2025-07-20 17:38:09,084 - __main__ - INFO - sglang running req: 1 queue req: 0
  23985. 2025-07-20 17:38:09,588 - __main__ - INFO - Finished TaskGroup for worker on c07c41e4c78e5049d035d0059223ac0adc60be49
  23986. 2025-07-20 17:38:09,589 - __main__ - INFO - Got 1 docs for c07c41e4c78e5049d035d0059223ac0adc60be49
  23987. 2025-07-20 17:38:09,590 - __main__ - INFO - Worker 0 processing work item 8450bc4e95932e232e795c885ec59ab601993cab
  23988. 2025-07-20 17:38:09,590 - __main__ - INFO - Created all tasks for 8450bc4e95932e232e795c885ec59ab601993cab
  23989. 2025-07-20 17:38:09,598 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602901001.pdf in worker 0
  23990. 2025-07-20 17:38:09,706 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-6
  23991. 2025-07-20 17:38:09,755 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-3
  23992. 2025-07-20 17:38:09,767 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-2
  23993. 2025-07-20 17:38:09,776 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-1
  23994. 2025-07-20 17:38:09,839 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-4
  23995. 2025-07-20 17:38:09,847 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602901001.pdf-5
  23996. 2025-07-20 17:38:09,874 - sglang - INFO - [2025-07-20 17:38:09 TP0] Prefill batch. #new-seq: 1, #new-token: 1457, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  23997. 2025-07-20 17:38:09,874 - __main__ - INFO - sglang running req: 0 queue req: 0
  23998. 2025-07-20 17:38:10,451 - sglang - INFO - [2025-07-20 17:38:10 TP0] Prefill batch. #new-seq: 5, #new-token: 11323, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  23999. 2025-07-20 17:38:10,452 - __main__ - INFO - sglang running req: 1 queue req: 0
  24000. 2025-07-20 17:38:14,207 - sglang - INFO - [2025-07-20 17:38:14 TP0] Decode batch. #running-req: 6, #token: 12876, token usage: 0.34, gen throughput (token/s): 23.42, #queue-req: 0
  24001. 2025-07-20 17:38:14,207 - __main__ - INFO - sglang running req: 6 queue req: 0
  24002. 2025-07-20 17:38:15,080 - sglang - INFO - [2025-07-20 17:38:15 TP0] Decode batch. #running-req: 6, #token: 13116, token usage: 0.35, gen throughput (token/s): 274.58, #queue-req: 0
  24003. 2025-07-20 17:38:15,081 - __main__ - INFO - sglang running req: 6 queue req: 0
  24004. 2025-07-20 17:38:15,955 - sglang - INFO - [2025-07-20 17:38:15 TP0] Decode batch. #running-req: 6, #token: 13356, token usage: 0.35, gen throughput (token/s): 274.58, #queue-req: 0
  24005. 2025-07-20 17:38:15,955 - __main__ - INFO - sglang running req: 6 queue req: 0
  24006. 2025-07-20 17:38:16,828 - sglang - INFO - [2025-07-20 17:38:16 TP0] Decode batch. #running-req: 6, #token: 13596, token usage: 0.36, gen throughput (token/s): 274.75, #queue-req: 0
  24007. 2025-07-20 17:38:16,828 - __main__ - INFO - sglang running req: 6 queue req: 0
  24008. 2025-07-20 17:38:16,874 - __main__ - INFO - Queue remaining: 7
  24009. 2025-07-20 17:38:16,874 - __main__ - INFO -
  24010. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24011. ----------------------------------------------------------------------------------
  24012. finished_input_tokens 432.32 451.78
  24013. finished_output_tokens 103.58 102.86
  24014. sglang_input_tokens 440.18 442.48
  24015. sglang_output_tokens 114.68 118.94
  24016. 2025-07-20 17:38:16,874 - __main__ - INFO -
  24017. Worker ID | started
  24018. ----------+--------
  24019. 0 | 6
  24020. 2025-07-20 17:38:17,704 - sglang - INFO - [2025-07-20 17:38:17 TP0] Decode batch. #running-req: 6, #token: 13836, token usage: 0.36, gen throughput (token/s): 274.17, #queue-req: 0
  24021. 2025-07-20 17:38:17,704 - __main__ - INFO - sglang running req: 6 queue req: 0
  24022. 2025-07-20 17:38:18,580 - sglang - INFO - [2025-07-20 17:38:18 TP0] Decode batch. #running-req: 6, #token: 14076, token usage: 0.37, gen throughput (token/s): 273.80, #queue-req: 0
  24023. 2025-07-20 17:38:18,580 - __main__ - INFO - sglang running req: 6 queue req: 0
  24024. 2025-07-20 17:38:19,458 - sglang - INFO - [2025-07-20 17:38:19 TP0] Decode batch. #running-req: 5, #token: 11713, token usage: 0.31, gen throughput (token/s): 268.91, #queue-req: 0
  24025. 2025-07-20 17:38:19,458 - __main__ - INFO - sglang running req: 5 queue req: 0
  24026. 2025-07-20 17:38:20,319 - sglang - INFO - [2025-07-20 17:38:20 TP0] Decode batch. #running-req: 4, #token: 10160, token usage: 0.27, gen throughput (token/s): 198.44, #queue-req: 0
  24027. 2025-07-20 17:38:20,320 - __main__ - INFO - sglang running req: 4 queue req: 0
  24028. 2025-07-20 17:38:21,177 - sglang - INFO - [2025-07-20 17:38:21 TP0] Decode batch. #running-req: 4, #token: 10320, token usage: 0.27, gen throughput (token/s): 186.49, #queue-req: 0
  24029. 2025-07-20 17:38:21,178 - __main__ - INFO - sglang running req: 4 queue req: 0
  24030. 2025-07-20 17:38:22,035 - sglang - INFO - [2025-07-20 17:38:22 TP0] Decode batch. #running-req: 4, #token: 10480, token usage: 0.28, gen throughput (token/s): 186.46, #queue-req: 0
  24031. 2025-07-20 17:38:22,036 - __main__ - INFO - sglang running req: 4 queue req: 0
  24032. 2025-07-20 17:38:22,895 - sglang - INFO - [2025-07-20 17:38:22 TP0] Decode batch. #running-req: 4, #token: 10640, token usage: 0.28, gen throughput (token/s): 186.12, #queue-req: 0
  24033. 2025-07-20 17:38:22,895 - __main__ - INFO - sglang running req: 4 queue req: 0
  24034. 2025-07-20 17:38:23,755 - sglang - INFO - [2025-07-20 17:38:23 TP0] Decode batch. #running-req: 4, #token: 10800, token usage: 0.28, gen throughput (token/s): 186.09, #queue-req: 0
  24035. 2025-07-20 17:38:23,755 - __main__ - INFO - sglang running req: 4 queue req: 0
  24036. 2025-07-20 17:38:24,614 - sglang - INFO - [2025-07-20 17:38:24 TP0] Decode batch. #running-req: 4, #token: 10960, token usage: 0.29, gen throughput (token/s): 186.30, #queue-req: 0
  24037. 2025-07-20 17:38:24,614 - __main__ - INFO - sglang running req: 4 queue req: 0
  24038. 2025-07-20 17:38:25,473 - sglang - INFO - [2025-07-20 17:38:25 TP0] Decode batch. #running-req: 4, #token: 11120, token usage: 0.29, gen throughput (token/s): 186.27, #queue-req: 0
  24039. 2025-07-20 17:38:25,473 - __main__ - INFO - sglang running req: 4 queue req: 0
  24040. 2025-07-20 17:38:26,321 - sglang - INFO - [2025-07-20 17:38:26 TP0] Decode batch. #running-req: 2, #token: 6149, token usage: 0.16, gen throughput (token/s): 129.65, #queue-req: 0
  24041. 2025-07-20 17:38:26,321 - __main__ - INFO - sglang running req: 2 queue req: 0
  24042. 2025-07-20 17:38:26,875 - __main__ - INFO - Queue remaining: 7
  24043. 2025-07-20 17:38:26,875 - __main__ - INFO -
  24044. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24045. ----------------------------------------------------------------------------------
  24046. finished_input_tokens 427.05 412.09
  24047. finished_output_tokens 102.32 94.25
  24048. sglang_input_tokens 444.30 439.13
  24049. sglang_output_tokens 115.27 117.64
  24050. 2025-07-20 17:38:26,876 - __main__ - INFO -
  24051. Worker ID | finished | started
  24052. ----------+----------+--------
  24053. 0 | 4 | 6
  24054. 2025-07-20 17:38:27,162 - sglang - INFO - [2025-07-20 17:38:27 TP0] Decode batch. #running-req: 2, #token: 6229, token usage: 0.16, gen throughput (token/s): 95.16, #queue-req: 0
  24055. 2025-07-20 17:38:27,162 - __main__ - INFO - sglang running req: 2 queue req: 0
  24056. 2025-07-20 17:38:28,002 - sglang - INFO - [2025-07-20 17:38:28 TP0] Decode batch. #running-req: 2, #token: 6309, token usage: 0.17, gen throughput (token/s): 95.19, #queue-req: 0
  24057. 2025-07-20 17:38:28,003 - __main__ - INFO - sglang running req: 2 queue req: 0
  24058. 2025-07-20 17:38:28,835 - sglang - INFO - [2025-07-20 17:38:28 TP0] Decode batch. #running-req: 1, #token: 3120, token usage: 0.08, gen throughput (token/s): 60.02, #queue-req: 0
  24059. 2025-07-20 17:38:28,836 - __main__ - INFO - sglang running req: 1 queue req: 0
  24060. 2025-07-20 17:38:29,665 - sglang - INFO - [2025-07-20 17:38:29 TP0] Decode batch. #running-req: 1, #token: 3160, token usage: 0.08, gen throughput (token/s): 48.22, #queue-req: 0
  24061. 2025-07-20 17:38:29,665 - __main__ - INFO - sglang running req: 1 queue req: 0
  24062. 2025-07-20 17:38:30,496 - sglang - INFO - [2025-07-20 17:38:30 TP0] Decode batch. #running-req: 1, #token: 3200, token usage: 0.08, gen throughput (token/s): 48.12, #queue-req: 0
  24063. 2025-07-20 17:38:30,497 - __main__ - INFO - sglang running req: 1 queue req: 0
  24064. 2025-07-20 17:38:30,751 - __main__ - INFO - Finished TaskGroup for worker on 8450bc4e95932e232e795c885ec59ab601993cab
  24065. 2025-07-20 17:38:30,751 - __main__ - INFO - Got 1 docs for 8450bc4e95932e232e795c885ec59ab601993cab
  24066. 2025-07-20 17:38:30,753 - __main__ - INFO - Worker 0 processing work item 662cdaa711447efb75b7c325ea177326afc2747b
  24067. 2025-07-20 17:38:30,753 - __main__ - INFO - Created all tasks for 662cdaa711447efb75b7c325ea177326afc2747b
  24068. 2025-07-20 17:38:30,761 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG3440106016000.pdf in worker 0
  24069. 2025-07-20 17:38:30,870 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-8
  24070. 2025-07-20 17:38:30,881 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-7
  24071. 2025-07-20 17:38:30,891 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-4
  24072. 2025-07-20 17:38:30,905 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-10
  24073. 2025-07-20 17:38:30,910 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-5
  24074. 2025-07-20 17:38:30,914 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-3
  24075. 2025-07-20 17:38:30,919 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-6
  24076. 2025-07-20 17:38:30,942 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-2
  24077. 2025-07-20 17:38:30,960 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-1
  24078. 2025-07-20 17:38:31,000 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106016000.pdf-9
  24079. 2025-07-20 17:38:31,051 - sglang - INFO - [2025-07-20 17:38:31 TP0] Prefill batch. #new-seq: 1, #new-token: 1488, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  24080. 2025-07-20 17:38:31,051 - __main__ - INFO - sglang running req: 0 queue req: 0
  24081. 2025-07-20 17:38:31,639 - sglang - INFO - [2025-07-20 17:38:31 TP0] Prefill batch. #new-seq: 6, #new-token: 11875, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 3
  24082. 2025-07-20 17:38:31,640 - __main__ - INFO - sglang running req: 1 queue req: 3
  24083. 2025-07-20 17:38:35,961 - sglang - INFO - [2025-07-20 17:38:35 TP0] Decode batch. #running-req: 7, #token: 13559, token usage: 0.36, gen throughput (token/s): 38.06, #queue-req: 3
  24084. 2025-07-20 17:38:35,961 - __main__ - INFO - sglang running req: 7 queue req: 3
  24085. 2025-07-20 17:38:36,837 - sglang - INFO - [2025-07-20 17:38:36 TP0] Decode batch. #running-req: 7, #token: 13839, token usage: 0.36, gen throughput (token/s): 319.49, #queue-req: 3
  24086. 2025-07-20 17:38:36,838 - __main__ - INFO - sglang running req: 7 queue req: 3
  24087. 2025-07-20 17:38:36,877 - __main__ - INFO - Queue remaining: 6
  24088. 2025-07-20 17:38:36,877 - __main__ - INFO -
  24089. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24090. ----------------------------------------------------------------------------------
  24091. finished_input_tokens 437.30 454.69
  24092. finished_output_tokens 104.80 104.52
  24093. sglang_input_tokens 444.96 449.87
  24094. sglang_output_tokens 115.63 121.25
  24095. 2025-07-20 17:38:36,877 - __main__ - INFO -
  24096. Worker ID | started
  24097. ----------+--------
  24098. 0 | 10
  24099. 2025-07-20 17:38:37,715 - sglang - INFO - [2025-07-20 17:38:37 TP0] Decode batch. #running-req: 7, #token: 14119, token usage: 0.37, gen throughput (token/s): 319.15, #queue-req: 3
  24100. 2025-07-20 17:38:37,715 - __main__ - INFO - sglang running req: 7 queue req: 3
  24101. 2025-07-20 17:38:38,592 - sglang - INFO - [2025-07-20 17:38:38 TP0] Decode batch. #running-req: 7, #token: 14399, token usage: 0.38, gen throughput (token/s): 319.03, #queue-req: 3
  24102. 2025-07-20 17:38:38,593 - __main__ - INFO - sglang running req: 7 queue req: 3
  24103. 2025-07-20 17:38:39,470 - sglang - INFO - [2025-07-20 17:38:39 TP0] Decode batch. #running-req: 7, #token: 14679, token usage: 0.39, gen throughput (token/s): 319.13, #queue-req: 3
  24104. 2025-07-20 17:38:39,470 - __main__ - INFO - sglang running req: 7 queue req: 3
  24105. 2025-07-20 17:38:40,261 - sglang - INFO - [2025-07-20 17:38:40 TP0] Prefill batch. #new-seq: 3, #new-token: 6692, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.34, #running-req: 6, #queue-req: 0
  24106. 2025-07-20 17:38:40,261 - __main__ - INFO - sglang running req: 6 queue req: 0
  24107. 2025-07-20 17:38:42,417 - sglang - INFO - [2025-07-20 17:38:42 TP0] Decode batch. #running-req: 9, #token: 19574, token usage: 0.52, gen throughput (token/s): 97.37, #queue-req: 0
  24108. 2025-07-20 17:38:42,418 - __main__ - INFO - sglang running req: 9 queue req: 0
  24109. 2025-07-20 17:38:43,359 - sglang - INFO - [2025-07-20 17:38:43 TP0] Decode batch. #running-req: 9, #token: 18178, token usage: 0.48, gen throughput (token/s): 382.21, #queue-req: 0
  24110. 2025-07-20 17:38:43,360 - __main__ - INFO - sglang running req: 9 queue req: 0
  24111. 2025-07-20 17:38:44,241 - sglang - INFO - [2025-07-20 17:38:44 TP0] Decode batch. #running-req: 6, #token: 14263, token usage: 0.38, gen throughput (token/s): 316.43, #queue-req: 0
  24112. 2025-07-20 17:38:44,241 - __main__ - INFO - sglang running req: 6 queue req: 0
  24113. 2025-07-20 17:38:45,119 - sglang - INFO - [2025-07-20 17:38:45 TP0] Decode batch. #running-req: 6, #token: 14503, token usage: 0.38, gen throughput (token/s): 273.35, #queue-req: 0
  24114. 2025-07-20 17:38:45,119 - __main__ - INFO - sglang running req: 6 queue req: 0
  24115. 2025-07-20 17:38:45,998 - sglang - INFO - [2025-07-20 17:38:45 TP0] Decode batch. #running-req: 5, #token: 12142, token usage: 0.32, gen throughput (token/s): 269.58, #queue-req: 0
  24116. 2025-07-20 17:38:45,998 - __main__ - INFO - sglang running req: 5 queue req: 0
  24117. 2025-07-20 17:38:46,856 - sglang - INFO - [2025-07-20 17:38:46 TP0] Decode batch. #running-req: 4, #token: 9743, token usage: 0.26, gen throughput (token/s): 192.32, #queue-req: 0
  24118. 2025-07-20 17:38:46,856 - __main__ - INFO - sglang running req: 4 queue req: 0
  24119. 2025-07-20 17:38:46,879 - __main__ - INFO - Queue remaining: 6
  24120. 2025-07-20 17:38:46,879 - __main__ - INFO -
  24121. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24122. ----------------------------------------------------------------------------------
  24123. finished_input_tokens 432.09 419.63
  24124. finished_output_tokens 103.55 96.48
  24125. sglang_input_tokens 453.17 471.45
  24126. sglang_output_tokens 116.46 122.65
  24127. 2025-07-20 17:38:46,879 - __main__ - INFO -
  24128. Worker ID | finished | started
  24129. ----------+----------+--------
  24130. 0 | 6 | 10
  24131. 2025-07-20 17:38:47,712 - sglang - INFO - [2025-07-20 17:38:47 TP0] Decode batch. #running-req: 4, #token: 9903, token usage: 0.26, gen throughput (token/s): 186.97, #queue-req: 0
  24132. 2025-07-20 17:38:47,712 - __main__ - INFO - sglang running req: 4 queue req: 0
  24133. 2025-07-20 17:38:48,568 - sglang - INFO - [2025-07-20 17:38:48 TP0] Decode batch. #running-req: 4, #token: 10063, token usage: 0.26, gen throughput (token/s): 186.87, #queue-req: 0
  24134. 2025-07-20 17:38:48,568 - __main__ - INFO - sglang running req: 4 queue req: 0
  24135. 2025-07-20 17:38:49,426 - sglang - INFO - [2025-07-20 17:38:49 TP0] Decode batch. #running-req: 4, #token: 10223, token usage: 0.27, gen throughput (token/s): 186.52, #queue-req: 0
  24136. 2025-07-20 17:38:49,426 - __main__ - INFO - sglang running req: 4 queue req: 0
  24137. 2025-07-20 17:38:50,280 - sglang - INFO - [2025-07-20 17:38:50 TP0] Decode batch. #running-req: 3, #token: 7784, token usage: 0.20, gen throughput (token/s): 153.36, #queue-req: 0
  24138. 2025-07-20 17:38:50,280 - __main__ - INFO - sglang running req: 3 queue req: 0
  24139. 2025-07-20 17:38:51,131 - sglang - INFO - [2025-07-20 17:38:51 TP0] Decode batch. #running-req: 3, #token: 7904, token usage: 0.21, gen throughput (token/s): 141.07, #queue-req: 0
  24140. 2025-07-20 17:38:51,131 - __main__ - INFO - sglang running req: 3 queue req: 0
  24141. 2025-07-20 17:38:51,977 - sglang - INFO - [2025-07-20 17:38:51 TP0] Decode batch. #running-req: 2, #token: 5697, token usage: 0.15, gen throughput (token/s): 120.60, #queue-req: 0
  24142. 2025-07-20 17:38:51,977 - __main__ - INFO - sglang running req: 2 queue req: 0
  24143. 2025-07-20 17:38:52,815 - sglang - INFO - [2025-07-20 17:38:52 TP0] Decode batch. #running-req: 2, #token: 5777, token usage: 0.15, gen throughput (token/s): 95.39, #queue-req: 0
  24144. 2025-07-20 17:38:52,815 - __main__ - INFO - sglang running req: 2 queue req: 0
  24145. 2025-07-20 17:38:53,655 - sglang - INFO - [2025-07-20 17:38:53 TP0] Decode batch. #running-req: 2, #token: 5857, token usage: 0.15, gen throughput (token/s): 95.20, #queue-req: 0
  24146. 2025-07-20 17:38:53,656 - __main__ - INFO - sglang running req: 2 queue req: 0
  24147. 2025-07-20 17:38:54,495 - sglang - INFO - [2025-07-20 17:38:54 TP0] Decode batch. #running-req: 2, #token: 5937, token usage: 0.16, gen throughput (token/s): 95.26, #queue-req: 0
  24148. 2025-07-20 17:38:54,496 - __main__ - INFO - sglang running req: 2 queue req: 0
  24149. 2025-07-20 17:38:55,334 - sglang - INFO - [2025-07-20 17:38:55 TP0] Decode batch. #running-req: 2, #token: 6017, token usage: 0.16, gen throughput (token/s): 95.37, #queue-req: 0
  24150. 2025-07-20 17:38:55,334 - __main__ - INFO - sglang running req: 2 queue req: 0
  24151. 2025-07-20 17:38:56,171 - sglang - INFO - [2025-07-20 17:38:56 TP0] Decode batch. #running-req: 1, #token: 2906, token usage: 0.08, gen throughput (token/s): 81.24, #queue-req: 0
  24152. 2025-07-20 17:38:56,171 - __main__ - INFO - sglang running req: 1 queue req: 0
  24153. 2025-07-20 17:38:56,880 - __main__ - INFO - Queue remaining: 6
  24154. 2025-07-20 17:38:56,881 - __main__ - INFO -
  24155. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24156. ----------------------------------------------------------------------------------
  24157. finished_input_tokens 427.01 368.21
  24158. finished_output_tokens 102.33 84.32
  24159. sglang_input_tokens 455.42 441.50
  24160. sglang_output_tokens 116.99 115.89
  24161. 2025-07-20 17:38:56,881 - __main__ - INFO -
  24162. Worker ID | finished | started
  24163. ----------+----------+--------
  24164. 0 | 9 | 10
  24165. 2025-07-20 17:38:57,002 - sglang - INFO - [2025-07-20 17:38:57 TP0] Decode batch. #running-req: 1, #token: 2946, token usage: 0.08, gen throughput (token/s): 48.15, #queue-req: 0
  24166. 2025-07-20 17:38:57,002 - __main__ - INFO - sglang running req: 1 queue req: 0
  24167. 2025-07-20 17:38:57,832 - sglang - INFO - [2025-07-20 17:38:57 TP0] Decode batch. #running-req: 1, #token: 2986, token usage: 0.08, gen throughput (token/s): 48.18, #queue-req: 0
  24168. 2025-07-20 17:38:57,832 - __main__ - INFO - sglang running req: 1 queue req: 0
  24169. 2025-07-20 17:38:58,662 - sglang - INFO - [2025-07-20 17:38:58 TP0] Decode batch. #running-req: 1, #token: 3026, token usage: 0.08, gen throughput (token/s): 48.22, #queue-req: 0
  24170. 2025-07-20 17:38:58,662 - __main__ - INFO - sglang running req: 1 queue req: 0
  24171. 2025-07-20 17:38:59,491 - sglang - INFO - [2025-07-20 17:38:59 TP0] Decode batch. #running-req: 1, #token: 3066, token usage: 0.08, gen throughput (token/s): 48.21, #queue-req: 0
  24172. 2025-07-20 17:38:59,492 - __main__ - INFO - sglang running req: 1 queue req: 0
  24173. 2025-07-20 17:38:59,955 - __main__ - INFO - Finished TaskGroup for worker on 662cdaa711447efb75b7c325ea177326afc2747b
  24174. 2025-07-20 17:38:59,956 - __main__ - INFO - Got 1 docs for 662cdaa711447efb75b7c325ea177326afc2747b
  24175. 2025-07-20 17:38:59,957 - __main__ - INFO - Worker 0 processing work item c1e2b4f5c6c4bb6407c21dcae6a8dccdc2ad0e74
  24176. 2025-07-20 17:38:59,957 - __main__ - INFO - Created all tasks for c1e2b4f5c6c4bb6407c21dcae6a8dccdc2ad0e74
  24177. 2025-07-20 17:38:59,966 - __main__ - INFO - Got 10 pages to do for test_pdf/1144520000702630XG344010604300102.pdf in worker 0
  24178. 2025-07-20 17:39:00,096 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-10
  24179. 2025-07-20 17:39:00,115 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-3
  24180. 2025-07-20 17:39:00,134 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-6
  24181. 2025-07-20 17:39:00,140 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-4
  24182. 2025-07-20 17:39:00,149 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-7
  24183. 2025-07-20 17:39:00,154 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-5
  24184. 2025-07-20 17:39:00,156 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-2
  24185. 2025-07-20 17:39:00,173 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-1
  24186. 2025-07-20 17:39:00,217 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-8
  24187. 2025-07-20 17:39:00,221 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604300102.pdf-9
  24188. 2025-07-20 17:39:00,282 - sglang - INFO - [2025-07-20 17:39:00 TP0] Prefill batch. #new-seq: 1, #new-token: 1732, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  24189. 2025-07-20 17:39:00,283 - __main__ - INFO - sglang running req: 0 queue req: 0
  24190. 2025-07-20 17:39:00,918 - sglang - INFO - [2025-07-20 17:39:00 TP0] Prefill batch. #new-seq: 6, #new-token: 14500, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 3
  24191. 2025-07-20 17:39:00,918 - __main__ - INFO - sglang running req: 1 queue req: 3
  24192. 2025-07-20 17:39:05,575 - sglang - INFO - [2025-07-20 17:39:05 TP0] Decode batch. #running-req: 7, #token: 16358, token usage: 0.43, gen throughput (token/s): 24.33, #queue-req: 3
  24193. 2025-07-20 17:39:05,575 - __main__ - INFO - sglang running req: 7 queue req: 3
  24194. 2025-07-20 17:39:06,458 - sglang - INFO - [2025-07-20 17:39:06 TP0] Decode batch. #running-req: 7, #token: 16638, token usage: 0.44, gen throughput (token/s): 316.94, #queue-req: 3
  24195. 2025-07-20 17:39:06,458 - __main__ - INFO - sglang running req: 7 queue req: 3
  24196. 2025-07-20 17:39:06,882 - __main__ - INFO - Queue remaining: 5
  24197. 2025-07-20 17:39:06,882 - __main__ - INFO -
  24198. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24199. ----------------------------------------------------------------------------------
  24200. finished_input_tokens 445.35 435.06
  24201. finished_output_tokens 106.13 98.65
  24202. sglang_input_tokens 452.75 449.04
  24203. sglang_output_tokens 116.59 118.65
  24204. 2025-07-20 17:39:06,883 - __main__ - INFO -
  24205. Worker ID | started
  24206. ----------+--------
  24207. 0 | 10
  24208. 2025-07-20 17:39:07,342 - sglang - INFO - [2025-07-20 17:39:07 TP0] Decode batch. #running-req: 7, #token: 16918, token usage: 0.45, gen throughput (token/s): 316.93, #queue-req: 3
  24209. 2025-07-20 17:39:07,342 - __main__ - INFO - sglang running req: 7 queue req: 3
  24210. 2025-07-20 17:39:07,674 - sglang - INFO - [2025-07-20 17:39:07 TP0] Prefill batch. #new-seq: 2, #new-token: 4506, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.38, #running-req: 6, #queue-req: 1
  24211. 2025-07-20 17:39:07,675 - __main__ - INFO - sglang running req: 6 queue req: 1
  24212. 2025-07-20 17:39:09,632 - sglang - INFO - [2025-07-20 17:39:09 TP0] Decode batch. #running-req: 8, #token: 19254, token usage: 0.51, gen throughput (token/s): 132.75, #queue-req: 1
  24213. 2025-07-20 17:39:09,632 - __main__ - INFO - sglang running req: 8 queue req: 1
  24214. 2025-07-20 17:39:10,526 - sglang - INFO - [2025-07-20 17:39:10 TP0] Decode batch. #running-req: 8, #token: 19574, token usage: 0.52, gen throughput (token/s): 357.91, #queue-req: 1
  24215. 2025-07-20 17:39:10,526 - __main__ - INFO - sglang running req: 8 queue req: 1
  24216. 2025-07-20 17:39:11,418 - sglang - INFO - [2025-07-20 17:39:11 TP0] Decode batch. #running-req: 8, #token: 19894, token usage: 0.52, gen throughput (token/s): 358.68, #queue-req: 1
  24217. 2025-07-20 17:39:11,418 - __main__ - INFO - sglang running req: 8 queue req: 1
  24218. 2025-07-20 17:39:12,310 - sglang - INFO - [2025-07-20 17:39:12 TP0] Decode batch. #running-req: 8, #token: 20214, token usage: 0.53, gen throughput (token/s): 358.86, #queue-req: 1
  24219. 2025-07-20 17:39:12,310 - __main__ - INFO - sglang running req: 8 queue req: 1
  24220. 2025-07-20 17:39:12,467 - sglang - INFO - [2025-07-20 17:39:12 TP0] Prefill batch. #new-seq: 1, #new-token: 2315, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.48, #running-req: 7, #queue-req: 0
  24221. 2025-07-20 17:39:12,467 - __main__ - INFO - sglang running req: 7 queue req: 0
  24222. 2025-07-20 17:39:13,948 - sglang - INFO - [2025-07-20 17:39:13 TP0] Decode batch. #running-req: 8, #token: 20852, token usage: 0.55, gen throughput (token/s): 194.71, #queue-req: 0
  24223. 2025-07-20 17:39:13,948 - __main__ - INFO - sglang running req: 8 queue req: 0
  24224. 2025-07-20 17:39:14,844 - sglang - INFO - [2025-07-20 17:39:14 TP0] Decode batch. #running-req: 8, #token: 21172, token usage: 0.56, gen throughput (token/s): 357.41, #queue-req: 0
  24225. 2025-07-20 17:39:14,844 - __main__ - INFO - sglang running req: 8 queue req: 0
  24226. 2025-07-20 17:39:15,739 - sglang - INFO - [2025-07-20 17:39:15 TP0] Decode batch. #running-req: 7, #token: 18815, token usage: 0.50, gen throughput (token/s): 350.61, #queue-req: 0
  24227. 2025-07-20 17:39:15,739 - __main__ - INFO - sglang running req: 7 queue req: 0
  24228. 2025-07-20 17:39:16,628 - sglang - INFO - [2025-07-20 17:39:16 TP0] Decode batch. #running-req: 7, #token: 19095, token usage: 0.50, gen throughput (token/s): 314.88, #queue-req: 0
  24229. 2025-07-20 17:39:16,629 - __main__ - INFO - sglang running req: 7 queue req: 0
  24230. 2025-07-20 17:39:16,884 - __main__ - INFO - Queue remaining: 5
  24231. 2025-07-20 17:39:16,885 - __main__ - INFO -
  24232. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24233. ----------------------------------------------------------------------------------
  24234. finished_input_tokens 440.24 435.06
  24235. finished_output_tokens 104.92 98.65
  24236. sglang_input_tokens 454.89 420.30
  24237. sglang_output_tokens 116.12 110.53
  24238. 2025-07-20 17:39:16,885 - __main__ - INFO -
  24239. Worker ID | finished | started
  24240. ----------+----------+--------
  24241. 0 | 3 | 10
  24242. 2025-07-20 17:39:17,516 - sglang - INFO - [2025-07-20 17:39:17 TP0] Decode batch. #running-req: 6, #token: 16938, token usage: 0.45, gen throughput (token/s): 305.12, #queue-req: 0
  24243. 2025-07-20 17:39:17,517 - __main__ - INFO - sglang running req: 6 queue req: 0
  24244. 2025-07-20 17:39:18,409 - sglang - INFO - [2025-07-20 17:39:18 TP0] Decode batch. #running-req: 6, #token: 17178, token usage: 0.45, gen throughput (token/s): 268.88, #queue-req: 0
  24245. 2025-07-20 17:39:18,409 - __main__ - INFO - sglang running req: 6 queue req: 0
  24246. 2025-07-20 17:39:19,307 - sglang - INFO - [2025-07-20 17:39:19 TP0] Decode batch. #running-req: 6, #token: 17418, token usage: 0.46, gen throughput (token/s): 267.34, #queue-req: 0
  24247. 2025-07-20 17:39:19,307 - __main__ - INFO - sglang running req: 6 queue req: 0
  24248. 2025-07-20 17:39:20,206 - sglang - INFO - [2025-07-20 17:39:20 TP0] Decode batch. #running-req: 6, #token: 17658, token usage: 0.46, gen throughput (token/s): 266.96, #queue-req: 0
  24249. 2025-07-20 17:39:20,206 - __main__ - INFO - sglang running req: 6 queue req: 0
  24250. 2025-07-20 17:39:21,101 - sglang - INFO - [2025-07-20 17:39:21 TP0] Decode batch. #running-req: 5, #token: 11556, token usage: 0.30, gen throughput (token/s): 244.71, #queue-req: 0
  24251. 2025-07-20 17:39:21,101 - __main__ - INFO - sglang running req: 5 queue req: 0
  24252. 2025-07-20 17:39:21,970 - sglang - INFO - [2025-07-20 17:39:21 TP0] Decode batch. #running-req: 3, #token: 9008, token usage: 0.24, gen throughput (token/s): 155.35, #queue-req: 0
  24253. 2025-07-20 17:39:21,970 - __main__ - INFO - sglang running req: 3 queue req: 0
  24254. 2025-07-20 17:39:22,836 - sglang - INFO - [2025-07-20 17:39:22 TP0] Decode batch. #running-req: 2, #token: 6354, token usage: 0.17, gen throughput (token/s): 137.39, #queue-req: 0
  24255. 2025-07-20 17:39:22,836 - __main__ - INFO - sglang running req: 2 queue req: 0
  24256. 2025-07-20 17:39:23,688 - sglang - INFO - [2025-07-20 17:39:23 TP0] Decode batch. #running-req: 2, #token: 6434, token usage: 0.17, gen throughput (token/s): 93.90, #queue-req: 0
  24257. 2025-07-20 17:39:23,688 - __main__ - INFO - sglang running req: 2 queue req: 0
  24258. 2025-07-20 17:39:24,539 - sglang - INFO - [2025-07-20 17:39:24 TP0] Decode batch. #running-req: 2, #token: 6514, token usage: 0.17, gen throughput (token/s): 93.97, #queue-req: 0
  24259. 2025-07-20 17:39:24,540 - __main__ - INFO - sglang running req: 2 queue req: 0
  24260. 2025-07-20 17:39:25,384 - sglang - INFO - [2025-07-20 17:39:25 TP0] Decode batch. #running-req: 1, #token: 3022, token usage: 0.08, gen throughput (token/s): 62.72, #queue-req: 0
  24261. 2025-07-20 17:39:25,384 - __main__ - INFO - sglang running req: 1 queue req: 0
  24262. 2025-07-20 17:39:26,226 - sglang - INFO - [2025-07-20 17:39:26 TP0] Decode batch. #running-req: 1, #token: 3062, token usage: 0.08, gen throughput (token/s): 47.55, #queue-req: 0
  24263. 2025-07-20 17:39:26,226 - __main__ - INFO - sglang running req: 1 queue req: 0
  24264. 2025-07-20 17:39:26,887 - __main__ - INFO - Queue remaining: 5
  24265. 2025-07-20 17:39:26,887 - __main__ - INFO -
  24266. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24267. ----------------------------------------------------------------------------------
  24268. finished_input_tokens 435.24 370.79
  24269. finished_output_tokens 103.72 84.06
  24270. sglang_input_tokens 466.01 453.89
  24271. sglang_output_tokens 118.67 117.94
  24272. 2025-07-20 17:39:26,887 - __main__ - INFO -
  24273. Worker ID | finished | started
  24274. ----------+----------+--------
  24275. 0 | 9 | 10
  24276. 2025-07-20 17:39:27,067 - sglang - INFO - [2025-07-20 17:39:27 TP0] Decode batch. #running-req: 1, #token: 3102, token usage: 0.08, gen throughput (token/s): 47.54, #queue-req: 0
  24277. 2025-07-20 17:39:27,067 - __main__ - INFO - sglang running req: 1 queue req: 0
  24278. 2025-07-20 17:39:27,910 - sglang - INFO - [2025-07-20 17:39:27 TP0] Decode batch. #running-req: 1, #token: 3142, token usage: 0.08, gen throughput (token/s): 47.46, #queue-req: 0
  24279. 2025-07-20 17:39:27,910 - __main__ - INFO - sglang running req: 1 queue req: 0
  24280. 2025-07-20 17:39:28,753 - sglang - INFO - [2025-07-20 17:39:28 TP0] Decode batch. #running-req: 1, #token: 3182, token usage: 0.08, gen throughput (token/s): 47.43, #queue-req: 0
  24281. 2025-07-20 17:39:28,753 - __main__ - INFO - sglang running req: 1 queue req: 0
  24282. 2025-07-20 17:39:29,307 - __main__ - INFO - Finished TaskGroup for worker on c1e2b4f5c6c4bb6407c21dcae6a8dccdc2ad0e74
  24283. 2025-07-20 17:39:29,307 - __main__ - INFO - Got 1 docs for c1e2b4f5c6c4bb6407c21dcae6a8dccdc2ad0e74
  24284. 2025-07-20 17:39:29,309 - __main__ - INFO - Worker 0 processing work item 5da3510f60e4d62bb38dbf36fb90d4a0034727fa
  24285. 2025-07-20 17:39:29,309 - __main__ - INFO - Created all tasks for 5da3510f60e4d62bb38dbf36fb90d4a0034727fa
  24286. 2025-07-20 17:39:29,315 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900902.pdf in worker 0
  24287. 2025-07-20 17:39:29,436 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-6
  24288. 2025-07-20 17:39:29,454 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-2
  24289. 2025-07-20 17:39:29,471 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-3
  24290. 2025-07-20 17:39:29,500 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-1
  24291. 2025-07-20 17:39:29,558 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-4
  24292. 2025-07-20 17:39:29,575 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900902.pdf-5
  24293. 2025-07-20 17:39:29,736 - sglang - INFO - [2025-07-20 17:39:29 TP0] Prefill batch. #new-seq: 1, #new-token: 1592, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  24294. 2025-07-20 17:39:29,737 - __main__ - INFO - sglang running req: 0 queue req: 0
  24295. 2025-07-20 17:39:30,331 - sglang - INFO - [2025-07-20 17:39:30 TP0] Prefill batch. #new-seq: 5, #new-token: 11246, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  24296. 2025-07-20 17:39:30,331 - __main__ - INFO - sglang running req: 1 queue req: 0
  24297. 2025-07-20 17:39:34,039 - sglang - INFO - [2025-07-20 17:39:34 TP0] Decode batch. #running-req: 6, #token: 12922, token usage: 0.34, gen throughput (token/s): 20.81, #queue-req: 0
  24298. 2025-07-20 17:39:34,039 - __main__ - INFO - sglang running req: 6 queue req: 0
  24299. 2025-07-20 17:39:34,926 - sglang - INFO - [2025-07-20 17:39:34 TP0] Decode batch. #running-req: 6, #token: 13162, token usage: 0.35, gen throughput (token/s): 270.61, #queue-req: 0
  24300. 2025-07-20 17:39:34,926 - __main__ - INFO - sglang running req: 6 queue req: 0
  24301. 2025-07-20 17:39:35,814 - sglang - INFO - [2025-07-20 17:39:35 TP0] Decode batch. #running-req: 6, #token: 13402, token usage: 0.35, gen throughput (token/s): 270.25, #queue-req: 0
  24302. 2025-07-20 17:39:35,814 - __main__ - INFO - sglang running req: 6 queue req: 0
  24303. 2025-07-20 17:39:36,702 - sglang - INFO - [2025-07-20 17:39:36 TP0] Decode batch. #running-req: 6, #token: 13642, token usage: 0.36, gen throughput (token/s): 270.18, #queue-req: 0
  24304. 2025-07-20 17:39:36,702 - __main__ - INFO - sglang running req: 6 queue req: 0
  24305. 2025-07-20 17:39:36,888 - __main__ - INFO - Queue remaining: 4
  24306. 2025-07-20 17:39:36,888 - __main__ - INFO -
  24307. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24308. ----------------------------------------------------------------------------------
  24309. finished_input_tokens 456.23 447.63
  24310. finished_output_tokens 108.24 100.91
  24311. sglang_input_tokens 463.38 450.89
  24312. sglang_output_tokens 118.34 119.31
  24313. 2025-07-20 17:39:36,889 - __main__ - INFO -
  24314. Worker ID | started
  24315. ----------+--------
  24316. 0 | 6
  24317. 2025-07-20 17:39:37,590 - sglang - INFO - [2025-07-20 17:39:37 TP0] Decode batch. #running-req: 6, #token: 13882, token usage: 0.37, gen throughput (token/s): 270.39, #queue-req: 0
  24318. 2025-07-20 17:39:37,590 - __main__ - INFO - sglang running req: 6 queue req: 0
  24319. 2025-07-20 17:39:38,477 - sglang - INFO - [2025-07-20 17:39:38 TP0] Decode batch. #running-req: 6, #token: 14122, token usage: 0.37, gen throughput (token/s): 270.52, #queue-req: 0
  24320. 2025-07-20 17:39:38,477 - __main__ - INFO - sglang running req: 6 queue req: 0
  24321. 2025-07-20 17:39:39,364 - sglang - INFO - [2025-07-20 17:39:39 TP0] Decode batch. #running-req: 6, #token: 14362, token usage: 0.38, gen throughput (token/s): 270.54, #queue-req: 0
  24322. 2025-07-20 17:39:39,364 - __main__ - INFO - sglang running req: 6 queue req: 0
  24323. 2025-07-20 17:39:40,252 - sglang - INFO - [2025-07-20 17:39:40 TP0] Decode batch. #running-req: 5, #token: 12716, token usage: 0.33, gen throughput (token/s): 244.43, #queue-req: 0
  24324. 2025-07-20 17:39:40,252 - __main__ - INFO - sglang running req: 5 queue req: 0
  24325. 2025-07-20 17:39:41,137 - sglang - INFO - [2025-07-20 17:39:41 TP0] Decode batch. #running-req: 5, #token: 12916, token usage: 0.34, gen throughput (token/s): 225.95, #queue-req: 0
  24326. 2025-07-20 17:39:41,137 - __main__ - INFO - sglang running req: 5 queue req: 0
  24327. 2025-07-20 17:39:42,023 - sglang - INFO - [2025-07-20 17:39:42 TP0] Decode batch. #running-req: 5, #token: 13116, token usage: 0.35, gen throughput (token/s): 225.82, #queue-req: 0
  24328. 2025-07-20 17:39:42,023 - __main__ - INFO - sglang running req: 5 queue req: 0
  24329. 2025-07-20 17:39:42,907 - sglang - INFO - [2025-07-20 17:39:42 TP0] Decode batch. #running-req: 5, #token: 13316, token usage: 0.35, gen throughput (token/s): 226.20, #queue-req: 0
  24330. 2025-07-20 17:39:42,907 - __main__ - INFO - sglang running req: 5 queue req: 0
  24331. 2025-07-20 17:39:43,785 - sglang - INFO - [2025-07-20 17:39:43 TP0] Decode batch. #running-req: 5, #token: 13516, token usage: 0.36, gen throughput (token/s): 227.87, #queue-req: 0
  24332. 2025-07-20 17:39:43,785 - __main__ - INFO - sglang running req: 5 queue req: 0
  24333. 2025-07-20 17:39:44,659 - sglang - INFO - [2025-07-20 17:39:44 TP0] Decode batch. #running-req: 3, #token: 8858, token usage: 0.23, gen throughput (token/s): 166.94, #queue-req: 0
  24334. 2025-07-20 17:39:44,660 - __main__ - INFO - sglang running req: 3 queue req: 0
  24335. 2025-07-20 17:39:45,527 - sglang - INFO - [2025-07-20 17:39:45 TP0] Decode batch. #running-req: 3, #token: 8978, token usage: 0.24, gen throughput (token/s): 138.33, #queue-req: 0
  24336. 2025-07-20 17:39:45,527 - __main__ - INFO - sglang running req: 3 queue req: 0
  24337. 2025-07-20 17:39:46,376 - sglang - INFO - [2025-07-20 17:39:46 TP0] Decode batch. #running-req: 2, #token: 6167, token usage: 0.16, gen throughput (token/s): 103.56, #queue-req: 0
  24338. 2025-07-20 17:39:46,377 - __main__ - INFO - sglang running req: 2 queue req: 0
  24339. 2025-07-20 17:39:46,890 - __main__ - INFO - Queue remaining: 4
  24340. 2025-07-20 17:39:46,890 - __main__ - INFO -
  24341. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24342. ----------------------------------------------------------------------------------
  24343. finished_input_tokens 451.17 408.60
  24344. finished_output_tokens 107.03 92.44
  24345. sglang_input_tokens 466.92 448.64
  24346. sglang_output_tokens 118.97 118.28
  24347. 2025-07-20 17:39:46,890 - __main__ - INFO -
  24348. Worker ID | finished | started
  24349. ----------+----------+--------
  24350. 0 | 4 | 6
  24351. 2025-07-20 17:39:47,220 - sglang - INFO - [2025-07-20 17:39:47 TP0] Decode batch. #running-req: 2, #token: 6247, token usage: 0.16, gen throughput (token/s): 94.78, #queue-req: 0
  24352. 2025-07-20 17:39:47,221 - __main__ - INFO - sglang running req: 2 queue req: 0
  24353. 2025-07-20 17:39:48,066 - sglang - INFO - [2025-07-20 17:39:48 TP0] Decode batch. #running-req: 2, #token: 6327, token usage: 0.17, gen throughput (token/s): 94.64, #queue-req: 0
  24354. 2025-07-20 17:39:48,066 - __main__ - INFO - sglang running req: 2 queue req: 0
  24355. 2025-07-20 17:39:48,912 - sglang - INFO - [2025-07-20 17:39:48 TP0] Decode batch. #running-req: 1, #token: 3118, token usage: 0.08, gen throughput (token/s): 55.54, #queue-req: 0
  24356. 2025-07-20 17:39:48,912 - __main__ - INFO - sglang running req: 1 queue req: 0
  24357. 2025-07-20 17:39:49,761 - sglang - INFO - [2025-07-20 17:39:49 TP0] Decode batch. #running-req: 1, #token: 3158, token usage: 0.08, gen throughput (token/s): 47.12, #queue-req: 0
  24358. 2025-07-20 17:39:49,761 - __main__ - INFO - sglang running req: 1 queue req: 0
  24359. 2025-07-20 17:39:50,605 - sglang - INFO - [2025-07-20 17:39:50 TP0] Decode batch. #running-req: 1, #token: 3198, token usage: 0.08, gen throughput (token/s): 47.38, #queue-req: 0
  24360. 2025-07-20 17:39:50,605 - __main__ - INFO - sglang running req: 1 queue req: 0
  24361. 2025-07-20 17:39:51,463 - sglang - INFO - [2025-07-20 17:39:51 TP0] Decode batch. #running-req: 1, #token: 3238, token usage: 0.09, gen throughput (token/s): 46.61, #queue-req: 0
  24362. 2025-07-20 17:39:51,464 - __main__ - INFO - sglang running req: 1 queue req: 0
  24363. 2025-07-20 17:39:52,314 - sglang - INFO - [2025-07-20 17:39:52 TP0] Decode batch. #running-req: 1, #token: 3278, token usage: 0.09, gen throughput (token/s): 47.04, #queue-req: 0
  24364. 2025-07-20 17:39:52,314 - __main__ - INFO - sglang running req: 1 queue req: 0
  24365. 2025-07-20 17:39:53,166 - sglang - INFO - [2025-07-20 17:39:53 TP0] Decode batch. #running-req: 1, #token: 3318, token usage: 0.09, gen throughput (token/s): 46.96, #queue-req: 0
  24366. 2025-07-20 17:39:53,166 - __main__ - INFO - sglang running req: 1 queue req: 0
  24367. 2025-07-20 17:39:53,405 - __main__ - INFO - Finished TaskGroup for worker on 5da3510f60e4d62bb38dbf36fb90d4a0034727fa
  24368. 2025-07-20 17:39:53,406 - __main__ - INFO - Got 1 docs for 5da3510f60e4d62bb38dbf36fb90d4a0034727fa
  24369. 2025-07-20 17:39:53,407 - __main__ - INFO - Worker 0 processing work item 7e7415b1a884dd4b422626d1f93cc9d5ff33301c
  24370. 2025-07-20 17:39:53,408 - __main__ - INFO - Created all tasks for 7e7415b1a884dd4b422626d1f93cc9d5ff33301c
  24371. 2025-07-20 17:39:53,413 - __main__ - INFO - Got 7 pages to do for test_pdf/1144520000702630XG3440106029008.pdf in worker 0
  24372. 2025-07-20 17:39:53,528 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-3
  24373. 2025-07-20 17:39:53,559 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-4
  24374. 2025-07-20 17:39:53,573 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-7
  24375. 2025-07-20 17:39:53,580 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-2
  24376. 2025-07-20 17:39:53,608 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-1
  24377. 2025-07-20 17:39:53,658 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-5
  24378. 2025-07-20 17:39:53,676 - sglang - INFO - [2025-07-20 17:39:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1499, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  24379. 2025-07-20 17:39:53,677 - __main__ - INFO - sglang running req: 0 queue req: 0
  24380. 2025-07-20 17:39:53,677 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106029008.pdf-6
  24381. 2025-07-20 17:39:54,302 - sglang - INFO - [2025-07-20 17:39:54 TP0] Prefill batch. #new-seq: 6, #new-token: 13502, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  24382. 2025-07-20 17:39:54,303 - __main__ - INFO - sglang running req: 1 queue req: 0
  24383. 2025-07-20 17:39:56,891 - __main__ - INFO - Queue remaining: 3
  24384. 2025-07-20 17:39:56,892 - __main__ - INFO -
  24385. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24386. ----------------------------------------------------------------------------------
  24387. finished_input_tokens 460.31 451.39
  24388. finished_output_tokens 109.50 103.51
  24389. sglang_input_tokens 467.30 446.84
  24390. sglang_output_tokens 119.39 120.59
  24391. 2025-07-20 17:39:56,892 - __main__ - INFO -
  24392. Worker ID | started
  24393. ----------+--------
  24394. 0 | 7
  24395. 2025-07-20 17:39:59,029 - sglang - INFO - [2025-07-20 17:39:59 TP0] Decode batch. #running-req: 7, #token: 15204, token usage: 0.40, gen throughput (token/s): 36.50, #queue-req: 0
  24396. 2025-07-20 17:39:59,029 - __main__ - INFO - sglang running req: 7 queue req: 0
  24397. 2025-07-20 17:39:59,922 - sglang - INFO - [2025-07-20 17:39:59 TP0] Decode batch. #running-req: 7, #token: 15484, token usage: 0.41, gen throughput (token/s): 313.39, #queue-req: 0
  24398. 2025-07-20 17:39:59,922 - __main__ - INFO - sglang running req: 7 queue req: 0
  24399. 2025-07-20 17:40:00,816 - sglang - INFO - [2025-07-20 17:40:00 TP0] Decode batch. #running-req: 7, #token: 15764, token usage: 0.41, gen throughput (token/s): 313.16, #queue-req: 0
  24400. 2025-07-20 17:40:00,816 - __main__ - INFO - sglang running req: 7 queue req: 0
  24401. 2025-07-20 17:40:01,714 - sglang - INFO - [2025-07-20 17:40:01 TP0] Decode batch. #running-req: 7, #token: 16044, token usage: 0.42, gen throughput (token/s): 312.05, #queue-req: 0
  24402. 2025-07-20 17:40:01,714 - __main__ - INFO - sglang running req: 7 queue req: 0
  24403. 2025-07-20 17:40:02,609 - sglang - INFO - [2025-07-20 17:40:02 TP0] Decode batch. #running-req: 6, #token: 14636, token usage: 0.39, gen throughput (token/s): 275.85, #queue-req: 0
  24404. 2025-07-20 17:40:02,609 - __main__ - INFO - sglang running req: 6 queue req: 0
  24405. 2025-07-20 17:40:03,495 - sglang - INFO - [2025-07-20 17:40:03 TP0] Decode batch. #running-req: 6, #token: 14876, token usage: 0.39, gen throughput (token/s): 270.90, #queue-req: 0
  24406. 2025-07-20 17:40:03,495 - __main__ - INFO - sglang running req: 6 queue req: 0
  24407. 2025-07-20 17:40:04,377 - sglang - INFO - [2025-07-20 17:40:04 TP0] Decode batch. #running-req: 6, #token: 15116, token usage: 0.40, gen throughput (token/s): 272.09, #queue-req: 0
  24408. 2025-07-20 17:40:04,377 - __main__ - INFO - sglang running req: 6 queue req: 0
  24409. 2025-07-20 17:40:05,263 - sglang - INFO - [2025-07-20 17:40:05 TP0] Decode batch. #running-req: 6, #token: 15356, token usage: 0.40, gen throughput (token/s): 271.01, #queue-req: 0
  24410. 2025-07-20 17:40:05,263 - __main__ - INFO - sglang running req: 6 queue req: 0
  24411. 2025-07-20 17:40:06,150 - sglang - INFO - [2025-07-20 17:40:06 TP0] Decode batch. #running-req: 6, #token: 15596, token usage: 0.41, gen throughput (token/s): 270.51, #queue-req: 0
  24412. 2025-07-20 17:40:06,150 - __main__ - INFO - sglang running req: 6 queue req: 0
  24413. 2025-07-20 17:40:06,893 - __main__ - INFO - Queue remaining: 3
  24414. 2025-07-20 17:40:06,893 - __main__ - INFO -
  24415. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24416. ----------------------------------------------------------------------------------
  24417. finished_input_tokens 455.31 406.89
  24418. finished_output_tokens 108.31 93.30
  24419. sglang_input_tokens 463.85 425.86
  24420. sglang_output_tokens 118.26 113.83
  24421. 2025-07-20 17:40:06,894 - __main__ - INFO -
  24422. Worker ID | finished | started
  24423. ----------+----------+--------
  24424. 0 | 1 | 7
  24425. 2025-07-20 17:40:07,042 - sglang - INFO - [2025-07-20 17:40:07 TP0] Decode batch. #running-req: 6, #token: 15836, token usage: 0.42, gen throughput (token/s): 269.08, #queue-req: 0
  24426. 2025-07-20 17:40:07,042 - __main__ - INFO - sglang running req: 6 queue req: 0
  24427. 2025-07-20 17:40:07,934 - sglang - INFO - [2025-07-20 17:40:07 TP0] Decode batch. #running-req: 6, #token: 16076, token usage: 0.42, gen throughput (token/s): 268.95, #queue-req: 0
  24428. 2025-07-20 17:40:07,934 - __main__ - INFO - sglang running req: 6 queue req: 0
  24429. 2025-07-20 17:40:08,823 - sglang - INFO - [2025-07-20 17:40:08 TP0] Decode batch. #running-req: 5, #token: 13621, token usage: 0.36, gen throughput (token/s): 238.41, #queue-req: 0
  24430. 2025-07-20 17:40:08,824 - __main__ - INFO - sglang running req: 5 queue req: 0
  24431. 2025-07-20 17:40:09,703 - sglang - INFO - [2025-07-20 17:40:09 TP0] Decode batch. #running-req: 4, #token: 11505, token usage: 0.30, gen throughput (token/s): 182.96, #queue-req: 0
  24432. 2025-07-20 17:40:09,703 - __main__ - INFO - sglang running req: 4 queue req: 0
  24433. 2025-07-20 17:40:10,571 - sglang - INFO - [2025-07-20 17:40:10 TP0] Decode batch. #running-req: 4, #token: 11665, token usage: 0.31, gen throughput (token/s): 184.47, #queue-req: 0
  24434. 2025-07-20 17:40:10,571 - __main__ - INFO - sglang running req: 4 queue req: 0
  24435. 2025-07-20 17:40:11,435 - sglang - INFO - [2025-07-20 17:40:11 TP0] Decode batch. #running-req: 4, #token: 11825, token usage: 0.31, gen throughput (token/s): 185.05, #queue-req: 0
  24436. 2025-07-20 17:40:11,435 - __main__ - INFO - sglang running req: 4 queue req: 0
  24437. 2025-07-20 17:40:12,288 - sglang - INFO - [2025-07-20 17:40:12 TP0] Decode batch. #running-req: 2, #token: 6278, token usage: 0.17, gen throughput (token/s): 117.26, #queue-req: 0
  24438. 2025-07-20 17:40:12,288 - __main__ - INFO - sglang running req: 2 queue req: 0
  24439. 2025-07-20 17:40:13,137 - sglang - INFO - [2025-07-20 17:40:13 TP0] Decode batch. #running-req: 2, #token: 6358, token usage: 0.17, gen throughput (token/s): 94.18, #queue-req: 0
  24440. 2025-07-20 17:40:13,138 - __main__ - INFO - sglang running req: 2 queue req: 0
  24441. 2025-07-20 17:40:13,986 - sglang - INFO - [2025-07-20 17:40:13 TP0] Decode batch. #running-req: 1, #token: 3157, token usage: 0.08, gen throughput (token/s): 76.63, #queue-req: 0
  24442. 2025-07-20 17:40:13,986 - __main__ - INFO - sglang running req: 1 queue req: 0
  24443. 2025-07-20 17:40:14,828 - sglang - INFO - [2025-07-20 17:40:14 TP0] Decode batch. #running-req: 1, #token: 3197, token usage: 0.08, gen throughput (token/s): 47.47, #queue-req: 0
  24444. 2025-07-20 17:40:14,829 - __main__ - INFO - sglang running req: 1 queue req: 0
  24445. 2025-07-20 17:40:15,670 - sglang - INFO - [2025-07-20 17:40:15 TP0] Decode batch. #running-req: 1, #token: 3237, token usage: 0.09, gen throughput (token/s): 47.52, #queue-req: 0
  24446. 2025-07-20 17:40:15,670 - __main__ - INFO - sglang running req: 1 queue req: 0
  24447. 2025-07-20 17:40:16,514 - sglang - INFO - [2025-07-20 17:40:16 TP0] Decode batch. #running-req: 1, #token: 3277, token usage: 0.09, gen throughput (token/s): 47.37, #queue-req: 0
  24448. 2025-07-20 17:40:16,515 - __main__ - INFO - sglang running req: 1 queue req: 0
  24449. 2025-07-20 17:40:16,895 - __main__ - INFO - Queue remaining: 3
  24450. 2025-07-20 17:40:16,895 - __main__ - INFO -
  24451. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24452. ----------------------------------------------------------------------------------
  24453. finished_input_tokens 450.42 406.89
  24454. finished_output_tokens 107.15 93.30
  24455. sglang_input_tokens 470.75 454.44
  24456. sglang_output_tokens 120.01 122.52
  24457. 2025-07-20 17:40:16,895 - __main__ - INFO -
  24458. Worker ID | finished | started
  24459. ----------+----------+--------
  24460. 0 | 6 | 7
  24461. 2025-07-20 17:40:17,348 - sglang - INFO - [2025-07-20 17:40:17 TP0] Decode batch. #running-req: 1, #token: 3317, token usage: 0.09, gen throughput (token/s): 47.97, #queue-req: 0
  24462. 2025-07-20 17:40:17,349 - __main__ - INFO - sglang running req: 1 queue req: 0
  24463. 2025-07-20 17:40:18,184 - sglang - INFO - [2025-07-20 17:40:18 TP0] Decode batch. #running-req: 1, #token: 3357, token usage: 0.09, gen throughput (token/s): 47.86, #queue-req: 0
  24464. 2025-07-20 17:40:18,185 - __main__ - INFO - sglang running req: 1 queue req: 0
  24465. 2025-07-20 17:40:18,419 - __main__ - INFO - Finished TaskGroup for worker on 7e7415b1a884dd4b422626d1f93cc9d5ff33301c
  24466. 2025-07-20 17:40:18,419 - __main__ - INFO - Got 1 docs for 7e7415b1a884dd4b422626d1f93cc9d5ff33301c
  24467. 2025-07-20 17:40:18,421 - __main__ - INFO - Worker 0 processing work item 24809642f1ed21aee754e7c58d350b261d121212
  24468. 2025-07-20 17:40:18,421 - __main__ - INFO - Created all tasks for 24809642f1ed21aee754e7c58d350b261d121212
  24469. 2025-07-20 17:40:18,427 - __main__ - INFO - Got 6 pages to do for test_pdf/1144520000702630XG344010602900602.pdf in worker 0
  24470. 2025-07-20 17:40:18,520 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-6
  24471. 2025-07-20 17:40:18,612 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-3
  24472. 2025-07-20 17:40:18,614 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-1
  24473. 2025-07-20 17:40:18,636 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-2
  24474. 2025-07-20 17:40:18,667 - sglang - INFO - [2025-07-20 17:40:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1339, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  24475. 2025-07-20 17:40:18,667 - __main__ - INFO - sglang running req: 0 queue req: 0
  24476. 2025-07-20 17:40:18,670 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-4
  24477. 2025-07-20 17:40:18,678 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010602900602.pdf-5
  24478. 2025-07-20 17:40:19,234 - sglang - INFO - [2025-07-20 17:40:19 TP0] Prefill batch. #new-seq: 5, #new-token: 11852, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.04, #running-req: 1, #queue-req: 0
  24479. 2025-07-20 17:40:19,235 - __main__ - INFO - sglang running req: 1 queue req: 0
  24480. 2025-07-20 17:40:23,378 - sglang - INFO - [2025-07-20 17:40:23 TP0] Decode batch. #running-req: 6, #token: 13365, token usage: 0.35, gen throughput (token/s): 35.62, #queue-req: 0
  24481. 2025-07-20 17:40:23,379 - __main__ - INFO - sglang running req: 6 queue req: 0
  24482. 2025-07-20 17:40:24,258 - sglang - INFO - [2025-07-20 17:40:24 TP0] Decode batch. #running-req: 6, #token: 13605, token usage: 0.36, gen throughput (token/s): 272.69, #queue-req: 0
  24483. 2025-07-20 17:40:24,259 - __main__ - INFO - sglang running req: 6 queue req: 0
  24484. 2025-07-20 17:40:25,138 - sglang - INFO - [2025-07-20 17:40:25 TP0] Decode batch. #running-req: 6, #token: 13845, token usage: 0.36, gen throughput (token/s): 272.99, #queue-req: 0
  24485. 2025-07-20 17:40:25,138 - __main__ - INFO - sglang running req: 6 queue req: 0
  24486. 2025-07-20 17:40:26,018 - sglang - INFO - [2025-07-20 17:40:26 TP0] Decode batch. #running-req: 6, #token: 14085, token usage: 0.37, gen throughput (token/s): 272.53, #queue-req: 0
  24487. 2025-07-20 17:40:26,019 - __main__ - INFO - sglang running req: 6 queue req: 0
  24488. 2025-07-20 17:40:26,897 - __main__ - INFO - Queue remaining: 2
  24489. 2025-07-20 17:40:26,897 - __main__ - INFO -
  24490. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24491. ----------------------------------------------------------------------------------
  24492. finished_input_tokens 461.57 456.89
  24493. finished_output_tokens 110.14 106.26
  24494. sglang_input_tokens 468.34 447.66
  24495. sglang_output_tokens 119.71 121.47
  24496. 2025-07-20 17:40:26,898 - __main__ - INFO -
  24497. Worker ID | started
  24498. ----------+--------
  24499. 0 | 6
  24500. 2025-07-20 17:40:26,901 - sglang - INFO - [2025-07-20 17:40:26 TP0] Decode batch. #running-req: 6, #token: 14325, token usage: 0.38, gen throughput (token/s): 271.74, #queue-req: 0
  24501. 2025-07-20 17:40:26,901 - __main__ - INFO - sglang running req: 6 queue req: 0
  24502. 2025-07-20 17:40:27,789 - sglang - INFO - [2025-07-20 17:40:27 TP0] Decode batch. #running-req: 5, #token: 12997, token usage: 0.34, gen throughput (token/s): 262.47, #queue-req: 0
  24503. 2025-07-20 17:40:27,789 - __main__ - INFO - sglang running req: 5 queue req: 0
  24504. 2025-07-20 17:40:28,668 - sglang - INFO - [2025-07-20 17:40:28 TP0] Decode batch. #running-req: 5, #token: 13197, token usage: 0.35, gen throughput (token/s): 227.66, #queue-req: 0
  24505. 2025-07-20 17:40:28,668 - __main__ - INFO - sglang running req: 5 queue req: 0
  24506. 2025-07-20 17:40:29,546 - sglang - INFO - [2025-07-20 17:40:29 TP0] Decode batch. #running-req: 5, #token: 13397, token usage: 0.35, gen throughput (token/s): 227.56, #queue-req: 0
  24507. 2025-07-20 17:40:29,547 - __main__ - INFO - sglang running req: 5 queue req: 0
  24508. 2025-07-20 17:40:30,433 - sglang - INFO - [2025-07-20 17:40:30 TP0] Decode batch. #running-req: 5, #token: 13597, token usage: 0.36, gen throughput (token/s): 225.54, #queue-req: 0
  24509. 2025-07-20 17:40:30,433 - __main__ - INFO - sglang running req: 5 queue req: 0
  24510. 2025-07-20 17:40:31,312 - sglang - INFO - [2025-07-20 17:40:31 TP0] Decode batch. #running-req: 5, #token: 13797, token usage: 0.36, gen throughput (token/s): 227.52, #queue-req: 0
  24511. 2025-07-20 17:40:31,313 - __main__ - INFO - sglang running req: 5 queue req: 0
  24512. 2025-07-20 17:40:32,192 - sglang - INFO - [2025-07-20 17:40:32 TP0] Decode batch. #running-req: 5, #token: 13997, token usage: 0.37, gen throughput (token/s): 227.20, #queue-req: 0
  24513. 2025-07-20 17:40:32,193 - __main__ - INFO - sglang running req: 5 queue req: 0
  24514. 2025-07-20 17:40:33,079 - sglang - INFO - [2025-07-20 17:40:33 TP0] Decode batch. #running-req: 5, #token: 14197, token usage: 0.37, gen throughput (token/s): 225.67, #queue-req: 0
  24515. 2025-07-20 17:40:33,079 - __main__ - INFO - sglang running req: 5 queue req: 0
  24516. 2025-07-20 17:40:33,970 - sglang - INFO - [2025-07-20 17:40:33 TP0] Decode batch. #running-req: 5, #token: 14397, token usage: 0.38, gen throughput (token/s): 224.35, #queue-req: 0
  24517. 2025-07-20 17:40:33,971 - __main__ - INFO - sglang running req: 5 queue req: 0
  24518. 2025-07-20 17:40:34,855 - sglang - INFO - [2025-07-20 17:40:34 TP0] Decode batch. #running-req: 5, #token: 14597, token usage: 0.38, gen throughput (token/s): 226.02, #queue-req: 0
  24519. 2025-07-20 17:40:34,855 - __main__ - INFO - sglang running req: 5 queue req: 0
  24520. 2025-07-20 17:40:35,738 - sglang - INFO - [2025-07-20 17:40:35 TP0] Decode batch. #running-req: 5, #token: 14797, token usage: 0.39, gen throughput (token/s): 226.61, #queue-req: 0
  24521. 2025-07-20 17:40:35,738 - __main__ - INFO - sglang running req: 5 queue req: 0
  24522. 2025-07-20 17:40:36,622 - sglang - INFO - [2025-07-20 17:40:36 TP0] Decode batch. #running-req: 4, #token: 12034, token usage: 0.32, gen throughput (token/s): 206.95, #queue-req: 0
  24523. 2025-07-20 17:40:36,622 - __main__ - INFO - sglang running req: 4 queue req: 0
  24524. 2025-07-20 17:40:36,899 - __main__ - INFO - Queue remaining: 2
  24525. 2025-07-20 17:40:36,899 - __main__ - INFO -
  24526. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24527. ----------------------------------------------------------------------------------
  24528. finished_input_tokens 456.72 426.36
  24529. finished_output_tokens 108.98 98.80
  24530. sglang_input_tokens 467.28 439.72
  24531. sglang_output_tokens 119.33 119.59
  24532. 2025-07-20 17:40:36,900 - __main__ - INFO -
  24533. Worker ID | finished | started
  24534. ----------+----------+--------
  24535. 0 | 2 | 6
  24536. 2025-07-20 17:40:37,495 - sglang - INFO - [2025-07-20 17:40:37 TP0] Decode batch. #running-req: 3, #token: 8915, token usage: 0.23, gen throughput (token/s): 167.24, #queue-req: 0
  24537. 2025-07-20 17:40:37,495 - __main__ - INFO - sglang running req: 3 queue req: 0
  24538. 2025-07-20 17:40:38,369 - sglang - INFO - [2025-07-20 17:40:38 TP0] Decode batch. #running-req: 2, #token: 6214, token usage: 0.16, gen throughput (token/s): 136.17, #queue-req: 0
  24539. 2025-07-20 17:40:38,369 - __main__ - INFO - sglang running req: 2 queue req: 0
  24540. 2025-07-20 17:40:39,224 - sglang - INFO - [2025-07-20 17:40:39 TP0] Decode batch. #running-req: 2, #token: 6294, token usage: 0.17, gen throughput (token/s): 93.60, #queue-req: 0
  24541. 2025-07-20 17:40:39,224 - __main__ - INFO - sglang running req: 2 queue req: 0
  24542. 2025-07-20 17:40:40,076 - sglang - INFO - [2025-07-20 17:40:40 TP0] Decode batch. #running-req: 1, #token: 3246, token usage: 0.09, gen throughput (token/s): 59.85, #queue-req: 0
  24543. 2025-07-20 17:40:40,076 - __main__ - INFO - sglang running req: 1 queue req: 0
  24544. 2025-07-20 17:40:40,912 - sglang - INFO - [2025-07-20 17:40:40 TP0] Decode batch. #running-req: 1, #token: 3286, token usage: 0.09, gen throughput (token/s): 47.83, #queue-req: 0
  24545. 2025-07-20 17:40:40,912 - __main__ - INFO - sglang running req: 1 queue req: 0
  24546. 2025-07-20 17:40:41,746 - sglang - INFO - [2025-07-20 17:40:41 TP0] Decode batch. #running-req: 1, #token: 3326, token usage: 0.09, gen throughput (token/s): 47.98, #queue-req: 0
  24547. 2025-07-20 17:40:41,746 - __main__ - INFO - sglang running req: 1 queue req: 0
  24548. 2025-07-20 17:40:42,578 - sglang - INFO - [2025-07-20 17:40:42 TP0] Decode batch. #running-req: 1, #token: 3366, token usage: 0.09, gen throughput (token/s): 48.03, #queue-req: 0
  24549. 2025-07-20 17:40:42,579 - __main__ - INFO - sglang running req: 1 queue req: 0
  24550. 2025-07-20 17:40:43,413 - sglang - INFO - [2025-07-20 17:40:43 TP0] Decode batch. #running-req: 1, #token: 3406, token usage: 0.09, gen throughput (token/s): 47.92, #queue-req: 0
  24551. 2025-07-20 17:40:43,414 - __main__ - INFO - sglang running req: 1 queue req: 0
  24552. 2025-07-20 17:40:43,817 - __main__ - INFO - Finished TaskGroup for worker on 24809642f1ed21aee754e7c58d350b261d121212
  24553. 2025-07-20 17:40:43,818 - __main__ - INFO - Got 1 docs for 24809642f1ed21aee754e7c58d350b261d121212
  24554. 2025-07-20 17:40:43,819 - __main__ - INFO - Worker 0 processing work item 9face5eb793573e747789b627bf1cc4b334b5b93
  24555. 2025-07-20 17:40:43,819 - __main__ - INFO - Created all tasks for 9face5eb793573e747789b627bf1cc4b334b5b93
  24556. 2025-07-20 17:40:43,827 - __main__ - INFO - Got 11 pages to do for test_pdf/1144520000702630XG3440106001004.pdf in worker 0
  24557. 2025-07-20 17:40:43,945 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-7
  24558. 2025-07-20 17:40:43,968 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-6
  24559. 2025-07-20 17:40:43,972 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-2
  24560. 2025-07-20 17:40:43,974 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-8
  24561. 2025-07-20 17:40:43,975 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-5
  24562. 2025-07-20 17:40:43,977 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-9
  24563. 2025-07-20 17:40:43,985 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-4
  24564. 2025-07-20 17:40:43,989 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-11
  24565. 2025-07-20 17:40:44,018 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-1
  24566. 2025-07-20 17:40:44,052 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-3
  24567. 2025-07-20 17:40:44,117 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG3440106001004.pdf-10
  24568. 2025-07-20 17:40:44,142 - sglang - INFO - [2025-07-20 17:40:44 TP0] Prefill batch. #new-seq: 1, #new-token: 1980, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  24569. 2025-07-20 17:40:44,142 - __main__ - INFO - sglang running req: 0 queue req: 0
  24570. 2025-07-20 17:40:44,896 - sglang - INFO - [2025-07-20 17:40:44 TP0] Prefill batch. #new-seq: 6, #new-token: 12273, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 4
  24571. 2025-07-20 17:40:44,896 - __main__ - INFO - sglang running req: 1 queue req: 4
  24572. 2025-07-20 17:40:46,901 - __main__ - INFO - Queue remaining: 1
  24573. 2025-07-20 17:40:46,901 - __main__ - INFO -
  24574. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24575. ----------------------------------------------------------------------------------
  24576. finished_input_tokens 465.69 470.33
  24577. finished_output_tokens 111.94 111.90
  24578. sglang_input_tokens 472.32 456.05
  24579. sglang_output_tokens 121.31 127.10
  24580. 2025-07-20 17:40:46,901 - __main__ - INFO -
  24581. Worker ID | started
  24582. ----------+--------
  24583. 0 | 11
  24584. 2025-07-20 17:40:49,174 - sglang - INFO - [2025-07-20 17:40:49 TP0] Decode batch. #running-req: 7, #token: 14400, token usage: 0.38, gen throughput (token/s): 28.81, #queue-req: 4
  24585. 2025-07-20 17:40:49,175 - __main__ - INFO - sglang running req: 7 queue req: 4
  24586. 2025-07-20 17:40:50,062 - sglang - INFO - [2025-07-20 17:40:50 TP0] Decode batch. #running-req: 7, #token: 14680, token usage: 0.39, gen throughput (token/s): 315.36, #queue-req: 4
  24587. 2025-07-20 17:40:50,063 - __main__ - INFO - sglang running req: 7 queue req: 4
  24588. 2025-07-20 17:40:50,956 - sglang - INFO - [2025-07-20 17:40:50 TP0] Decode batch. #running-req: 7, #token: 14960, token usage: 0.39, gen throughput (token/s): 313.27, #queue-req: 4
  24589. 2025-07-20 17:40:50,957 - __main__ - INFO - sglang running req: 7 queue req: 4
  24590. 2025-07-20 17:40:51,840 - sglang - INFO - [2025-07-20 17:40:51 TP0] Decode batch. #running-req: 7, #token: 15240, token usage: 0.40, gen throughput (token/s): 316.72, #queue-req: 4
  24591. 2025-07-20 17:40:51,841 - __main__ - INFO - sglang running req: 7 queue req: 4
  24592. 2025-07-20 17:40:52,722 - sglang - INFO - [2025-07-20 17:40:52 TP0] Decode batch. #running-req: 7, #token: 15520, token usage: 0.41, gen throughput (token/s): 317.53, #queue-req: 4
  24593. 2025-07-20 17:40:52,722 - __main__ - INFO - sglang running req: 7 queue req: 4
  24594. 2025-07-20 17:40:53,614 - sglang - INFO - [2025-07-20 17:40:53 TP0] Decode batch. #running-req: 7, #token: 15800, token usage: 0.42, gen throughput (token/s): 313.91, #queue-req: 4
  24595. 2025-07-20 17:40:53,614 - __main__ - INFO - sglang running req: 7 queue req: 4
  24596. 2025-07-20 17:40:54,507 - sglang - INFO - [2025-07-20 17:40:54 TP0] Decode batch. #running-req: 7, #token: 16080, token usage: 0.42, gen throughput (token/s): 313.44, #queue-req: 4
  24597. 2025-07-20 17:40:54,508 - __main__ - INFO - sglang running req: 7 queue req: 4
  24598. 2025-07-20 17:40:55,332 - sglang - INFO - [2025-07-20 17:40:55 TP0] Prefill batch. #new-seq: 3, #new-token: 6536, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.37, #running-req: 6, #queue-req: 1
  24599. 2025-07-20 17:40:55,333 - __main__ - INFO - sglang running req: 6 queue req: 1
  24600. 2025-07-20 17:40:56,903 - __main__ - INFO - Queue remaining: 1
  24601. 2025-07-20 17:40:56,903 - __main__ - INFO -
  24602. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24603. ----------------------------------------------------------------------------------
  24604. finished_input_tokens 460.90 470.33
  24605. finished_output_tokens 110.79 111.90
  24606. sglang_input_tokens 469.50 407.44
  24607. sglang_output_tokens 120.37 115.07
  24608. 2025-07-20 17:40:56,903 - __main__ - INFO -
  24609. Worker ID | finished | started
  24610. ----------+----------+--------
  24611. 0 | 1 | 11
  24612. 2025-07-20 17:40:57,416 - sglang - INFO - [2025-07-20 17:40:57 TP0] Decode batch. #running-req: 9, #token: 20624, token usage: 0.54, gen throughput (token/s): 97.98, #queue-req: 1
  24613. 2025-07-20 17:40:57,416 - __main__ - INFO - sglang running req: 9 queue req: 1
  24614. 2025-07-20 17:40:58,377 - sglang - INFO - [2025-07-20 17:40:58 TP0] Decode batch. #running-req: 9, #token: 20984, token usage: 0.55, gen throughput (token/s): 374.65, #queue-req: 1
  24615. 2025-07-20 17:40:58,377 - __main__ - INFO - sglang running req: 9 queue req: 1
  24616. 2025-07-20 17:40:58,520 - sglang - INFO - [2025-07-20 17:40:58 TP0] Prefill batch. #new-seq: 1, #new-token: 2579, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.49, #running-req: 8, #queue-req: 0
  24617. 2025-07-20 17:40:58,520 - __main__ - INFO - sglang running req: 8 queue req: 0
  24618. 2025-07-20 17:41:00,068 - sglang - INFO - [2025-07-20 17:41:00 TP0] Decode batch. #running-req: 6, #token: 14139, token usage: 0.37, gen throughput (token/s): 175.60, #queue-req: 0
  24619. 2025-07-20 17:41:00,069 - __main__ - INFO - sglang running req: 6 queue req: 0
  24620. 2025-07-20 17:41:00,953 - sglang - INFO - [2025-07-20 17:41:00 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 241.78, #queue-req: 0
  24621. 2025-07-20 17:41:00,954 - __main__ - INFO - sglang running req: 5 queue req: 0
  24622. 2025-07-20 17:41:01,835 - sglang - INFO - [2025-07-20 17:41:01 TP0] Decode batch. #running-req: 5, #token: 12090, token usage: 0.32, gen throughput (token/s): 226.94, #queue-req: 0
  24623. 2025-07-20 17:41:01,835 - __main__ - INFO - sglang running req: 5 queue req: 0
  24624. 2025-07-20 17:41:02,704 - sglang - INFO - [2025-07-20 17:41:02 TP0] Decode batch. #running-req: 4, #token: 9878, token usage: 0.26, gen throughput (token/s): 196.64, #queue-req: 0
  24625. 2025-07-20 17:41:02,705 - __main__ - INFO - sglang running req: 4 queue req: 0
  24626. 2025-07-20 17:41:03,570 - sglang - INFO - [2025-07-20 17:41:03 TP0] Decode batch. #running-req: 4, #token: 10038, token usage: 0.26, gen throughput (token/s): 184.76, #queue-req: 0
  24627. 2025-07-20 17:41:03,570 - __main__ - INFO - sglang running req: 4 queue req: 0
  24628. 2025-07-20 17:41:04,442 - sglang - INFO - [2025-07-20 17:41:04 TP0] Decode batch. #running-req: 4, #token: 10198, token usage: 0.27, gen throughput (token/s): 183.60, #queue-req: 0
  24629. 2025-07-20 17:41:04,442 - __main__ - INFO - sglang running req: 4 queue req: 0
  24630. 2025-07-20 17:41:05,319 - sglang - INFO - [2025-07-20 17:41:05 TP0] Decode batch. #running-req: 4, #token: 10358, token usage: 0.27, gen throughput (token/s): 182.48, #queue-req: 0
  24631. 2025-07-20 17:41:05,319 - __main__ - INFO - sglang running req: 4 queue req: 0
  24632. 2025-07-20 17:41:06,189 - sglang - INFO - [2025-07-20 17:41:06 TP0] Decode batch. #running-req: 4, #token: 10518, token usage: 0.28, gen throughput (token/s): 183.72, #queue-req: 0
  24633. 2025-07-20 17:41:06,190 - __main__ - INFO - sglang running req: 4 queue req: 0
  24634. 2025-07-20 17:41:06,905 - __main__ - INFO - Queue remaining: 1
  24635. 2025-07-20 17:41:06,905 - __main__ - INFO -
  24636. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24637. ----------------------------------------------------------------------------------
  24638. finished_input_tokens 456.19 470.33
  24639. finished_output_tokens 109.66 111.90
  24640. sglang_input_tokens 477.22 432.04
  24641. sglang_output_tokens 121.48 117.48
  24642. 2025-07-20 17:41:06,905 - __main__ - INFO -
  24643. Worker ID | finished | started
  24644. ----------+----------+--------
  24645. 0 | 7 | 11
  24646. 2025-07-20 17:41:07,052 - sglang - INFO - [2025-07-20 17:41:07 TP0] Decode batch. #running-req: 4, #token: 10678, token usage: 0.28, gen throughput (token/s): 185.51, #queue-req: 0
  24647. 2025-07-20 17:41:07,052 - __main__ - INFO - sglang running req: 4 queue req: 0
  24648. 2025-07-20 17:41:07,919 - sglang - INFO - [2025-07-20 17:41:07 TP0] Decode batch. #running-req: 4, #token: 10838, token usage: 0.29, gen throughput (token/s): 184.44, #queue-req: 0
  24649. 2025-07-20 17:41:07,920 - __main__ - INFO - sglang running req: 4 queue req: 0
  24650. 2025-07-20 17:41:08,789 - sglang - INFO - [2025-07-20 17:41:08 TP0] Decode batch. #running-req: 3, #token: 8742, token usage: 0.23, gen throughput (token/s): 157.48, #queue-req: 0
  24651. 2025-07-20 17:41:08,790 - __main__ - INFO - sglang running req: 3 queue req: 0
  24652. 2025-07-20 17:41:09,656 - sglang - INFO - [2025-07-20 17:41:09 TP0] Decode batch. #running-req: 3, #token: 8862, token usage: 0.23, gen throughput (token/s): 138.55, #queue-req: 0
  24653. 2025-07-20 17:41:09,656 - __main__ - INFO - sglang running req: 3 queue req: 0
  24654. 2025-07-20 17:41:10,520 - sglang - INFO - [2025-07-20 17:41:10 TP0] Decode batch. #running-req: 3, #token: 8982, token usage: 0.24, gen throughput (token/s): 138.76, #queue-req: 0
  24655. 2025-07-20 17:41:10,521 - __main__ - INFO - sglang running req: 3 queue req: 0
  24656. 2025-07-20 17:41:11,387 - sglang - INFO - [2025-07-20 17:41:11 TP0] Decode batch. #running-req: 3, #token: 9102, token usage: 0.24, gen throughput (token/s): 138.40, #queue-req: 0
  24657. 2025-07-20 17:41:11,388 - __main__ - INFO - sglang running req: 3 queue req: 0
  24658. 2025-07-20 17:41:12,257 - sglang - INFO - [2025-07-20 17:41:12 TP0] Decode batch. #running-req: 3, #token: 9222, token usage: 0.24, gen throughput (token/s): 137.99, #queue-req: 0
  24659. 2025-07-20 17:41:12,257 - __main__ - INFO - sglang running req: 3 queue req: 0
  24660. 2025-07-20 17:41:13,115 - sglang - INFO - [2025-07-20 17:41:13 TP0] Decode batch. #running-req: 2, #token: 6119, token usage: 0.16, gen throughput (token/s): 96.78, #queue-req: 0
  24661. 2025-07-20 17:41:13,115 - __main__ - INFO - sglang running req: 2 queue req: 0
  24662. 2025-07-20 17:41:13,960 - sglang - INFO - [2025-07-20 17:41:13 TP0] Decode batch. #running-req: 2, #token: 6199, token usage: 0.16, gen throughput (token/s): 94.63, #queue-req: 0
  24663. 2025-07-20 17:41:13,960 - __main__ - INFO - sglang running req: 2 queue req: 0
  24664. 2025-07-20 17:41:14,806 - sglang - INFO - [2025-07-20 17:41:14 TP0] Decode batch. #running-req: 2, #token: 6279, token usage: 0.17, gen throughput (token/s): 94.58, #queue-req: 0
  24665. 2025-07-20 17:41:14,806 - __main__ - INFO - sglang running req: 2 queue req: 0
  24666. 2025-07-20 17:41:15,657 - sglang - INFO - [2025-07-20 17:41:15 TP0] Decode batch. #running-req: 2, #token: 6359, token usage: 0.17, gen throughput (token/s): 93.93, #queue-req: 0
  24667. 2025-07-20 17:41:15,658 - __main__ - INFO - sglang running req: 2 queue req: 0
  24668. 2025-07-20 17:41:16,507 - sglang - INFO - [2025-07-20 17:41:16 TP0] Decode batch. #running-req: 2, #token: 6439, token usage: 0.17, gen throughput (token/s): 94.17, #queue-req: 0
  24669. 2025-07-20 17:41:16,507 - __main__ - INFO - sglang running req: 2 queue req: 0
  24670. 2025-07-20 17:41:16,906 - __main__ - INFO - Queue remaining: 1
  24671. 2025-07-20 17:41:16,907 - __main__ - INFO -
  24672. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24673. ----------------------------------------------------------------------------------
  24674. finished_input_tokens 451.59 470.33
  24675. finished_output_tokens 108.55 111.90
  24676. sglang_input_tokens 479.00 453.82
  24677. sglang_output_tokens 122.24 124.01
  24678. 2025-07-20 17:41:16,907 - __main__ - INFO -
  24679. Worker ID | finished | started
  24680. ----------+----------+--------
  24681. 0 | 10 | 11
  24682. 2025-07-20 17:41:17,351 - sglang - INFO - [2025-07-20 17:41:17 TP0] Decode batch. #running-req: 1, #token: 3413, token usage: 0.09, gen throughput (token/s): 58.09, #queue-req: 0
  24683. 2025-07-20 17:41:17,351 - __main__ - INFO - sglang running req: 1 queue req: 0
  24684. 2025-07-20 17:41:18,193 - sglang - INFO - [2025-07-20 17:41:18 TP0] Decode batch. #running-req: 1, #token: 3453, token usage: 0.09, gen throughput (token/s): 47.49, #queue-req: 0
  24685. 2025-07-20 17:41:18,193 - __main__ - INFO - sglang running req: 1 queue req: 0
  24686. 2025-07-20 17:41:19,041 - sglang - INFO - [2025-07-20 17:41:19 TP0] Decode batch. #running-req: 1, #token: 3493, token usage: 0.09, gen throughput (token/s): 47.17, #queue-req: 0
  24687. 2025-07-20 17:41:19,041 - __main__ - INFO - sglang running req: 1 queue req: 0
  24688. 2025-07-20 17:41:19,889 - sglang - INFO - [2025-07-20 17:41:19 TP0] Decode batch. #running-req: 1, #token: 3533, token usage: 0.09, gen throughput (token/s): 47.16, #queue-req: 0
  24689. 2025-07-20 17:41:19,889 - __main__ - INFO - sglang running req: 1 queue req: 0
  24690. 2025-07-20 17:41:19,937 - __main__ - INFO - Finished TaskGroup for worker on 9face5eb793573e747789b627bf1cc4b334b5b93
  24691. 2025-07-20 17:41:19,938 - __main__ - INFO - Got 1 docs for 9face5eb793573e747789b627bf1cc4b334b5b93
  24692. 2025-07-20 17:41:19,939 - __main__ - INFO - Worker 0 processing work item 21ee5d5d32535bcacd750ef2dace24b98fa42fdb
  24693. 2025-07-20 17:41:19,940 - __main__ - INFO - Created all tasks for 21ee5d5d32535bcacd750ef2dace24b98fa42fdb
  24694. 2025-07-20 17:41:19,947 - __main__ - INFO - Got 9 pages to do for test_pdf/1144520000702630XG344010604301601.pdf in worker 0
  24695. 2025-07-20 17:41:20,073 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-6
  24696. 2025-07-20 17:41:20,080 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-9
  24697. 2025-07-20 17:41:20,091 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-2
  24698. 2025-07-20 17:41:20,098 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-3
  24699. 2025-07-20 17:41:20,100 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-4
  24700. 2025-07-20 17:41:20,133 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-1
  24701. 2025-07-20 17:41:20,143 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  24702. 2025-07-20 17:41:20,168 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-8
  24703. 2025-07-20 17:41:20,186 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-7
  24704. 2025-07-20 17:41:20,229 - sglang - INFO - [2025-07-20 17:41:20 TP0] Prefill batch. #new-seq: 1, #new-token: 1759, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  24705. 2025-07-20 17:41:20,230 - __main__ - INFO - sglang running req: 0 queue req: 0
  24706. 2025-07-20 17:41:20,913 - sglang - INFO - [2025-07-20 17:41:20 TP0] Prefill batch. #new-seq: 6, #new-token: 13155, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 2
  24707. 2025-07-20 17:41:20,913 - __main__ - INFO - sglang running req: 1 queue req: 2
  24708. 2025-07-20 17:41:25,747 - sglang - INFO - [2025-07-20 17:41:25 TP0] Decode batch. #running-req: 7, #token: 15180, token usage: 0.40, gen throughput (token/s): 45.75, #queue-req: 2
  24709. 2025-07-20 17:41:25,747 - __main__ - INFO - sglang running req: 7 queue req: 2
  24710. 2025-07-20 17:41:26,644 - sglang - INFO - [2025-07-20 17:41:26 TP0] Decode batch. #running-req: 7, #token: 15460, token usage: 0.41, gen throughput (token/s): 312.14, #queue-req: 2
  24711. 2025-07-20 17:41:26,644 - __main__ - INFO - sglang running req: 7 queue req: 2
  24712. 2025-07-20 17:41:26,908 - __main__ - INFO - Queue remaining: 0
  24713. 2025-07-20 17:41:26,908 - __main__ - INFO -
  24714. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24715. ----------------------------------------------------------------------------------
  24716. finished_input_tokens 470.43 548.22
  24717. finished_output_tokens 112.98 130.29
  24718. sglang_input_tokens 476.79 462.42
  24719. sglang_output_tokens 121.97 127.20
  24720. 2025-07-20 17:41:26,908 - __main__ - INFO -
  24721. Worker ID | started
  24722. ----------+--------
  24723. 0 | 9
  24724. 2025-07-20 17:41:27,531 - sglang - INFO - [2025-07-20 17:41:27 TP0] Decode batch. #running-req: 7, #token: 15740, token usage: 0.41, gen throughput (token/s): 315.73, #queue-req: 2
  24725. 2025-07-20 17:41:27,531 - __main__ - INFO - sglang running req: 7 queue req: 2
  24726. 2025-07-20 17:41:28,418 - sglang - INFO - [2025-07-20 17:41:28 TP0] Decode batch. #running-req: 7, #token: 16020, token usage: 0.42, gen throughput (token/s): 315.50, #queue-req: 2
  24727. 2025-07-20 17:41:28,419 - __main__ - INFO - sglang running req: 7 queue req: 2
  24728. 2025-07-20 17:41:29,315 - sglang - INFO - [2025-07-20 17:41:29 TP0] Decode batch. #running-req: 7, #token: 16300, token usage: 0.43, gen throughput (token/s): 312.34, #queue-req: 2
  24729. 2025-07-20 17:41:29,315 - __main__ - INFO - sglang running req: 7 queue req: 2
  24730. 2025-07-20 17:41:30,209 - sglang - INFO - [2025-07-20 17:41:30 TP0] Decode batch. #running-req: 7, #token: 16580, token usage: 0.44, gen throughput (token/s): 313.00, #queue-req: 2
  24731. 2025-07-20 17:41:30,210 - __main__ - INFO - sglang running req: 7 queue req: 2
  24732. 2025-07-20 17:41:30,791 - sglang - INFO - [2025-07-20 17:41:30 TP0] Prefill batch. #new-seq: 2, #new-token: 4685, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.39, #running-req: 6, #queue-req: 0
  24733. 2025-07-20 17:41:30,791 - __main__ - INFO - sglang running req: 6 queue req: 0
  24734. 2025-07-20 17:41:32,535 - sglang - INFO - [2025-07-20 17:41:32 TP0] Decode batch. #running-req: 8, #token: 19481, token usage: 0.51, gen throughput (token/s): 125.99, #queue-req: 0
  24735. 2025-07-20 17:41:32,535 - __main__ - INFO - sglang running req: 8 queue req: 0
  24736. 2025-07-20 17:41:33,442 - sglang - INFO - [2025-07-20 17:41:33 TP0] Decode batch. #running-req: 8, #token: 19801, token usage: 0.52, gen throughput (token/s): 352.97, #queue-req: 0
  24737. 2025-07-20 17:41:33,442 - __main__ - INFO - sglang running req: 8 queue req: 0
  24738. 2025-07-20 17:41:34,336 - sglang - INFO - [2025-07-20 17:41:34 TP0] Decode batch. #running-req: 7, #token: 17928, token usage: 0.47, gen throughput (token/s): 327.59, #queue-req: 0
  24739. 2025-07-20 17:41:34,336 - __main__ - INFO - sglang running req: 7 queue req: 0
  24740. 2025-07-20 17:41:35,222 - sglang - INFO - [2025-07-20 17:41:35 TP0] Decode batch. #running-req: 6, #token: 15761, token usage: 0.41, gen throughput (token/s): 287.81, #queue-req: 0
  24741. 2025-07-20 17:41:35,222 - __main__ - INFO - sglang running req: 6 queue req: 0
  24742. 2025-07-20 17:41:36,111 - sglang - INFO - [2025-07-20 17:41:36 TP0] Decode batch. #running-req: 5, #token: 13804, token usage: 0.36, gen throughput (token/s): 239.53, #queue-req: 0
  24743. 2025-07-20 17:41:36,111 - __main__ - INFO - sglang running req: 5 queue req: 0
  24744. 2025-07-20 17:41:36,910 - __main__ - INFO - Queue remaining: 0
  24745. 2025-07-20 17:41:36,910 - __main__ - INFO -
  24746. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24747. ----------------------------------------------------------------------------------
  24748. finished_input_tokens 465.77 548.22
  24749. finished_output_tokens 111.86 130.29
  24750. sglang_input_tokens 479.45 487.28
  24751. sglang_output_tokens 122.13 131.81
  24752. 2025-07-20 17:41:36,910 - __main__ - INFO -
  24753. Worker ID | finished | started
  24754. ----------+----------+--------
  24755. 0 | 4 | 9
  24756. 2025-07-20 17:41:36,996 - sglang - INFO - [2025-07-20 17:41:36 TP0] Decode batch. #running-req: 5, #token: 14004, token usage: 0.37, gen throughput (token/s): 225.99, #queue-req: 0
  24757. 2025-07-20 17:41:36,997 - __main__ - INFO - sglang running req: 5 queue req: 0
  24758. 2025-07-20 17:41:37,873 - sglang - INFO - [2025-07-20 17:41:37 TP0] Decode batch. #running-req: 4, #token: 11440, token usage: 0.30, gen throughput (token/s): 216.72, #queue-req: 0
  24759. 2025-07-20 17:41:37,873 - __main__ - INFO - sglang running req: 4 queue req: 0
  24760. 2025-07-20 17:41:38,736 - sglang - INFO - [2025-07-20 17:41:38 TP0] Decode batch. #running-req: 4, #token: 11600, token usage: 0.31, gen throughput (token/s): 185.41, #queue-req: 0
  24761. 2025-07-20 17:41:38,736 - __main__ - INFO - sglang running req: 4 queue req: 0
  24762. 2025-07-20 17:41:39,602 - sglang - INFO - [2025-07-20 17:41:39 TP0] Decode batch. #running-req: 4, #token: 11760, token usage: 0.31, gen throughput (token/s): 184.68, #queue-req: 0
  24763. 2025-07-20 17:41:39,602 - __main__ - INFO - sglang running req: 4 queue req: 0
  24764. 2025-07-20 17:41:40,477 - sglang - INFO - [2025-07-20 17:41:40 TP0] Decode batch. #running-req: 3, #token: 9178, token usage: 0.24, gen throughput (token/s): 176.08, #queue-req: 0
  24765. 2025-07-20 17:41:40,477 - __main__ - INFO - sglang running req: 3 queue req: 0
  24766. 2025-07-20 17:41:41,324 - sglang - INFO - [2025-07-20 17:41:41 TP0] Decode batch. #running-req: 2, #token: 6017, token usage: 0.16, gen throughput (token/s): 102.71, #queue-req: 0
  24767. 2025-07-20 17:41:41,324 - __main__ - INFO - sglang running req: 2 queue req: 0
  24768. 2025-07-20 17:41:42,169 - sglang - INFO - [2025-07-20 17:41:42 TP0] Decode batch. #running-req: 2, #token: 6097, token usage: 0.16, gen throughput (token/s): 94.70, #queue-req: 0
  24769. 2025-07-20 17:41:42,169 - __main__ - INFO - sglang running req: 2 queue req: 0
  24770. 2025-07-20 17:41:43,020 - sglang - INFO - [2025-07-20 17:41:43 TP0] Decode batch. #running-req: 2, #token: 6177, token usage: 0.16, gen throughput (token/s): 93.94, #queue-req: 0
  24771. 2025-07-20 17:41:43,020 - __main__ - INFO - sglang running req: 2 queue req: 0
  24772. 2025-07-20 17:41:43,866 - sglang - INFO - [2025-07-20 17:41:43 TP0] Decode batch. #running-req: 2, #token: 6257, token usage: 0.16, gen throughput (token/s): 94.59, #queue-req: 0
  24773. 2025-07-20 17:41:43,866 - __main__ - INFO - sglang running req: 2 queue req: 0
  24774. 2025-07-20 17:41:44,712 - sglang - INFO - [2025-07-20 17:41:44 TP0] Decode batch. #running-req: 2, #token: 6337, token usage: 0.17, gen throughput (token/s): 94.62, #queue-req: 0
  24775. 2025-07-20 17:41:44,712 - __main__ - INFO - sglang running req: 2 queue req: 0
  24776. 2025-07-20 17:41:45,558 - sglang - INFO - [2025-07-20 17:41:45 TP0] Decode batch. #running-req: 2, #token: 6417, token usage: 0.17, gen throughput (token/s): 94.51, #queue-req: 0
  24777. 2025-07-20 17:41:45,558 - __main__ - INFO - sglang running req: 2 queue req: 0
  24778. 2025-07-20 17:41:46,413 - sglang - INFO - [2025-07-20 17:41:46 TP0] Decode batch. #running-req: 2, #token: 6497, token usage: 0.17, gen throughput (token/s): 93.59, #queue-req: 0
  24779. 2025-07-20 17:41:46,413 - __main__ - INFO - sglang running req: 2 queue req: 0
  24780. 2025-07-20 17:41:46,912 - __main__ - INFO - Queue remaining: 0
  24781. 2025-07-20 17:41:46,912 - __main__ - INFO -
  24782. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24783. ----------------------------------------------------------------------------------
  24784. finished_input_tokens 461.21 548.22
  24785. finished_output_tokens 110.76 130.29
  24786. sglang_input_tokens 481.82 504.34
  24787. sglang_output_tokens 122.43 126.89
  24788. 2025-07-20 17:41:46,912 - __main__ - INFO -
  24789. Worker ID | finished | started
  24790. ----------+----------+--------
  24791. 0 | 7 | 9
  24792. 2025-07-20 17:41:47,268 - sglang - INFO - [2025-07-20 17:41:47 TP0] Decode batch. #running-req: 2, #token: 6577, token usage: 0.17, gen throughput (token/s): 93.50, #queue-req: 0
  24793. 2025-07-20 17:41:47,269 - __main__ - INFO - sglang running req: 2 queue req: 0
  24794. 2025-07-20 17:41:48,113 - sglang - INFO - [2025-07-20 17:41:48 TP0] Decode batch. #running-req: 2, #token: 6657, token usage: 0.18, gen throughput (token/s): 94.74, #queue-req: 0
  24795. 2025-07-20 17:41:48,113 - __main__ - INFO - sglang running req: 2 queue req: 0
  24796. 2025-07-20 17:41:48,959 - sglang - INFO - [2025-07-20 17:41:48 TP0] Decode batch. #running-req: 2, #token: 6737, token usage: 0.18, gen throughput (token/s): 94.54, #queue-req: 0
  24797. 2025-07-20 17:41:48,959 - __main__ - INFO - sglang running req: 2 queue req: 0
  24798. 2025-07-20 17:41:49,812 - sglang - INFO - [2025-07-20 17:41:49 TP0] Decode batch. #running-req: 2, #token: 6817, token usage: 0.18, gen throughput (token/s): 93.76, #queue-req: 0
  24799. 2025-07-20 17:41:49,812 - __main__ - INFO - sglang running req: 2 queue req: 0
  24800. 2025-07-20 17:41:50,661 - sglang - INFO - [2025-07-20 17:41:50 TP0] Decode batch. #running-req: 2, #token: 6897, token usage: 0.18, gen throughput (token/s): 94.27, #queue-req: 0
  24801. 2025-07-20 17:41:50,661 - __main__ - INFO - sglang running req: 2 queue req: 0
  24802. 2025-07-20 17:41:51,512 - sglang - INFO - [2025-07-20 17:41:51 TP0] Decode batch. #running-req: 1, #token: 3766, token usage: 0.10, gen throughput (token/s): 92.83, #queue-req: 0
  24803. 2025-07-20 17:41:51,512 - __main__ - INFO - sglang running req: 1 queue req: 0
  24804. 2025-07-20 17:41:52,354 - sglang - INFO - [2025-07-20 17:41:52 TP0] Decode batch. #running-req: 1, #token: 3806, token usage: 0.10, gen throughput (token/s): 47.51, #queue-req: 0
  24805. 2025-07-20 17:41:52,354 - __main__ - INFO - sglang running req: 1 queue req: 0
  24806. 2025-07-20 17:41:53,201 - sglang - INFO - [2025-07-20 17:41:53 TP0] Decode batch. #running-req: 1, #token: 3846, token usage: 0.10, gen throughput (token/s): 47.22, #queue-req: 0
  24807. 2025-07-20 17:41:53,201 - __main__ - INFO - sglang running req: 1 queue req: 0
  24808. 2025-07-20 17:41:54,048 - sglang - INFO - [2025-07-20 17:41:54 TP0] Decode batch. #running-req: 1, #token: 3886, token usage: 0.10, gen throughput (token/s): 47.23, #queue-req: 0
  24809. 2025-07-20 17:41:54,048 - __main__ - INFO - sglang running req: 1 queue req: 0
  24810. 2025-07-20 17:41:54,883 - sglang - INFO - [2025-07-20 17:41:54 TP0] Decode batch. #running-req: 1, #token: 3926, token usage: 0.10, gen throughput (token/s): 47.93, #queue-req: 0
  24811. 2025-07-20 17:41:54,883 - __main__ - INFO - sglang running req: 1 queue req: 0
  24812. 2025-07-20 17:41:55,718 - sglang - INFO - [2025-07-20 17:41:55 TP0] Decode batch. #running-req: 1, #token: 3966, token usage: 0.10, gen throughput (token/s): 47.87, #queue-req: 0
  24813. 2025-07-20 17:41:55,718 - __main__ - INFO - sglang running req: 1 queue req: 0
  24814. 2025-07-20 17:41:56,562 - sglang - INFO - [2025-07-20 17:41:56 TP0] Decode batch. #running-req: 1, #token: 4006, token usage: 0.11, gen throughput (token/s): 47.41, #queue-req: 0
  24815. 2025-07-20 17:41:56,562 - __main__ - INFO - sglang running req: 1 queue req: 0
  24816. 2025-07-20 17:41:56,914 - __main__ - INFO - Queue remaining: 0
  24817. 2025-07-20 17:41:56,915 - __main__ - INFO -
  24818. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24819. ----------------------------------------------------------------------------------
  24820. finished_input_tokens 456.73 548.22
  24821. finished_output_tokens 109.69 130.29
  24822. sglang_input_tokens 479.40 512.07
  24823. sglang_output_tokens 122.11 129.87
  24824. 2025-07-20 17:41:56,915 - __main__ - INFO -
  24825. Worker ID | finished | started
  24826. ----------+----------+--------
  24827. 0 | 8 | 9
  24828. 2025-07-20 17:41:57,407 - sglang - INFO - [2025-07-20 17:41:57 TP0] Decode batch. #running-req: 1, #token: 4046, token usage: 0.11, gen throughput (token/s): 47.35, #queue-req: 0
  24829. 2025-07-20 17:41:57,407 - __main__ - INFO - sglang running req: 1 queue req: 0
  24830. 2025-07-20 17:41:58,250 - sglang - INFO - [2025-07-20 17:41:58 TP0] Decode batch. #running-req: 1, #token: 4086, token usage: 0.11, gen throughput (token/s): 47.44, #queue-req: 0
  24831. 2025-07-20 17:41:58,250 - __main__ - INFO - sglang running req: 1 queue req: 0
  24832. 2025-07-20 17:41:59,093 - sglang - INFO - [2025-07-20 17:41:59 TP0] Decode batch. #running-req: 1, #token: 4126, token usage: 0.11, gen throughput (token/s): 47.41, #queue-req: 0
  24833. 2025-07-20 17:41:59,094 - __main__ - INFO - sglang running req: 1 queue req: 0
  24834. 2025-07-20 17:41:59,942 - sglang - INFO - [2025-07-20 17:41:59 TP0] Decode batch. #running-req: 1, #token: 4166, token usage: 0.11, gen throughput (token/s): 47.12, #queue-req: 0
  24835. 2025-07-20 17:41:59,942 - __main__ - INFO - sglang running req: 1 queue req: 0
  24836. 2025-07-20 17:42:00,793 - sglang - INFO - [2025-07-20 17:42:00 TP0] Decode batch. #running-req: 1, #token: 4206, token usage: 0.11, gen throughput (token/s): 47.03, #queue-req: 0
  24837. 2025-07-20 17:42:00,793 - __main__ - INFO - sglang running req: 1 queue req: 0
  24838. 2025-07-20 17:42:01,630 - sglang - INFO - [2025-07-20 17:42:01 TP0] Decode batch. #running-req: 1, #token: 4246, token usage: 0.11, gen throughput (token/s): 47.75, #queue-req: 0
  24839. 2025-07-20 17:42:01,630 - __main__ - INFO - sglang running req: 1 queue req: 0
  24840. 2025-07-20 17:42:02,467 - sglang - INFO - [2025-07-20 17:42:02 TP0] Decode batch. #running-req: 1, #token: 4286, token usage: 0.11, gen throughput (token/s): 47.84, #queue-req: 0
  24841. 2025-07-20 17:42:02,467 - __main__ - INFO - sglang running req: 1 queue req: 0
  24842. 2025-07-20 17:42:03,313 - sglang - INFO - [2025-07-20 17:42:03 TP0] Decode batch. #running-req: 1, #token: 4326, token usage: 0.11, gen throughput (token/s): 47.24, #queue-req: 0
  24843. 2025-07-20 17:42:03,313 - __main__ - INFO - sglang running req: 1 queue req: 0
  24844. 2025-07-20 17:42:04,164 - sglang - INFO - [2025-07-20 17:42:04 TP0] Decode batch. #running-req: 1, #token: 4366, token usage: 0.11, gen throughput (token/s): 47.01, #queue-req: 0
  24845. 2025-07-20 17:42:04,164 - __main__ - INFO - sglang running req: 1 queue req: 0
  24846. 2025-07-20 17:42:05,015 - sglang - INFO - [2025-07-20 17:42:05 TP0] Decode batch. #running-req: 1, #token: 4406, token usage: 0.12, gen throughput (token/s): 47.00, #queue-req: 0
  24847. 2025-07-20 17:42:05,015 - __main__ - INFO - sglang running req: 1 queue req: 0
  24848. 2025-07-20 17:42:05,863 - sglang - INFO - [2025-07-20 17:42:05 TP0] Decode batch. #running-req: 1, #token: 4446, token usage: 0.12, gen throughput (token/s): 47.18, #queue-req: 0
  24849. 2025-07-20 17:42:05,863 - __main__ - INFO - sglang running req: 1 queue req: 0
  24850. 2025-07-20 17:42:06,713 - sglang - INFO - [2025-07-20 17:42:06 TP0] Decode batch. #running-req: 1, #token: 4486, token usage: 0.12, gen throughput (token/s): 47.07, #queue-req: 0
  24851. 2025-07-20 17:42:06,713 - __main__ - INFO - sglang running req: 1 queue req: 0
  24852. 2025-07-20 17:42:06,916 - __main__ - INFO - Queue remaining: 0
  24853. 2025-07-20 17:42:06,916 - __main__ - INFO -
  24854. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24855. ----------------------------------------------------------------------------------
  24856. finished_input_tokens 452.34 548.22
  24857. finished_output_tokens 108.63 130.29
  24858. sglang_input_tokens 474.79 512.07
  24859. sglang_output_tokens 120.93 129.87
  24860. 2025-07-20 17:42:06,916 - __main__ - INFO -
  24861. Worker ID | finished | started
  24862. ----------+----------+--------
  24863. 0 | 8 | 9
  24864. 2025-07-20 17:42:07,568 - sglang - INFO - [2025-07-20 17:42:07 TP0] Decode batch. #running-req: 1, #token: 4526, token usage: 0.12, gen throughput (token/s): 46.80, #queue-req: 0
  24865. 2025-07-20 17:42:07,568 - __main__ - INFO - sglang running req: 1 queue req: 0
  24866. 2025-07-20 17:42:08,415 - sglang - INFO - [2025-07-20 17:42:08 TP0] Decode batch. #running-req: 1, #token: 4566, token usage: 0.12, gen throughput (token/s): 47.21, #queue-req: 0
  24867. 2025-07-20 17:42:08,415 - __main__ - INFO - sglang running req: 1 queue req: 0
  24868. 2025-07-20 17:42:09,255 - sglang - INFO - [2025-07-20 17:42:09 TP0] Decode batch. #running-req: 1, #token: 4606, token usage: 0.12, gen throughput (token/s): 47.58, #queue-req: 0
  24869. 2025-07-20 17:42:09,256 - __main__ - INFO - sglang running req: 1 queue req: 0
  24870. 2025-07-20 17:42:10,099 - sglang - INFO - [2025-07-20 17:42:10 TP0] Decode batch. #running-req: 1, #token: 4646, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
  24871. 2025-07-20 17:42:10,099 - __main__ - INFO - sglang running req: 1 queue req: 0
  24872. 2025-07-20 17:42:10,948 - sglang - INFO - [2025-07-20 17:42:10 TP0] Decode batch. #running-req: 1, #token: 4686, token usage: 0.12, gen throughput (token/s): 47.10, #queue-req: 0
  24873. 2025-07-20 17:42:10,949 - __main__ - INFO - sglang running req: 1 queue req: 0
  24874. 2025-07-20 17:42:11,796 - sglang - INFO - [2025-07-20 17:42:11 TP0] Decode batch. #running-req: 1, #token: 4726, token usage: 0.12, gen throughput (token/s): 47.19, #queue-req: 0
  24875. 2025-07-20 17:42:11,796 - __main__ - INFO - sglang running req: 1 queue req: 0
  24876. 2025-07-20 17:42:12,647 - sglang - INFO - [2025-07-20 17:42:12 TP0] Decode batch. #running-req: 1, #token: 4766, token usage: 0.13, gen throughput (token/s): 46.99, #queue-req: 0
  24877. 2025-07-20 17:42:12,648 - __main__ - INFO - sglang running req: 1 queue req: 0
  24878. 2025-07-20 17:42:13,499 - sglang - INFO - [2025-07-20 17:42:13 TP0] Decode batch. #running-req: 1, #token: 4806, token usage: 0.13, gen throughput (token/s): 46.96, #queue-req: 0
  24879. 2025-07-20 17:42:13,499 - __main__ - INFO - sglang running req: 1 queue req: 0
  24880. 2025-07-20 17:42:14,354 - sglang - INFO - [2025-07-20 17:42:14 TP0] Decode batch. #running-req: 1, #token: 4846, token usage: 0.13, gen throughput (token/s): 46.79, #queue-req: 0
  24881. 2025-07-20 17:42:14,354 - __main__ - INFO - sglang running req: 1 queue req: 0
  24882. 2025-07-20 17:42:15,208 - sglang - INFO - [2025-07-20 17:42:15 TP0] Decode batch. #running-req: 1, #token: 4886, token usage: 0.13, gen throughput (token/s): 46.84, #queue-req: 0
  24883. 2025-07-20 17:42:15,208 - __main__ - INFO - sglang running req: 1 queue req: 0
  24884. 2025-07-20 17:42:16,050 - sglang - INFO - [2025-07-20 17:42:16 TP0] Decode batch. #running-req: 1, #token: 4926, token usage: 0.13, gen throughput (token/s): 47.51, #queue-req: 0
  24885. 2025-07-20 17:42:16,050 - __main__ - INFO - sglang running req: 1 queue req: 0
  24886. 2025-07-20 17:42:16,891 - sglang - INFO - [2025-07-20 17:42:16 TP0] Decode batch. #running-req: 1, #token: 4966, token usage: 0.13, gen throughput (token/s): 47.57, #queue-req: 0
  24887. 2025-07-20 17:42:16,891 - __main__ - INFO - sglang running req: 1 queue req: 0
  24888. 2025-07-20 17:42:16,918 - __main__ - INFO - Queue remaining: 0
  24889. 2025-07-20 17:42:16,919 - __main__ - INFO -
  24890. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24891. ----------------------------------------------------------------------------------
  24892. finished_input_tokens 448.04 548.22
  24893. finished_output_tokens 107.60 130.29
  24894. sglang_input_tokens 470.27 512.07
  24895. sglang_output_tokens 119.78 129.87
  24896. 2025-07-20 17:42:16,919 - __main__ - INFO -
  24897. Worker ID | finished | started
  24898. ----------+----------+--------
  24899. 0 | 8 | 9
  24900. 2025-07-20 17:42:17,737 - sglang - INFO - [2025-07-20 17:42:17 TP0] Decode batch. #running-req: 1, #token: 5006, token usage: 0.13, gen throughput (token/s): 47.24, #queue-req: 0
  24901. 2025-07-20 17:42:17,738 - __main__ - INFO - sglang running req: 1 queue req: 0
  24902. 2025-07-20 17:42:18,589 - sglang - INFO - [2025-07-20 17:42:18 TP0] Decode batch. #running-req: 1, #token: 5046, token usage: 0.13, gen throughput (token/s): 46.96, #queue-req: 0
  24903. 2025-07-20 17:42:18,589 - __main__ - INFO - sglang running req: 1 queue req: 0
  24904. 2025-07-20 17:42:19,441 - sglang - INFO - [2025-07-20 17:42:19 TP0] Decode batch. #running-req: 1, #token: 5086, token usage: 0.13, gen throughput (token/s): 46.97, #queue-req: 0
  24905. 2025-07-20 17:42:19,441 - __main__ - INFO - sglang running req: 1 queue req: 0
  24906. 2025-07-20 17:42:20,294 - sglang - INFO - [2025-07-20 17:42:20 TP0] Decode batch. #running-req: 1, #token: 5126, token usage: 0.13, gen throughput (token/s): 46.89, #queue-req: 0
  24907. 2025-07-20 17:42:20,294 - __main__ - INFO - sglang running req: 1 queue req: 0
  24908. 2025-07-20 17:42:21,148 - sglang - INFO - [2025-07-20 17:42:21 TP0] Decode batch. #running-req: 1, #token: 5166, token usage: 0.14, gen throughput (token/s): 46.84, #queue-req: 0
  24909. 2025-07-20 17:42:21,148 - __main__ - INFO - sglang running req: 1 queue req: 0
  24910. 2025-07-20 17:42:22,000 - sglang - INFO - [2025-07-20 17:42:22 TP0] Decode batch. #running-req: 1, #token: 5206, token usage: 0.14, gen throughput (token/s): 46.93, #queue-req: 0
  24911. 2025-07-20 17:42:22,000 - __main__ - INFO - sglang running req: 1 queue req: 0
  24912. 2025-07-20 17:42:22,842 - sglang - INFO - [2025-07-20 17:42:22 TP0] Decode batch. #running-req: 1, #token: 5246, token usage: 0.14, gen throughput (token/s): 47.51, #queue-req: 0
  24913. 2025-07-20 17:42:22,842 - __main__ - INFO - sglang running req: 1 queue req: 0
  24914. 2025-07-20 17:42:23,686 - sglang - INFO - [2025-07-20 17:42:23 TP0] Decode batch. #running-req: 1, #token: 5286, token usage: 0.14, gen throughput (token/s): 47.41, #queue-req: 0
  24915. 2025-07-20 17:42:23,686 - __main__ - INFO - sglang running req: 1 queue req: 0
  24916. 2025-07-20 17:42:24,538 - sglang - INFO - [2025-07-20 17:42:24 TP0] Decode batch. #running-req: 1, #token: 5326, token usage: 0.14, gen throughput (token/s): 46.96, #queue-req: 0
  24917. 2025-07-20 17:42:24,538 - __main__ - INFO - sglang running req: 1 queue req: 0
  24918. 2025-07-20 17:42:25,390 - sglang - INFO - [2025-07-20 17:42:25 TP0] Decode batch. #running-req: 1, #token: 5366, token usage: 0.14, gen throughput (token/s): 46.95, #queue-req: 0
  24919. 2025-07-20 17:42:25,390 - __main__ - INFO - sglang running req: 1 queue req: 0
  24920. 2025-07-20 17:42:26,238 - sglang - INFO - [2025-07-20 17:42:26 TP0] Decode batch. #running-req: 1, #token: 5406, token usage: 0.14, gen throughput (token/s): 47.17, #queue-req: 0
  24921. 2025-07-20 17:42:26,238 - __main__ - INFO - sglang running req: 1 queue req: 0
  24922. 2025-07-20 17:42:26,920 - __main__ - INFO - Queue remaining: 0
  24923. 2025-07-20 17:42:26,921 - __main__ - INFO -
  24924. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24925. ----------------------------------------------------------------------------------
  24926. finished_input_tokens 443.81 548.22
  24927. finished_output_tokens 106.58 130.29
  24928. sglang_input_tokens 465.83 512.07
  24929. sglang_output_tokens 118.65 129.87
  24930. 2025-07-20 17:42:26,921 - __main__ - INFO -
  24931. Worker ID | finished | started
  24932. ----------+----------+--------
  24933. 0 | 8 | 9
  24934. 2025-07-20 17:42:27,080 - sglang - INFO - [2025-07-20 17:42:27 TP0] Decode batch. #running-req: 1, #token: 5446, token usage: 0.14, gen throughput (token/s): 47.47, #queue-req: 0
  24935. 2025-07-20 17:42:27,081 - __main__ - INFO - sglang running req: 1 queue req: 0
  24936. 2025-07-20 17:42:27,931 - sglang - INFO - [2025-07-20 17:42:27 TP0] Decode batch. #running-req: 1, #token: 5486, token usage: 0.14, gen throughput (token/s): 47.03, #queue-req: 0
  24937. 2025-07-20 17:42:27,931 - __main__ - INFO - sglang running req: 1 queue req: 0
  24938. 2025-07-20 17:42:28,781 - sglang - INFO - [2025-07-20 17:42:28 TP0] Decode batch. #running-req: 1, #token: 5526, token usage: 0.15, gen throughput (token/s): 47.04, #queue-req: 0
  24939. 2025-07-20 17:42:28,781 - __main__ - INFO - sglang running req: 1 queue req: 0
  24940. 2025-07-20 17:42:29,625 - sglang - INFO - [2025-07-20 17:42:29 TP0] Decode batch. #running-req: 1, #token: 5566, token usage: 0.15, gen throughput (token/s): 47.44, #queue-req: 0
  24941. 2025-07-20 17:42:29,625 - __main__ - INFO - sglang running req: 1 queue req: 0
  24942. 2025-07-20 17:42:30,468 - sglang - INFO - [2025-07-20 17:42:30 TP0] Decode batch. #running-req: 1, #token: 5606, token usage: 0.15, gen throughput (token/s): 47.43, #queue-req: 0
  24943. 2025-07-20 17:42:30,468 - __main__ - INFO - sglang running req: 1 queue req: 0
  24944. 2025-07-20 17:42:30,496 - __main__ - WARNING - JSON decode error on attempt 0 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
  24945. 2025-07-20 17:42:30,684 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  24946. 2025-07-20 17:42:30,904 - sglang - INFO - [2025-07-20 17:42:30 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  24947. 2025-07-20 17:42:30,905 - __main__ - INFO - sglang running req: 0 queue req: 0
  24948. 2025-07-20 17:42:32,503 - sglang - INFO - [2025-07-20 17:42:32 TP0] Decode batch. #running-req: 1, #token: 2647, token usage: 0.07, gen throughput (token/s): 19.66, #queue-req: 0
  24949. 2025-07-20 17:42:32,503 - __main__ - INFO - sglang running req: 1 queue req: 0
  24950. 2025-07-20 17:42:33,341 - sglang - INFO - [2025-07-20 17:42:33 TP0] Decode batch. #running-req: 1, #token: 2687, token usage: 0.07, gen throughput (token/s): 47.72, #queue-req: 0
  24951. 2025-07-20 17:42:33,341 - __main__ - INFO - sglang running req: 1 queue req: 0
  24952. 2025-07-20 17:42:34,175 - sglang - INFO - [2025-07-20 17:42:34 TP0] Decode batch. #running-req: 1, #token: 2727, token usage: 0.07, gen throughput (token/s): 47.99, #queue-req: 0
  24953. 2025-07-20 17:42:34,175 - __main__ - INFO - sglang running req: 1 queue req: 0
  24954. 2025-07-20 17:42:35,009 - sglang - INFO - [2025-07-20 17:42:35 TP0] Decode batch. #running-req: 1, #token: 2767, token usage: 0.07, gen throughput (token/s): 47.96, #queue-req: 0
  24955. 2025-07-20 17:42:35,009 - __main__ - INFO - sglang running req: 1 queue req: 0
  24956. 2025-07-20 17:42:35,849 - sglang - INFO - [2025-07-20 17:42:35 TP0] Decode batch. #running-req: 1, #token: 2807, token usage: 0.07, gen throughput (token/s): 47.58, #queue-req: 0
  24957. 2025-07-20 17:42:35,850 - __main__ - INFO - sglang running req: 1 queue req: 0
  24958. 2025-07-20 17:42:36,683 - sglang - INFO - [2025-07-20 17:42:36 TP0] Decode batch. #running-req: 1, #token: 2847, token usage: 0.07, gen throughput (token/s): 48.01, #queue-req: 0
  24959. 2025-07-20 17:42:36,683 - __main__ - INFO - sglang running req: 1 queue req: 0
  24960. 2025-07-20 17:42:36,924 - __main__ - INFO - Queue remaining: 0
  24961. 2025-07-20 17:42:36,924 - __main__ - INFO -
  24962. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24963. ----------------------------------------------------------------------------------
  24964. finished_input_tokens 439.66 548.22
  24965. finished_output_tokens 105.59 130.29
  24966. sglang_input_tokens 463.92 520.76
  24967. sglang_output_tokens 120.35 139.87
  24968. 2025-07-20 17:42:36,924 - __main__ - INFO -
  24969. Worker ID | finished | started
  24970. ----------+----------+--------
  24971. 0 | 8 | 9
  24972. 2025-07-20 17:42:37,514 - sglang - INFO - [2025-07-20 17:42:37 TP0] Decode batch. #running-req: 1, #token: 2887, token usage: 0.08, gen throughput (token/s): 48.09, #queue-req: 0
  24973. 2025-07-20 17:42:37,515 - __main__ - INFO - sglang running req: 1 queue req: 0
  24974. 2025-07-20 17:42:38,351 - sglang - INFO - [2025-07-20 17:42:38 TP0] Decode batch. #running-req: 1, #token: 2927, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
  24975. 2025-07-20 17:42:38,351 - __main__ - INFO - sglang running req: 1 queue req: 0
  24976. 2025-07-20 17:42:39,195 - sglang - INFO - [2025-07-20 17:42:39 TP0] Decode batch. #running-req: 1, #token: 2967, token usage: 0.08, gen throughput (token/s): 47.37, #queue-req: 0
  24977. 2025-07-20 17:42:39,195 - __main__ - INFO - sglang running req: 1 queue req: 0
  24978. 2025-07-20 17:42:40,033 - sglang - INFO - [2025-07-20 17:42:40 TP0] Decode batch. #running-req: 1, #token: 3007, token usage: 0.08, gen throughput (token/s): 47.76, #queue-req: 0
  24979. 2025-07-20 17:42:40,033 - __main__ - INFO - sglang running req: 1 queue req: 0
  24980. 2025-07-20 17:42:40,869 - sglang - INFO - [2025-07-20 17:42:40 TP0] Decode batch. #running-req: 1, #token: 3047, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
  24981. 2025-07-20 17:42:40,870 - __main__ - INFO - sglang running req: 1 queue req: 0
  24982. 2025-07-20 17:42:41,707 - sglang - INFO - [2025-07-20 17:42:41 TP0] Decode batch. #running-req: 1, #token: 3087, token usage: 0.08, gen throughput (token/s): 47.77, #queue-req: 0
  24983. 2025-07-20 17:42:41,707 - __main__ - INFO - sglang running req: 1 queue req: 0
  24984. 2025-07-20 17:42:42,550 - sglang - INFO - [2025-07-20 17:42:42 TP0] Decode batch. #running-req: 1, #token: 3127, token usage: 0.08, gen throughput (token/s): 47.42, #queue-req: 0
  24985. 2025-07-20 17:42:42,550 - __main__ - INFO - sglang running req: 1 queue req: 0
  24986. 2025-07-20 17:42:43,392 - sglang - INFO - [2025-07-20 17:42:43 TP0] Decode batch. #running-req: 1, #token: 3167, token usage: 0.08, gen throughput (token/s): 47.50, #queue-req: 0
  24987. 2025-07-20 17:42:43,392 - __main__ - INFO - sglang running req: 1 queue req: 0
  24988. 2025-07-20 17:42:44,225 - sglang - INFO - [2025-07-20 17:42:44 TP0] Decode batch. #running-req: 1, #token: 3207, token usage: 0.08, gen throughput (token/s): 48.04, #queue-req: 0
  24989. 2025-07-20 17:42:44,225 - __main__ - INFO - sglang running req: 1 queue req: 0
  24990. 2025-07-20 17:42:45,061 - sglang - INFO - [2025-07-20 17:42:45 TP0] Decode batch. #running-req: 1, #token: 3247, token usage: 0.09, gen throughput (token/s): 47.83, #queue-req: 0
  24991. 2025-07-20 17:42:45,061 - __main__ - INFO - sglang running req: 1 queue req: 0
  24992. 2025-07-20 17:42:45,902 - sglang - INFO - [2025-07-20 17:42:45 TP0] Decode batch. #running-req: 1, #token: 3287, token usage: 0.09, gen throughput (token/s): 47.59, #queue-req: 0
  24993. 2025-07-20 17:42:45,902 - __main__ - INFO - sglang running req: 1 queue req: 0
  24994. 2025-07-20 17:42:46,740 - sglang - INFO - [2025-07-20 17:42:46 TP0] Decode batch. #running-req: 1, #token: 3327, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
  24995. 2025-07-20 17:42:46,741 - __main__ - INFO - sglang running req: 1 queue req: 0
  24996. 2025-07-20 17:42:46,925 - __main__ - INFO - Queue remaining: 0
  24997. 2025-07-20 17:42:46,926 - __main__ - INFO -
  24998. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  24999. ----------------------------------------------------------------------------------
  25000. finished_input_tokens 435.59 548.22
  25001. finished_output_tokens 104.61 130.29
  25002. sglang_input_tokens 459.62 520.76
  25003. sglang_output_tokens 119.23 139.87
  25004. 2025-07-20 17:42:46,926 - __main__ - INFO -
  25005. Worker ID | finished | started
  25006. ----------+----------+--------
  25007. 0 | 8 | 9
  25008. 2025-07-20 17:42:47,578 - sglang - INFO - [2025-07-20 17:42:47 TP0] Decode batch. #running-req: 1, #token: 3367, token usage: 0.09, gen throughput (token/s): 47.73, #queue-req: 0
  25009. 2025-07-20 17:42:47,578 - __main__ - INFO - sglang running req: 1 queue req: 0
  25010. 2025-07-20 17:42:48,419 - sglang - INFO - [2025-07-20 17:42:48 TP0] Decode batch. #running-req: 1, #token: 3407, token usage: 0.09, gen throughput (token/s): 47.61, #queue-req: 0
  25011. 2025-07-20 17:42:48,419 - __main__ - INFO - sglang running req: 1 queue req: 0
  25012. 2025-07-20 17:42:49,264 - sglang - INFO - [2025-07-20 17:42:49 TP0] Decode batch. #running-req: 1, #token: 3447, token usage: 0.09, gen throughput (token/s): 47.30, #queue-req: 0
  25013. 2025-07-20 17:42:49,264 - __main__ - INFO - sglang running req: 1 queue req: 0
  25014. 2025-07-20 17:42:50,108 - sglang - INFO - [2025-07-20 17:42:50 TP0] Decode batch. #running-req: 1, #token: 3487, token usage: 0.09, gen throughput (token/s): 47.43, #queue-req: 0
  25015. 2025-07-20 17:42:50,108 - __main__ - INFO - sglang running req: 1 queue req: 0
  25016. 2025-07-20 17:42:50,941 - sglang - INFO - [2025-07-20 17:42:50 TP0] Decode batch. #running-req: 1, #token: 3527, token usage: 0.09, gen throughput (token/s): 48.01, #queue-req: 0
  25017. 2025-07-20 17:42:50,941 - __main__ - INFO - sglang running req: 1 queue req: 0
  25018. 2025-07-20 17:42:51,775 - sglang - INFO - [2025-07-20 17:42:51 TP0] Decode batch. #running-req: 1, #token: 3567, token usage: 0.09, gen throughput (token/s): 47.95, #queue-req: 0
  25019. 2025-07-20 17:42:51,775 - __main__ - INFO - sglang running req: 1 queue req: 0
  25020. 2025-07-20 17:42:52,615 - sglang - INFO - [2025-07-20 17:42:52 TP0] Decode batch. #running-req: 1, #token: 3607, token usage: 0.09, gen throughput (token/s): 47.60, #queue-req: 0
  25021. 2025-07-20 17:42:52,615 - __main__ - INFO - sglang running req: 1 queue req: 0
  25022. 2025-07-20 17:42:53,453 - sglang - INFO - [2025-07-20 17:42:53 TP0] Decode batch. #running-req: 1, #token: 3647, token usage: 0.10, gen throughput (token/s): 47.74, #queue-req: 0
  25023. 2025-07-20 17:42:53,453 - __main__ - INFO - sglang running req: 1 queue req: 0
  25024. 2025-07-20 17:42:54,292 - sglang - INFO - [2025-07-20 17:42:54 TP0] Decode batch. #running-req: 1, #token: 3687, token usage: 0.10, gen throughput (token/s): 47.70, #queue-req: 0
  25025. 2025-07-20 17:42:54,292 - __main__ - INFO - sglang running req: 1 queue req: 0
  25026. 2025-07-20 17:42:55,132 - sglang - INFO - [2025-07-20 17:42:55 TP0] Decode batch. #running-req: 1, #token: 3727, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
  25027. 2025-07-20 17:42:55,132 - __main__ - INFO - sglang running req: 1 queue req: 0
  25028. 2025-07-20 17:42:55,976 - sglang - INFO - [2025-07-20 17:42:55 TP0] Decode batch. #running-req: 1, #token: 3767, token usage: 0.10, gen throughput (token/s): 47.39, #queue-req: 0
  25029. 2025-07-20 17:42:55,976 - __main__ - INFO - sglang running req: 1 queue req: 0
  25030. 2025-07-20 17:42:56,822 - sglang - INFO - [2025-07-20 17:42:56 TP0] Decode batch. #running-req: 1, #token: 3807, token usage: 0.10, gen throughput (token/s): 47.24, #queue-req: 0
  25031. 2025-07-20 17:42:56,822 - __main__ - INFO - sglang running req: 1 queue req: 0
  25032. 2025-07-20 17:42:56,928 - __main__ - INFO - Queue remaining: 0
  25033. 2025-07-20 17:42:56,928 - __main__ - INFO -
  25034. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25035. ----------------------------------------------------------------------------------
  25036. finished_input_tokens 431.60 548.22
  25037. finished_output_tokens 103.65 130.29
  25038. sglang_input_tokens 455.41 520.76
  25039. sglang_output_tokens 118.14 139.87
  25040. 2025-07-20 17:42:56,928 - __main__ - INFO -
  25041. Worker ID | finished | started
  25042. ----------+----------+--------
  25043. 0 | 8 | 9
  25044. 2025-07-20 17:42:57,661 - sglang - INFO - [2025-07-20 17:42:57 TP0] Decode batch. #running-req: 1, #token: 3847, token usage: 0.10, gen throughput (token/s): 47.67, #queue-req: 0
  25045. 2025-07-20 17:42:57,662 - __main__ - INFO - sglang running req: 1 queue req: 0
  25046. 2025-07-20 17:42:58,497 - sglang - INFO - [2025-07-20 17:42:58 TP0] Decode batch. #running-req: 1, #token: 3887, token usage: 0.10, gen throughput (token/s): 47.88, #queue-req: 0
  25047. 2025-07-20 17:42:58,497 - __main__ - INFO - sglang running req: 1 queue req: 0
  25048. 2025-07-20 17:42:59,336 - sglang - INFO - [2025-07-20 17:42:59 TP0] Decode batch. #running-req: 1, #token: 3927, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
  25049. 2025-07-20 17:42:59,336 - __main__ - INFO - sglang running req: 1 queue req: 0
  25050. 2025-07-20 17:43:00,178 - sglang - INFO - [2025-07-20 17:43:00 TP0] Decode batch. #running-req: 1, #token: 3967, token usage: 0.10, gen throughput (token/s): 47.50, #queue-req: 0
  25051. 2025-07-20 17:43:00,179 - __main__ - INFO - sglang running req: 1 queue req: 0
  25052. 2025-07-20 17:43:01,022 - sglang - INFO - [2025-07-20 17:43:01 TP0] Decode batch. #running-req: 1, #token: 4007, token usage: 0.11, gen throughput (token/s): 47.41, #queue-req: 0
  25053. 2025-07-20 17:43:01,022 - __main__ - INFO - sglang running req: 1 queue req: 0
  25054. 2025-07-20 17:43:01,865 - sglang - INFO - [2025-07-20 17:43:01 TP0] Decode batch. #running-req: 1, #token: 4047, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
  25055. 2025-07-20 17:43:01,865 - __main__ - INFO - sglang running req: 1 queue req: 0
  25056. 2025-07-20 17:43:02,709 - sglang - INFO - [2025-07-20 17:43:02 TP0] Decode batch. #running-req: 1, #token: 4087, token usage: 0.11, gen throughput (token/s): 47.34, #queue-req: 0
  25057. 2025-07-20 17:43:02,710 - __main__ - INFO - sglang running req: 1 queue req: 0
  25058. 2025-07-20 17:43:03,556 - sglang - INFO - [2025-07-20 17:43:03 TP0] Decode batch. #running-req: 1, #token: 4127, token usage: 0.11, gen throughput (token/s): 47.27, #queue-req: 0
  25059. 2025-07-20 17:43:03,556 - __main__ - INFO - sglang running req: 1 queue req: 0
  25060. 2025-07-20 17:43:04,394 - sglang - INFO - [2025-07-20 17:43:04 TP0] Decode batch. #running-req: 1, #token: 4167, token usage: 0.11, gen throughput (token/s): 47.72, #queue-req: 0
  25061. 2025-07-20 17:43:04,394 - __main__ - INFO - sglang running req: 1 queue req: 0
  25062. 2025-07-20 17:43:05,231 - sglang - INFO - [2025-07-20 17:43:05 TP0] Decode batch. #running-req: 1, #token: 4207, token usage: 0.11, gen throughput (token/s): 47.80, #queue-req: 0
  25063. 2025-07-20 17:43:05,231 - __main__ - INFO - sglang running req: 1 queue req: 0
  25064. 2025-07-20 17:43:06,070 - sglang - INFO - [2025-07-20 17:43:06 TP0] Decode batch. #running-req: 1, #token: 4247, token usage: 0.11, gen throughput (token/s): 47.64, #queue-req: 0
  25065. 2025-07-20 17:43:06,071 - __main__ - INFO - sglang running req: 1 queue req: 0
  25066. 2025-07-20 17:43:06,910 - sglang - INFO - [2025-07-20 17:43:06 TP0] Decode batch. #running-req: 1, #token: 4287, token usage: 0.11, gen throughput (token/s): 47.62, #queue-req: 0
  25067. 2025-07-20 17:43:06,911 - __main__ - INFO - sglang running req: 1 queue req: 0
  25068. 2025-07-20 17:43:06,930 - __main__ - INFO - Queue remaining: 0
  25069. 2025-07-20 17:43:06,930 - __main__ - INFO -
  25070. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25071. ----------------------------------------------------------------------------------
  25072. finished_input_tokens 427.68 548.22
  25073. finished_output_tokens 102.71 130.29
  25074. sglang_input_tokens 451.27 520.76
  25075. sglang_output_tokens 117.07 139.87
  25076. 2025-07-20 17:43:06,930 - __main__ - INFO -
  25077. Worker ID | finished | started
  25078. ----------+----------+--------
  25079. 0 | 8 | 9
  25080. 2025-07-20 17:43:07,755 - sglang - INFO - [2025-07-20 17:43:07 TP0] Decode batch. #running-req: 1, #token: 4327, token usage: 0.11, gen throughput (token/s): 47.38, #queue-req: 0
  25081. 2025-07-20 17:43:07,755 - __main__ - INFO - sglang running req: 1 queue req: 0
  25082. 2025-07-20 17:43:08,599 - sglang - INFO - [2025-07-20 17:43:08 TP0] Decode batch. #running-req: 1, #token: 4367, token usage: 0.11, gen throughput (token/s): 47.38, #queue-req: 0
  25083. 2025-07-20 17:43:08,599 - __main__ - INFO - sglang running req: 1 queue req: 0
  25084. 2025-07-20 17:43:09,445 - sglang - INFO - [2025-07-20 17:43:09 TP0] Decode batch. #running-req: 1, #token: 4407, token usage: 0.12, gen throughput (token/s): 47.25, #queue-req: 0
  25085. 2025-07-20 17:43:09,446 - __main__ - INFO - sglang running req: 1 queue req: 0
  25086. 2025-07-20 17:43:10,293 - sglang - INFO - [2025-07-20 17:43:10 TP0] Decode batch. #running-req: 1, #token: 4447, token usage: 0.12, gen throughput (token/s): 47.22, #queue-req: 0
  25087. 2025-07-20 17:43:10,293 - __main__ - INFO - sglang running req: 1 queue req: 0
  25088. 2025-07-20 17:43:11,134 - sglang - INFO - [2025-07-20 17:43:11 TP0] Decode batch. #running-req: 1, #token: 4487, token usage: 0.12, gen throughput (token/s): 47.54, #queue-req: 0
  25089. 2025-07-20 17:43:11,134 - __main__ - INFO - sglang running req: 1 queue req: 0
  25090. 2025-07-20 17:43:11,973 - sglang - INFO - [2025-07-20 17:43:11 TP0] Decode batch. #running-req: 1, #token: 4527, token usage: 0.12, gen throughput (token/s): 47.70, #queue-req: 0
  25091. 2025-07-20 17:43:11,973 - __main__ - INFO - sglang running req: 1 queue req: 0
  25092. 2025-07-20 17:43:12,813 - sglang - INFO - [2025-07-20 17:43:12 TP0] Decode batch. #running-req: 1, #token: 4567, token usage: 0.12, gen throughput (token/s): 47.59, #queue-req: 0
  25093. 2025-07-20 17:43:12,813 - __main__ - INFO - sglang running req: 1 queue req: 0
  25094. 2025-07-20 17:43:13,655 - sglang - INFO - [2025-07-20 17:43:13 TP0] Decode batch. #running-req: 1, #token: 4607, token usage: 0.12, gen throughput (token/s): 47.50, #queue-req: 0
  25095. 2025-07-20 17:43:13,655 - __main__ - INFO - sglang running req: 1 queue req: 0
  25096. 2025-07-20 17:43:14,499 - sglang - INFO - [2025-07-20 17:43:14 TP0] Decode batch. #running-req: 1, #token: 4647, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
  25097. 2025-07-20 17:43:14,499 - __main__ - INFO - sglang running req: 1 queue req: 0
  25098. 2025-07-20 17:43:15,340 - sglang - INFO - [2025-07-20 17:43:15 TP0] Decode batch. #running-req: 1, #token: 4687, token usage: 0.12, gen throughput (token/s): 47.57, #queue-req: 0
  25099. 2025-07-20 17:43:15,340 - __main__ - INFO - sglang running req: 1 queue req: 0
  25100. 2025-07-20 17:43:16,183 - sglang - INFO - [2025-07-20 17:43:16 TP0] Decode batch. #running-req: 1, #token: 4727, token usage: 0.12, gen throughput (token/s): 47.43, #queue-req: 0
  25101. 2025-07-20 17:43:16,183 - __main__ - INFO - sglang running req: 1 queue req: 0
  25102. 2025-07-20 17:43:16,932 - __main__ - INFO - Queue remaining: 0
  25103. 2025-07-20 17:43:16,932 - __main__ - INFO -
  25104. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25105. ----------------------------------------------------------------------------------
  25106. finished_input_tokens 423.83 548.22
  25107. finished_output_tokens 101.79 130.29
  25108. sglang_input_tokens 447.21 520.76
  25109. sglang_output_tokens 116.01 139.87
  25110. 2025-07-20 17:43:16,933 - __main__ - INFO -
  25111. Worker ID | finished | started
  25112. ----------+----------+--------
  25113. 0 | 8 | 9
  25114. 2025-07-20 17:43:17,029 - sglang - INFO - [2025-07-20 17:43:17 TP0] Decode batch. #running-req: 1, #token: 4767, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
  25115. 2025-07-20 17:43:17,029 - __main__ - INFO - sglang running req: 1 queue req: 0
  25116. 2025-07-20 17:43:17,868 - sglang - INFO - [2025-07-20 17:43:17 TP0] Decode batch. #running-req: 1, #token: 4807, token usage: 0.13, gen throughput (token/s): 47.66, #queue-req: 0
  25117. 2025-07-20 17:43:17,869 - __main__ - INFO - sglang running req: 1 queue req: 0
  25118. 2025-07-20 17:43:18,706 - sglang - INFO - [2025-07-20 17:43:18 TP0] Decode batch. #running-req: 1, #token: 4847, token usage: 0.13, gen throughput (token/s): 47.76, #queue-req: 0
  25119. 2025-07-20 17:43:18,706 - __main__ - INFO - sglang running req: 1 queue req: 0
  25120. 2025-07-20 17:43:19,549 - sglang - INFO - [2025-07-20 17:43:19 TP0] Decode batch. #running-req: 1, #token: 4887, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
  25121. 2025-07-20 17:43:19,549 - __main__ - INFO - sglang running req: 1 queue req: 0
  25122. 2025-07-20 17:43:20,396 - sglang - INFO - [2025-07-20 17:43:20 TP0] Decode batch. #running-req: 1, #token: 4927, token usage: 0.13, gen throughput (token/s): 47.23, #queue-req: 0
  25123. 2025-07-20 17:43:20,396 - __main__ - INFO - sglang running req: 1 queue req: 0
  25124. 2025-07-20 17:43:21,239 - sglang - INFO - [2025-07-20 17:43:21 TP0] Decode batch. #running-req: 1, #token: 4967, token usage: 0.13, gen throughput (token/s): 47.48, #queue-req: 0
  25125. 2025-07-20 17:43:21,239 - __main__ - INFO - sglang running req: 1 queue req: 0
  25126. 2025-07-20 17:43:22,077 - sglang - INFO - [2025-07-20 17:43:22 TP0] Decode batch. #running-req: 1, #token: 5007, token usage: 0.13, gen throughput (token/s): 47.72, #queue-req: 0
  25127. 2025-07-20 17:43:22,077 - __main__ - INFO - sglang running req: 1 queue req: 0
  25128. 2025-07-20 17:43:22,917 - sglang - INFO - [2025-07-20 17:43:22 TP0] Decode batch. #running-req: 1, #token: 5047, token usage: 0.13, gen throughput (token/s): 47.62, #queue-req: 0
  25129. 2025-07-20 17:43:22,917 - __main__ - INFO - sglang running req: 1 queue req: 0
  25130. 2025-07-20 17:43:23,764 - sglang - INFO - [2025-07-20 17:43:23 TP0] Decode batch. #running-req: 1, #token: 5087, token usage: 0.13, gen throughput (token/s): 47.21, #queue-req: 0
  25131. 2025-07-20 17:43:23,764 - __main__ - INFO - sglang running req: 1 queue req: 0
  25132. 2025-07-20 17:43:24,610 - sglang - INFO - [2025-07-20 17:43:24 TP0] Decode batch. #running-req: 1, #token: 5127, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
  25133. 2025-07-20 17:43:24,610 - __main__ - INFO - sglang running req: 1 queue req: 0
  25134. 2025-07-20 17:43:25,451 - sglang - INFO - [2025-07-20 17:43:25 TP0] Decode batch. #running-req: 1, #token: 5167, token usage: 0.14, gen throughput (token/s): 47.56, #queue-req: 0
  25135. 2025-07-20 17:43:25,451 - __main__ - INFO - sglang running req: 1 queue req: 0
  25136. 2025-07-20 17:43:26,292 - sglang - INFO - [2025-07-20 17:43:26 TP0] Decode batch. #running-req: 1, #token: 5207, token usage: 0.14, gen throughput (token/s): 47.52, #queue-req: 0
  25137. 2025-07-20 17:43:26,292 - __main__ - INFO - sglang running req: 1 queue req: 0
  25138. 2025-07-20 17:43:26,934 - __main__ - INFO - Queue remaining: 0
  25139. 2025-07-20 17:43:26,935 - __main__ - INFO -
  25140. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25141. ----------------------------------------------------------------------------------
  25142. finished_input_tokens 420.04 548.22
  25143. finished_output_tokens 100.88 130.29
  25144. sglang_input_tokens 443.21 520.76
  25145. sglang_output_tokens 114.98 139.87
  25146. 2025-07-20 17:43:26,935 - __main__ - INFO -
  25147. Worker ID | finished | started
  25148. ----------+----------+--------
  25149. 0 | 8 | 9
  25150. 2025-07-20 17:43:27,140 - sglang - INFO - [2025-07-20 17:43:27 TP0] Decode batch. #running-req: 1, #token: 5247, token usage: 0.14, gen throughput (token/s): 47.20, #queue-req: 0
  25151. 2025-07-20 17:43:27,140 - __main__ - INFO - sglang running req: 1 queue req: 0
  25152. 2025-07-20 17:43:27,986 - sglang - INFO - [2025-07-20 17:43:27 TP0] Decode batch. #running-req: 1, #token: 5287, token usage: 0.14, gen throughput (token/s): 47.28, #queue-req: 0
  25153. 2025-07-20 17:43:27,986 - __main__ - INFO - sglang running req: 1 queue req: 0
  25154. 2025-07-20 17:43:28,827 - sglang - INFO - [2025-07-20 17:43:28 TP0] Decode batch. #running-req: 1, #token: 5327, token usage: 0.14, gen throughput (token/s): 47.58, #queue-req: 0
  25155. 2025-07-20 17:43:28,827 - __main__ - INFO - sglang running req: 1 queue req: 0
  25156. 2025-07-20 17:43:29,667 - sglang - INFO - [2025-07-20 17:43:29 TP0] Decode batch. #running-req: 1, #token: 5367, token usage: 0.14, gen throughput (token/s): 47.59, #queue-req: 0
  25157. 2025-07-20 17:43:29,668 - __main__ - INFO - sglang running req: 1 queue req: 0
  25158. 2025-07-20 17:43:30,507 - sglang - INFO - [2025-07-20 17:43:30 TP0] Decode batch. #running-req: 1, #token: 5407, token usage: 0.14, gen throughput (token/s): 47.60, #queue-req: 0
  25159. 2025-07-20 17:43:30,508 - __main__ - INFO - sglang running req: 1 queue req: 0
  25160. 2025-07-20 17:43:31,355 - sglang - INFO - [2025-07-20 17:43:31 TP0] Decode batch. #running-req: 1, #token: 5447, token usage: 0.14, gen throughput (token/s): 47.18, #queue-req: 0
  25161. 2025-07-20 17:43:31,355 - __main__ - INFO - sglang running req: 1 queue req: 0
  25162. 2025-07-20 17:43:32,198 - sglang - INFO - [2025-07-20 17:43:32 TP0] Decode batch. #running-req: 1, #token: 5487, token usage: 0.14, gen throughput (token/s): 47.46, #queue-req: 0
  25163. 2025-07-20 17:43:32,198 - __main__ - INFO - sglang running req: 1 queue req: 0
  25164. 2025-07-20 17:43:33,040 - sglang - INFO - [2025-07-20 17:43:33 TP0] Decode batch. #running-req: 1, #token: 5527, token usage: 0.15, gen throughput (token/s): 47.54, #queue-req: 0
  25165. 2025-07-20 17:43:33,040 - __main__ - INFO - sglang running req: 1 queue req: 0
  25166. 2025-07-20 17:43:33,883 - sglang - INFO - [2025-07-20 17:43:33 TP0] Decode batch. #running-req: 1, #token: 5567, token usage: 0.15, gen throughput (token/s): 47.40, #queue-req: 0
  25167. 2025-07-20 17:43:33,884 - __main__ - INFO - sglang running req: 1 queue req: 0
  25168. 2025-07-20 17:43:34,730 - sglang - INFO - [2025-07-20 17:43:34 TP0] Decode batch. #running-req: 1, #token: 0, token usage: 0.00, gen throughput (token/s): 47.26, #queue-req: 0
  25169. 2025-07-20 17:43:34,730 - __main__ - INFO - sglang running req: 1 queue req: 0
  25170. 2025-07-20 17:43:34,736 - __main__ - WARNING - JSON decode error on attempt 1 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
  25171. 2025-07-20 17:43:34,924 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  25172. 2025-07-20 17:43:35,115 - sglang - INFO - [2025-07-20 17:43:35 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  25173. 2025-07-20 17:43:35,116 - __main__ - INFO - sglang running req: 0 queue req: 0
  25174. 2025-07-20 17:43:36,727 - sglang - INFO - [2025-07-20 17:43:36 TP0] Decode batch. #running-req: 1, #token: 2648, token usage: 0.07, gen throughput (token/s): 20.03, #queue-req: 0
  25175. 2025-07-20 17:43:36,727 - __main__ - INFO - sglang running req: 1 queue req: 0
  25176. 2025-07-20 17:43:36,937 - __main__ - INFO - Queue remaining: 0
  25177. 2025-07-20 17:43:36,937 - __main__ - INFO -
  25178. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25179. ----------------------------------------------------------------------------------
  25180. finished_input_tokens 416.33 358.35
  25181. finished_output_tokens 99.98 86.68
  25182. sglang_input_tokens 441.60 432.38
  25183. sglang_output_tokens 116.61 119.35
  25184. 2025-07-20 17:43:36,938 - __main__ - INFO -
  25185. Worker ID | finished | started
  25186. ----------+----------+--------
  25187. 0 | 8 | 9
  25188. 2025-07-20 17:43:37,565 - sglang - INFO - [2025-07-20 17:43:37 TP0] Decode batch. #running-req: 1, #token: 2688, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
  25189. 2025-07-20 17:43:37,565 - __main__ - INFO - sglang running req: 1 queue req: 0
  25190. 2025-07-20 17:43:38,408 - sglang - INFO - [2025-07-20 17:43:38 TP0] Decode batch. #running-req: 1, #token: 2728, token usage: 0.07, gen throughput (token/s): 47.46, #queue-req: 0
  25191. 2025-07-20 17:43:38,408 - __main__ - INFO - sglang running req: 1 queue req: 0
  25192. 2025-07-20 17:43:39,244 - sglang - INFO - [2025-07-20 17:43:39 TP0] Decode batch. #running-req: 1, #token: 2768, token usage: 0.07, gen throughput (token/s): 47.87, #queue-req: 0
  25193. 2025-07-20 17:43:39,244 - __main__ - INFO - sglang running req: 1 queue req: 0
  25194. 2025-07-20 17:43:40,076 - sglang - INFO - [2025-07-20 17:43:40 TP0] Decode batch. #running-req: 1, #token: 2808, token usage: 0.07, gen throughput (token/s): 48.04, #queue-req: 0
  25195. 2025-07-20 17:43:40,076 - __main__ - INFO - sglang running req: 1 queue req: 0
  25196. 2025-07-20 17:43:40,913 - sglang - INFO - [2025-07-20 17:43:40 TP0] Decode batch. #running-req: 1, #token: 2848, token usage: 0.07, gen throughput (token/s): 47.80, #queue-req: 0
  25197. 2025-07-20 17:43:40,913 - __main__ - INFO - sglang running req: 1 queue req: 0
  25198. 2025-07-20 17:43:41,750 - sglang - INFO - [2025-07-20 17:43:41 TP0] Decode batch. #running-req: 1, #token: 2888, token usage: 0.08, gen throughput (token/s): 47.78, #queue-req: 0
  25199. 2025-07-20 17:43:41,750 - __main__ - INFO - sglang running req: 1 queue req: 0
  25200. 2025-07-20 17:43:42,587 - sglang - INFO - [2025-07-20 17:43:42 TP0] Decode batch. #running-req: 1, #token: 2928, token usage: 0.08, gen throughput (token/s): 47.78, #queue-req: 0
  25201. 2025-07-20 17:43:42,588 - __main__ - INFO - sglang running req: 1 queue req: 0
  25202. 2025-07-20 17:43:43,427 - sglang - INFO - [2025-07-20 17:43:43 TP0] Decode batch. #running-req: 1, #token: 2968, token usage: 0.08, gen throughput (token/s): 47.66, #queue-req: 0
  25203. 2025-07-20 17:43:43,427 - __main__ - INFO - sglang running req: 1 queue req: 0
  25204. 2025-07-20 17:43:44,268 - sglang - INFO - [2025-07-20 17:43:44 TP0] Decode batch. #running-req: 1, #token: 3008, token usage: 0.08, gen throughput (token/s): 47.57, #queue-req: 0
  25205. 2025-07-20 17:43:44,268 - __main__ - INFO - sglang running req: 1 queue req: 0
  25206. 2025-07-20 17:43:45,111 - sglang - INFO - [2025-07-20 17:43:45 TP0] Decode batch. #running-req: 1, #token: 3048, token usage: 0.08, gen throughput (token/s): 47.43, #queue-req: 0
  25207. 2025-07-20 17:43:45,111 - __main__ - INFO - sglang running req: 1 queue req: 0
  25208. 2025-07-20 17:43:45,949 - sglang - INFO - [2025-07-20 17:43:45 TP0] Decode batch. #running-req: 1, #token: 3088, token usage: 0.08, gen throughput (token/s): 47.72, #queue-req: 0
  25209. 2025-07-20 17:43:45,950 - __main__ - INFO - sglang running req: 1 queue req: 0
  25210. 2025-07-20 17:43:46,781 - sglang - INFO - [2025-07-20 17:43:46 TP0] Decode batch. #running-req: 1, #token: 3128, token usage: 0.08, gen throughput (token/s): 48.10, #queue-req: 0
  25211. 2025-07-20 17:43:46,781 - __main__ - INFO - sglang running req: 1 queue req: 0
  25212. 2025-07-20 17:43:46,939 - __main__ - INFO - Queue remaining: 0
  25213. 2025-07-20 17:43:46,939 - __main__ - INFO -
  25214. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25215. ----------------------------------------------------------------------------------
  25216. finished_input_tokens 412.68 358.35
  25217. finished_output_tokens 99.11 86.68
  25218. sglang_input_tokens 437.73 432.38
  25219. sglang_output_tokens 115.59 119.35
  25220. 2025-07-20 17:43:46,939 - __main__ - INFO -
  25221. Worker ID | finished | started
  25222. ----------+----------+--------
  25223. 0 | 8 | 9
  25224. 2025-07-20 17:43:47,614 - sglang - INFO - [2025-07-20 17:43:47 TP0] Decode batch. #running-req: 1, #token: 3168, token usage: 0.08, gen throughput (token/s): 48.00, #queue-req: 0
  25225. 2025-07-20 17:43:47,614 - __main__ - INFO - sglang running req: 1 queue req: 0
  25226. 2025-07-20 17:43:48,455 - sglang - INFO - [2025-07-20 17:43:48 TP0] Decode batch. #running-req: 1, #token: 3208, token usage: 0.08, gen throughput (token/s): 47.57, #queue-req: 0
  25227. 2025-07-20 17:43:48,455 - __main__ - INFO - sglang running req: 1 queue req: 0
  25228. 2025-07-20 17:43:49,293 - sglang - INFO - [2025-07-20 17:43:49 TP0] Decode batch. #running-req: 1, #token: 3248, token usage: 0.09, gen throughput (token/s): 47.74, #queue-req: 0
  25229. 2025-07-20 17:43:49,293 - __main__ - INFO - sglang running req: 1 queue req: 0
  25230. 2025-07-20 17:43:50,131 - sglang - INFO - [2025-07-20 17:43:50 TP0] Decode batch. #running-req: 1, #token: 3288, token usage: 0.09, gen throughput (token/s): 47.72, #queue-req: 0
  25231. 2025-07-20 17:43:50,132 - __main__ - INFO - sglang running req: 1 queue req: 0
  25232. 2025-07-20 17:43:50,970 - sglang - INFO - [2025-07-20 17:43:50 TP0] Decode batch. #running-req: 1, #token: 3328, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
  25233. 2025-07-20 17:43:50,970 - __main__ - INFO - sglang running req: 1 queue req: 0
  25234. 2025-07-20 17:43:51,814 - sglang - INFO - [2025-07-20 17:43:51 TP0] Decode batch. #running-req: 1, #token: 3368, token usage: 0.09, gen throughput (token/s): 47.41, #queue-req: 0
  25235. 2025-07-20 17:43:51,814 - __main__ - INFO - sglang running req: 1 queue req: 0
  25236. 2025-07-20 17:43:52,658 - sglang - INFO - [2025-07-20 17:43:52 TP0] Decode batch. #running-req: 1, #token: 3408, token usage: 0.09, gen throughput (token/s): 47.39, #queue-req: 0
  25237. 2025-07-20 17:43:52,658 - __main__ - INFO - sglang running req: 1 queue req: 0
  25238. 2025-07-20 17:43:53,493 - sglang - INFO - [2025-07-20 17:43:53 TP0] Decode batch. #running-req: 1, #token: 3448, token usage: 0.09, gen throughput (token/s): 47.90, #queue-req: 0
  25239. 2025-07-20 17:43:53,493 - __main__ - INFO - sglang running req: 1 queue req: 0
  25240. 2025-07-20 17:43:54,326 - sglang - INFO - [2025-07-20 17:43:54 TP0] Decode batch. #running-req: 1, #token: 3488, token usage: 0.09, gen throughput (token/s): 48.00, #queue-req: 0
  25241. 2025-07-20 17:43:54,326 - __main__ - INFO - sglang running req: 1 queue req: 0
  25242. 2025-07-20 17:43:55,165 - sglang - INFO - [2025-07-20 17:43:55 TP0] Decode batch. #running-req: 1, #token: 3528, token usage: 0.09, gen throughput (token/s): 47.71, #queue-req: 0
  25243. 2025-07-20 17:43:55,165 - __main__ - INFO - sglang running req: 1 queue req: 0
  25244. 2025-07-20 17:43:56,004 - sglang - INFO - [2025-07-20 17:43:56 TP0] Decode batch. #running-req: 1, #token: 3568, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
  25245. 2025-07-20 17:43:56,004 - __main__ - INFO - sglang running req: 1 queue req: 0
  25246. 2025-07-20 17:43:56,842 - sglang - INFO - [2025-07-20 17:43:56 TP0] Decode batch. #running-req: 1, #token: 3608, token usage: 0.09, gen throughput (token/s): 47.71, #queue-req: 0
  25247. 2025-07-20 17:43:56,843 - __main__ - INFO - sglang running req: 1 queue req: 0
  25248. 2025-07-20 17:43:56,940 - __main__ - INFO - Queue remaining: 0
  25249. 2025-07-20 17:43:56,941 - __main__ - INFO -
  25250. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25251. ----------------------------------------------------------------------------------
  25252. finished_input_tokens 409.09 358.35
  25253. finished_output_tokens 98.25 86.68
  25254. sglang_input_tokens 433.92 432.38
  25255. sglang_output_tokens 114.58 119.35
  25256. 2025-07-20 17:43:56,941 - __main__ - INFO -
  25257. Worker ID | finished | started
  25258. ----------+----------+--------
  25259. 0 | 8 | 9
  25260. 2025-07-20 17:43:57,683 - sglang - INFO - [2025-07-20 17:43:57 TP0] Decode batch. #running-req: 1, #token: 3648, token usage: 0.10, gen throughput (token/s): 47.56, #queue-req: 0
  25261. 2025-07-20 17:43:57,684 - __main__ - INFO - sglang running req: 1 queue req: 0
  25262. 2025-07-20 17:43:58,528 - sglang - INFO - [2025-07-20 17:43:58 TP0] Decode batch. #running-req: 1, #token: 3688, token usage: 0.10, gen throughput (token/s): 47.37, #queue-req: 0
  25263. 2025-07-20 17:43:58,528 - __main__ - INFO - sglang running req: 1 queue req: 0
  25264. 2025-07-20 17:43:59,374 - sglang - INFO - [2025-07-20 17:43:59 TP0] Decode batch. #running-req: 1, #token: 3728, token usage: 0.10, gen throughput (token/s): 47.27, #queue-req: 0
  25265. 2025-07-20 17:43:59,374 - __main__ - INFO - sglang running req: 1 queue req: 0
  25266. 2025-07-20 17:44:00,212 - sglang - INFO - [2025-07-20 17:44:00 TP0] Decode batch. #running-req: 1, #token: 3768, token usage: 0.10, gen throughput (token/s): 47.73, #queue-req: 0
  25267. 2025-07-20 17:44:00,212 - __main__ - INFO - sglang running req: 1 queue req: 0
  25268. 2025-07-20 17:44:01,047 - sglang - INFO - [2025-07-20 17:44:01 TP0] Decode batch. #running-req: 1, #token: 3808, token usage: 0.10, gen throughput (token/s): 47.93, #queue-req: 0
  25269. 2025-07-20 17:44:01,047 - __main__ - INFO - sglang running req: 1 queue req: 0
  25270. 2025-07-20 17:44:01,884 - sglang - INFO - [2025-07-20 17:44:01 TP0] Decode batch. #running-req: 1, #token: 3848, token usage: 0.10, gen throughput (token/s): 47.77, #queue-req: 0
  25271. 2025-07-20 17:44:01,884 - __main__ - INFO - sglang running req: 1 queue req: 0
  25272. 2025-07-20 17:44:02,726 - sglang - INFO - [2025-07-20 17:44:02 TP0] Decode batch. #running-req: 1, #token: 3888, token usage: 0.10, gen throughput (token/s): 47.48, #queue-req: 0
  25273. 2025-07-20 17:44:02,726 - __main__ - INFO - sglang running req: 1 queue req: 0
  25274. 2025-07-20 17:44:03,569 - sglang - INFO - [2025-07-20 17:44:03 TP0] Decode batch. #running-req: 1, #token: 3928, token usage: 0.10, gen throughput (token/s): 47.49, #queue-req: 0
  25275. 2025-07-20 17:44:03,569 - __main__ - INFO - sglang running req: 1 queue req: 0
  25276. 2025-07-20 17:44:04,414 - sglang - INFO - [2025-07-20 17:44:04 TP0] Decode batch. #running-req: 1, #token: 3968, token usage: 0.10, gen throughput (token/s): 47.31, #queue-req: 0
  25277. 2025-07-20 17:44:04,414 - __main__ - INFO - sglang running req: 1 queue req: 0
  25278. 2025-07-20 17:44:05,259 - sglang - INFO - [2025-07-20 17:44:05 TP0] Decode batch. #running-req: 1, #token: 4008, token usage: 0.11, gen throughput (token/s): 47.33, #queue-req: 0
  25279. 2025-07-20 17:44:05,259 - __main__ - INFO - sglang running req: 1 queue req: 0
  25280. 2025-07-20 17:44:06,105 - sglang - INFO - [2025-07-20 17:44:06 TP0] Decode batch. #running-req: 1, #token: 4048, token usage: 0.11, gen throughput (token/s): 47.27, #queue-req: 0
  25281. 2025-07-20 17:44:06,105 - __main__ - INFO - sglang running req: 1 queue req: 0
  25282. 2025-07-20 17:44:06,944 - __main__ - INFO - Queue remaining: 0
  25283. 2025-07-20 17:44:06,944 - __main__ - INFO -
  25284. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25285. ----------------------------------------------------------------------------------
  25286. finished_input_tokens 405.56 358.35
  25287. finished_output_tokens 97.40 86.68
  25288. sglang_input_tokens 430.18 432.38
  25289. sglang_output_tokens 113.60 119.35
  25290. 2025-07-20 17:44:06,944 - __main__ - INFO -
  25291. Worker ID | finished | started
  25292. ----------+----------+--------
  25293. 0 | 8 | 9
  25294. 2025-07-20 17:44:06,944 - sglang - INFO - [2025-07-20 17:44:06 TP0] Decode batch. #running-req: 1, #token: 4088, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
  25295. 2025-07-20 17:44:06,944 - __main__ - INFO - sglang running req: 1 queue req: 0
  25296. 2025-07-20 17:44:07,781 - sglang - INFO - [2025-07-20 17:44:07 TP0] Decode batch. #running-req: 1, #token: 4128, token usage: 0.11, gen throughput (token/s): 47.84, #queue-req: 0
  25297. 2025-07-20 17:44:07,781 - __main__ - INFO - sglang running req: 1 queue req: 0
  25298. 2025-07-20 17:44:08,618 - sglang - INFO - [2025-07-20 17:44:08 TP0] Decode batch. #running-req: 1, #token: 4168, token usage: 0.11, gen throughput (token/s): 47.74, #queue-req: 0
  25299. 2025-07-20 17:44:08,618 - __main__ - INFO - sglang running req: 1 queue req: 0
  25300. 2025-07-20 17:44:09,462 - sglang - INFO - [2025-07-20 17:44:09 TP0] Decode batch. #running-req: 1, #token: 4208, token usage: 0.11, gen throughput (token/s): 47.40, #queue-req: 0
  25301. 2025-07-20 17:44:09,462 - __main__ - INFO - sglang running req: 1 queue req: 0
  25302. 2025-07-20 17:44:10,307 - sglang - INFO - [2025-07-20 17:44:10 TP0] Decode batch. #running-req: 1, #token: 4248, token usage: 0.11, gen throughput (token/s): 47.36, #queue-req: 0
  25303. 2025-07-20 17:44:10,307 - __main__ - INFO - sglang running req: 1 queue req: 0
  25304. 2025-07-20 17:44:11,149 - sglang - INFO - [2025-07-20 17:44:11 TP0] Decode batch. #running-req: 1, #token: 4288, token usage: 0.11, gen throughput (token/s): 47.46, #queue-req: 0
  25305. 2025-07-20 17:44:11,150 - __main__ - INFO - sglang running req: 1 queue req: 0
  25306. 2025-07-20 17:44:11,992 - sglang - INFO - [2025-07-20 17:44:11 TP0] Decode batch. #running-req: 1, #token: 4328, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
  25307. 2025-07-20 17:44:11,992 - __main__ - INFO - sglang running req: 1 queue req: 0
  25308. 2025-07-20 17:44:12,839 - sglang - INFO - [2025-07-20 17:44:12 TP0] Decode batch. #running-req: 1, #token: 4368, token usage: 0.11, gen throughput (token/s): 47.20, #queue-req: 0
  25309. 2025-07-20 17:44:12,840 - __main__ - INFO - sglang running req: 1 queue req: 0
  25310. 2025-07-20 17:44:13,682 - sglang - INFO - [2025-07-20 17:44:13 TP0] Decode batch. #running-req: 1, #token: 4408, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
  25311. 2025-07-20 17:44:13,682 - __main__ - INFO - sglang running req: 1 queue req: 0
  25312. 2025-07-20 17:44:14,521 - sglang - INFO - [2025-07-20 17:44:14 TP0] Decode batch. #running-req: 1, #token: 4448, token usage: 0.12, gen throughput (token/s): 47.70, #queue-req: 0
  25313. 2025-07-20 17:44:14,521 - __main__ - INFO - sglang running req: 1 queue req: 0
  25314. 2025-07-20 17:44:15,360 - sglang - INFO - [2025-07-20 17:44:15 TP0] Decode batch. #running-req: 1, #token: 4488, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
  25315. 2025-07-20 17:44:15,360 - __main__ - INFO - sglang running req: 1 queue req: 0
  25316. 2025-07-20 17:44:16,205 - sglang - INFO - [2025-07-20 17:44:16 TP0] Decode batch. #running-req: 1, #token: 4528, token usage: 0.12, gen throughput (token/s): 47.29, #queue-req: 0
  25317. 2025-07-20 17:44:16,206 - __main__ - INFO - sglang running req: 1 queue req: 0
  25318. 2025-07-20 17:44:16,945 - __main__ - INFO - Queue remaining: 0
  25319. 2025-07-20 17:44:16,946 - __main__ - INFO -
  25320. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25321. ----------------------------------------------------------------------------------
  25322. finished_input_tokens 402.10 358.35
  25323. finished_output_tokens 96.57 86.68
  25324. sglang_input_tokens 426.51 432.38
  25325. sglang_output_tokens 112.63 119.35
  25326. 2025-07-20 17:44:16,946 - __main__ - INFO -
  25327. Worker ID | finished | started
  25328. ----------+----------+--------
  25329. 0 | 8 | 9
  25330. 2025-07-20 17:44:17,047 - sglang - INFO - [2025-07-20 17:44:17 TP0] Decode batch. #running-req: 1, #token: 4568, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
  25331. 2025-07-20 17:44:17,047 - __main__ - INFO - sglang running req: 1 queue req: 0
  25332. 2025-07-20 17:44:17,886 - sglang - INFO - [2025-07-20 17:44:17 TP0] Decode batch. #running-req: 1, #token: 4608, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
  25333. 2025-07-20 17:44:17,886 - __main__ - INFO - sglang running req: 1 queue req: 0
  25334. 2025-07-20 17:44:18,724 - sglang - INFO - [2025-07-20 17:44:18 TP0] Decode batch. #running-req: 1, #token: 4648, token usage: 0.12, gen throughput (token/s): 47.73, #queue-req: 0
  25335. 2025-07-20 17:44:18,724 - __main__ - INFO - sglang running req: 1 queue req: 0
  25336. 2025-07-20 17:44:19,571 - sglang - INFO - [2025-07-20 17:44:19 TP0] Decode batch. #running-req: 1, #token: 4688, token usage: 0.12, gen throughput (token/s): 47.21, #queue-req: 0
  25337. 2025-07-20 17:44:19,572 - __main__ - INFO - sglang running req: 1 queue req: 0
  25338. 2025-07-20 17:44:20,417 - sglang - INFO - [2025-07-20 17:44:20 TP0] Decode batch. #running-req: 1, #token: 4728, token usage: 0.12, gen throughput (token/s): 47.31, #queue-req: 0
  25339. 2025-07-20 17:44:20,417 - __main__ - INFO - sglang running req: 1 queue req: 0
  25340. 2025-07-20 17:44:21,255 - sglang - INFO - [2025-07-20 17:44:21 TP0] Decode batch. #running-req: 1, #token: 4768, token usage: 0.13, gen throughput (token/s): 47.72, #queue-req: 0
  25341. 2025-07-20 17:44:21,256 - __main__ - INFO - sglang running req: 1 queue req: 0
  25342. 2025-07-20 17:44:22,095 - sglang - INFO - [2025-07-20 17:44:22 TP0] Decode batch. #running-req: 1, #token: 4808, token usage: 0.13, gen throughput (token/s): 47.66, #queue-req: 0
  25343. 2025-07-20 17:44:22,095 - __main__ - INFO - sglang running req: 1 queue req: 0
  25344. 2025-07-20 17:44:22,940 - sglang - INFO - [2025-07-20 17:44:22 TP0] Decode batch. #running-req: 1, #token: 4848, token usage: 0.13, gen throughput (token/s): 47.31, #queue-req: 0
  25345. 2025-07-20 17:44:22,940 - __main__ - INFO - sglang running req: 1 queue req: 0
  25346. 2025-07-20 17:44:23,779 - sglang - INFO - [2025-07-20 17:44:23 TP0] Decode batch. #running-req: 1, #token: 4888, token usage: 0.13, gen throughput (token/s): 47.66, #queue-req: 0
  25347. 2025-07-20 17:44:23,780 - __main__ - INFO - sglang running req: 1 queue req: 0
  25348. 2025-07-20 17:44:24,617 - sglang - INFO - [2025-07-20 17:44:24 TP0] Decode batch. #running-req: 1, #token: 4928, token usage: 0.13, gen throughput (token/s): 47.77, #queue-req: 0
  25349. 2025-07-20 17:44:24,617 - __main__ - INFO - sglang running req: 1 queue req: 0
  25350. 2025-07-20 17:44:25,454 - sglang - INFO - [2025-07-20 17:44:25 TP0] Decode batch. #running-req: 1, #token: 4968, token usage: 0.13, gen throughput (token/s): 47.76, #queue-req: 0
  25351. 2025-07-20 17:44:25,454 - __main__ - INFO - sglang running req: 1 queue req: 0
  25352. 2025-07-20 17:44:26,293 - sglang - INFO - [2025-07-20 17:44:26 TP0] Decode batch. #running-req: 1, #token: 5008, token usage: 0.13, gen throughput (token/s): 47.67, #queue-req: 0
  25353. 2025-07-20 17:44:26,294 - __main__ - INFO - sglang running req: 1 queue req: 0
  25354. 2025-07-20 17:44:26,947 - __main__ - INFO - Queue remaining: 0
  25355. 2025-07-20 17:44:26,947 - __main__ - INFO -
  25356. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25357. ----------------------------------------------------------------------------------
  25358. finished_input_tokens 398.69 358.35
  25359. finished_output_tokens 95.75 86.68
  25360. sglang_input_tokens 422.89 432.38
  25361. sglang_output_tokens 111.67 119.35
  25362. 2025-07-20 17:44:26,948 - __main__ - INFO -
  25363. Worker ID | finished | started
  25364. ----------+----------+--------
  25365. 0 | 8 | 9
  25366. 2025-07-20 17:44:27,141 - sglang - INFO - [2025-07-20 17:44:27 TP0] Decode batch. #running-req: 1, #token: 5048, token usage: 0.13, gen throughput (token/s): 47.19, #queue-req: 0
  25367. 2025-07-20 17:44:27,141 - __main__ - INFO - sglang running req: 1 queue req: 0
  25368. 2025-07-20 17:44:27,982 - sglang - INFO - [2025-07-20 17:44:27 TP0] Decode batch. #running-req: 1, #token: 5088, token usage: 0.13, gen throughput (token/s): 47.56, #queue-req: 0
  25369. 2025-07-20 17:44:27,982 - __main__ - INFO - sglang running req: 1 queue req: 0
  25370. 2025-07-20 17:44:28,822 - sglang - INFO - [2025-07-20 17:44:28 TP0] Decode batch. #running-req: 1, #token: 5128, token usage: 0.13, gen throughput (token/s): 47.58, #queue-req: 0
  25371. 2025-07-20 17:44:28,823 - __main__ - INFO - sglang running req: 1 queue req: 0
  25372. 2025-07-20 17:44:29,668 - sglang - INFO - [2025-07-20 17:44:29 TP0] Decode batch. #running-req: 1, #token: 5168, token usage: 0.14, gen throughput (token/s): 47.31, #queue-req: 0
  25373. 2025-07-20 17:44:29,668 - __main__ - INFO - sglang running req: 1 queue req: 0
  25374. 2025-07-20 17:44:30,510 - sglang - INFO - [2025-07-20 17:44:30 TP0] Decode batch. #running-req: 1, #token: 5208, token usage: 0.14, gen throughput (token/s): 47.51, #queue-req: 0
  25375. 2025-07-20 17:44:30,510 - __main__ - INFO - sglang running req: 1 queue req: 0
  25376. 2025-07-20 17:44:31,348 - sglang - INFO - [2025-07-20 17:44:31 TP0] Decode batch. #running-req: 1, #token: 5248, token usage: 0.14, gen throughput (token/s): 47.70, #queue-req: 0
  25377. 2025-07-20 17:44:31,349 - __main__ - INFO - sglang running req: 1 queue req: 0
  25378. 2025-07-20 17:44:32,188 - sglang - INFO - [2025-07-20 17:44:32 TP0] Decode batch. #running-req: 1, #token: 5288, token usage: 0.14, gen throughput (token/s): 47.62, #queue-req: 0
  25379. 2025-07-20 17:44:32,189 - __main__ - INFO - sglang running req: 1 queue req: 0
  25380. 2025-07-20 17:44:33,036 - sglang - INFO - [2025-07-20 17:44:33 TP0] Decode batch. #running-req: 1, #token: 5328, token usage: 0.14, gen throughput (token/s): 47.20, #queue-req: 0
  25381. 2025-07-20 17:44:33,036 - __main__ - INFO - sglang running req: 1 queue req: 0
  25382. 2025-07-20 17:44:33,888 - sglang - INFO - [2025-07-20 17:44:33 TP0] Decode batch. #running-req: 1, #token: 5368, token usage: 0.14, gen throughput (token/s): 46.93, #queue-req: 0
  25383. 2025-07-20 17:44:33,889 - __main__ - INFO - sglang running req: 1 queue req: 0
  25384. 2025-07-20 17:44:34,731 - sglang - INFO - [2025-07-20 17:44:34 TP0] Decode batch. #running-req: 1, #token: 5408, token usage: 0.14, gen throughput (token/s): 47.43, #queue-req: 0
  25385. 2025-07-20 17:44:34,732 - __main__ - INFO - sglang running req: 1 queue req: 0
  25386. 2025-07-20 17:44:35,573 - sglang - INFO - [2025-07-20 17:44:35 TP0] Decode batch. #running-req: 1, #token: 5448, token usage: 0.14, gen throughput (token/s): 47.55, #queue-req: 0
  25387. 2025-07-20 17:44:35,573 - __main__ - INFO - sglang running req: 1 queue req: 0
  25388. 2025-07-20 17:44:36,419 - sglang - INFO - [2025-07-20 17:44:36 TP0] Decode batch. #running-req: 1, #token: 5488, token usage: 0.14, gen throughput (token/s): 47.26, #queue-req: 0
  25389. 2025-07-20 17:44:36,419 - __main__ - INFO - sglang running req: 1 queue req: 0
  25390. 2025-07-20 17:44:36,949 - __main__ - INFO - Queue remaining: 0
  25391. 2025-07-20 17:44:36,949 - __main__ - INFO -
  25392. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25393. ----------------------------------------------------------------------------------
  25394. finished_input_tokens 395.34 358.35
  25395. finished_output_tokens 94.94 86.68
  25396. sglang_input_tokens 419.34 432.38
  25397. sglang_output_tokens 110.73 119.35
  25398. 2025-07-20 17:44:36,949 - __main__ - INFO -
  25399. Worker ID | finished | started
  25400. ----------+----------+--------
  25401. 0 | 8 | 9
  25402. 2025-07-20 17:44:37,266 - sglang - INFO - [2025-07-20 17:44:37 TP0] Decode batch. #running-req: 1, #token: 5528, token usage: 0.15, gen throughput (token/s): 47.22, #queue-req: 0
  25403. 2025-07-20 17:44:37,266 - __main__ - INFO - sglang running req: 1 queue req: 0
  25404. 2025-07-20 17:44:38,113 - sglang - INFO - [2025-07-20 17:44:38 TP0] Decode batch. #running-req: 1, #token: 5568, token usage: 0.15, gen throughput (token/s): 47.22, #queue-req: 0
  25405. 2025-07-20 17:44:38,113 - __main__ - INFO - sglang running req: 1 queue req: 0
  25406. 2025-07-20 17:44:38,948 - __main__ - WARNING - JSON decode error on attempt 2 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
  25407. 2025-07-20 17:44:39,136 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  25408. 2025-07-20 17:44:39,280 - sglang - INFO - [2025-07-20 17:44:39 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  25409. 2025-07-20 17:44:39,280 - __main__ - INFO - sglang running req: 0 queue req: 0
  25410. 2025-07-20 17:44:40,082 - sglang - INFO - [2025-07-20 17:44:40 TP0] Decode batch. #running-req: 1, #token: 2609, token usage: 0.07, gen throughput (token/s): 20.32, #queue-req: 0
  25411. 2025-07-20 17:44:40,082 - __main__ - INFO - sglang running req: 1 queue req: 0
  25412. 2025-07-20 17:44:40,927 - sglang - INFO - [2025-07-20 17:44:40 TP0] Decode batch. #running-req: 1, #token: 2649, token usage: 0.07, gen throughput (token/s): 47.32, #queue-req: 0
  25413. 2025-07-20 17:44:40,927 - __main__ - INFO - sglang running req: 1 queue req: 0
  25414. 2025-07-20 17:44:41,762 - sglang - INFO - [2025-07-20 17:44:41 TP0] Decode batch. #running-req: 1, #token: 2689, token usage: 0.07, gen throughput (token/s): 47.89, #queue-req: 0
  25415. 2025-07-20 17:44:41,763 - __main__ - INFO - sglang running req: 1 queue req: 0
  25416. 2025-07-20 17:44:42,596 - sglang - INFO - [2025-07-20 17:44:42 TP0] Decode batch. #running-req: 1, #token: 2729, token usage: 0.07, gen throughput (token/s): 47.97, #queue-req: 0
  25417. 2025-07-20 17:44:42,597 - __main__ - INFO - sglang running req: 1 queue req: 0
  25418. 2025-07-20 17:44:43,435 - sglang - INFO - [2025-07-20 17:44:43 TP0] Decode batch. #running-req: 1, #token: 2769, token usage: 0.07, gen throughput (token/s): 47.68, #queue-req: 0
  25419. 2025-07-20 17:44:43,436 - __main__ - INFO - sglang running req: 1 queue req: 0
  25420. 2025-07-20 17:44:44,273 - sglang - INFO - [2025-07-20 17:44:44 TP0] Decode batch. #running-req: 1, #token: 2809, token usage: 0.07, gen throughput (token/s): 47.75, #queue-req: 0
  25421. 2025-07-20 17:44:44,273 - __main__ - INFO - sglang running req: 1 queue req: 0
  25422. 2025-07-20 17:44:45,116 - sglang - INFO - [2025-07-20 17:44:45 TP0] Decode batch. #running-req: 1, #token: 2849, token usage: 0.07, gen throughput (token/s): 47.47, #queue-req: 0
  25423. 2025-07-20 17:44:45,116 - __main__ - INFO - sglang running req: 1 queue req: 0
  25424. 2025-07-20 17:44:45,956 - sglang - INFO - [2025-07-20 17:44:45 TP0] Decode batch. #running-req: 1, #token: 2889, token usage: 0.08, gen throughput (token/s): 47.62, #queue-req: 0
  25425. 2025-07-20 17:44:45,956 - __main__ - INFO - sglang running req: 1 queue req: 0
  25426. 2025-07-20 17:44:46,798 - sglang - INFO - [2025-07-20 17:44:46 TP0] Decode batch. #running-req: 1, #token: 2929, token usage: 0.08, gen throughput (token/s): 47.47, #queue-req: 0
  25427. 2025-07-20 17:44:46,799 - __main__ - INFO - sglang running req: 1 queue req: 0
  25428. 2025-07-20 17:44:46,950 - __main__ - INFO - Queue remaining: 0
  25429. 2025-07-20 17:44:46,951 - __main__ - INFO -
  25430. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25431. ----------------------------------------------------------------------------------
  25432. finished_input_tokens 392.05 214.66
  25433. finished_output_tokens 94.15 55.51
  25434. sglang_input_tokens 418.02 297.38
  25435. sglang_output_tokens 112.31 98.17
  25436. 2025-07-20 17:44:46,951 - __main__ - INFO -
  25437. Worker ID | finished | started
  25438. ----------+----------+--------
  25439. 0 | 8 | 9
  25440. 2025-07-20 17:44:47,645 - sglang - INFO - [2025-07-20 17:44:47 TP0] Decode batch. #running-req: 1, #token: 2969, token usage: 0.08, gen throughput (token/s): 47.27, #queue-req: 0
  25441. 2025-07-20 17:44:47,645 - __main__ - INFO - sglang running req: 1 queue req: 0
  25442. 2025-07-20 17:44:48,483 - sglang - INFO - [2025-07-20 17:44:48 TP0] Decode batch. #running-req: 1, #token: 3009, token usage: 0.08, gen throughput (token/s): 47.69, #queue-req: 0
  25443. 2025-07-20 17:44:48,484 - __main__ - INFO - sglang running req: 1 queue req: 0
  25444. 2025-07-20 17:44:49,320 - sglang - INFO - [2025-07-20 17:44:49 TP0] Decode batch. #running-req: 1, #token: 3049, token usage: 0.08, gen throughput (token/s): 47.84, #queue-req: 0
  25445. 2025-07-20 17:44:49,320 - __main__ - INFO - sglang running req: 1 queue req: 0
  25446. 2025-07-20 17:44:50,158 - sglang - INFO - [2025-07-20 17:44:50 TP0] Decode batch. #running-req: 1, #token: 3089, token usage: 0.08, gen throughput (token/s): 47.73, #queue-req: 0
  25447. 2025-07-20 17:44:50,158 - __main__ - INFO - sglang running req: 1 queue req: 0
  25448. 2025-07-20 17:44:51,001 - sglang - INFO - [2025-07-20 17:44:51 TP0] Decode batch. #running-req: 1, #token: 3129, token usage: 0.08, gen throughput (token/s): 47.45, #queue-req: 0
  25449. 2025-07-20 17:44:51,001 - __main__ - INFO - sglang running req: 1 queue req: 0
  25450. 2025-07-20 17:44:51,843 - sglang - INFO - [2025-07-20 17:44:51 TP0] Decode batch. #running-req: 1, #token: 3169, token usage: 0.08, gen throughput (token/s): 47.49, #queue-req: 0
  25451. 2025-07-20 17:44:51,843 - __main__ - INFO - sglang running req: 1 queue req: 0
  25452. 2025-07-20 17:44:52,684 - sglang - INFO - [2025-07-20 17:44:52 TP0] Decode batch. #running-req: 1, #token: 3209, token usage: 0.08, gen throughput (token/s): 47.54, #queue-req: 0
  25453. 2025-07-20 17:44:52,685 - __main__ - INFO - sglang running req: 1 queue req: 0
  25454. 2025-07-20 17:44:53,529 - sglang - INFO - [2025-07-20 17:44:53 TP0] Decode batch. #running-req: 1, #token: 3249, token usage: 0.09, gen throughput (token/s): 47.36, #queue-req: 0
  25455. 2025-07-20 17:44:53,529 - __main__ - INFO - sglang running req: 1 queue req: 0
  25456. 2025-07-20 17:44:54,376 - sglang - INFO - [2025-07-20 17:44:54 TP0] Decode batch. #running-req: 1, #token: 3289, token usage: 0.09, gen throughput (token/s): 47.23, #queue-req: 0
  25457. 2025-07-20 17:44:54,376 - __main__ - INFO - sglang running req: 1 queue req: 0
  25458. 2025-07-20 17:44:55,214 - sglang - INFO - [2025-07-20 17:44:55 TP0] Decode batch. #running-req: 1, #token: 3329, token usage: 0.09, gen throughput (token/s): 47.73, #queue-req: 0
  25459. 2025-07-20 17:44:55,214 - __main__ - INFO - sglang running req: 1 queue req: 0
  25460. 2025-07-20 17:44:56,048 - sglang - INFO - [2025-07-20 17:44:56 TP0] Decode batch. #running-req: 1, #token: 3369, token usage: 0.09, gen throughput (token/s): 47.95, #queue-req: 0
  25461. 2025-07-20 17:44:56,048 - __main__ - INFO - sglang running req: 1 queue req: 0
  25462. 2025-07-20 17:44:56,886 - sglang - INFO - [2025-07-20 17:44:56 TP0] Decode batch. #running-req: 1, #token: 3409, token usage: 0.09, gen throughput (token/s): 47.73, #queue-req: 0
  25463. 2025-07-20 17:44:56,886 - __main__ - INFO - sglang running req: 1 queue req: 0
  25464. 2025-07-20 17:44:56,952 - __main__ - INFO - Queue remaining: 0
  25465. 2025-07-20 17:44:56,952 - __main__ - INFO -
  25466. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25467. ----------------------------------------------------------------------------------
  25468. finished_input_tokens 388.81 214.66
  25469. finished_output_tokens 93.38 55.51
  25470. sglang_input_tokens 414.57 297.38
  25471. sglang_output_tokens 111.38 98.17
  25472. 2025-07-20 17:44:56,953 - __main__ - INFO -
  25473. Worker ID | finished | started
  25474. ----------+----------+--------
  25475. 0 | 8 | 9
  25476. 2025-07-20 17:44:57,729 - sglang - INFO - [2025-07-20 17:44:57 TP0] Decode batch. #running-req: 1, #token: 3449, token usage: 0.09, gen throughput (token/s): 47.45, #queue-req: 0
  25477. 2025-07-20 17:44:57,729 - __main__ - INFO - sglang running req: 1 queue req: 0
  25478. 2025-07-20 17:44:58,570 - sglang - INFO - [2025-07-20 17:44:58 TP0] Decode batch. #running-req: 1, #token: 3489, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
  25479. 2025-07-20 17:44:58,570 - __main__ - INFO - sglang running req: 1 queue req: 0
  25480. 2025-07-20 17:44:59,416 - sglang - INFO - [2025-07-20 17:44:59 TP0] Decode batch. #running-req: 1, #token: 3529, token usage: 0.09, gen throughput (token/s): 47.31, #queue-req: 0
  25481. 2025-07-20 17:44:59,416 - __main__ - INFO - sglang running req: 1 queue req: 0
  25482. 2025-07-20 17:45:00,262 - sglang - INFO - [2025-07-20 17:45:00 TP0] Decode batch. #running-req: 1, #token: 3569, token usage: 0.09, gen throughput (token/s): 47.26, #queue-req: 0
  25483. 2025-07-20 17:45:00,262 - __main__ - INFO - sglang running req: 1 queue req: 0
  25484. 2025-07-20 17:45:01,110 - sglang - INFO - [2025-07-20 17:45:01 TP0] Decode batch. #running-req: 1, #token: 3609, token usage: 0.10, gen throughput (token/s): 47.17, #queue-req: 0
  25485. 2025-07-20 17:45:01,110 - __main__ - INFO - sglang running req: 1 queue req: 0
  25486. 2025-07-20 17:45:01,952 - sglang - INFO - [2025-07-20 17:45:01 TP0] Decode batch. #running-req: 1, #token: 3649, token usage: 0.10, gen throughput (token/s): 47.48, #queue-req: 0
  25487. 2025-07-20 17:45:01,953 - __main__ - INFO - sglang running req: 1 queue req: 0
  25488. 2025-07-20 17:45:02,791 - sglang - INFO - [2025-07-20 17:45:02 TP0] Decode batch. #running-req: 1, #token: 3689, token usage: 0.10, gen throughput (token/s): 47.70, #queue-req: 0
  25489. 2025-07-20 17:45:02,791 - __main__ - INFO - sglang running req: 1 queue req: 0
  25490. 2025-07-20 17:45:03,628 - sglang - INFO - [2025-07-20 17:45:03 TP0] Decode batch. #running-req: 1, #token: 3729, token usage: 0.10, gen throughput (token/s): 47.76, #queue-req: 0
  25491. 2025-07-20 17:45:03,629 - __main__ - INFO - sglang running req: 1 queue req: 0
  25492. 2025-07-20 17:45:04,475 - sglang - INFO - [2025-07-20 17:45:04 TP0] Decode batch. #running-req: 1, #token: 3769, token usage: 0.10, gen throughput (token/s): 47.23, #queue-req: 0
  25493. 2025-07-20 17:45:04,476 - __main__ - INFO - sglang running req: 1 queue req: 0
  25494. 2025-07-20 17:45:05,317 - sglang - INFO - [2025-07-20 17:45:05 TP0] Decode batch. #running-req: 1, #token: 3809, token usage: 0.10, gen throughput (token/s): 47.52, #queue-req: 0
  25495. 2025-07-20 17:45:05,317 - __main__ - INFO - sglang running req: 1 queue req: 0
  25496. 2025-07-20 17:45:06,157 - sglang - INFO - [2025-07-20 17:45:06 TP0] Decode batch. #running-req: 1, #token: 3849, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
  25497. 2025-07-20 17:45:06,157 - __main__ - INFO - sglang running req: 1 queue req: 0
  25498. 2025-07-20 17:45:06,955 - __main__ - INFO - Queue remaining: 0
  25499. 2025-07-20 17:45:06,955 - __main__ - INFO -
  25500. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25501. ----------------------------------------------------------------------------------
  25502. finished_input_tokens 385.63 214.66
  25503. finished_output_tokens 92.61 55.51
  25504. sglang_input_tokens 411.17 297.38
  25505. sglang_output_tokens 110.47 98.17
  25506. 2025-07-20 17:45:06,955 - __main__ - INFO -
  25507. Worker ID | finished | started
  25508. ----------+----------+--------
  25509. 0 | 8 | 9
  25510. 2025-07-20 17:45:06,996 - sglang - INFO - [2025-07-20 17:45:06 TP0] Decode batch. #running-req: 1, #token: 3889, token usage: 0.10, gen throughput (token/s): 47.69, #queue-req: 0
  25511. 2025-07-20 17:45:06,996 - __main__ - INFO - sglang running req: 1 queue req: 0
  25512. 2025-07-20 17:45:07,840 - sglang - INFO - [2025-07-20 17:45:07 TP0] Decode batch. #running-req: 1, #token: 3929, token usage: 0.10, gen throughput (token/s): 47.36, #queue-req: 0
  25513. 2025-07-20 17:45:07,841 - __main__ - INFO - sglang running req: 1 queue req: 0
  25514. 2025-07-20 17:45:08,685 - sglang - INFO - [2025-07-20 17:45:08 TP0] Decode batch. #running-req: 1, #token: 3969, token usage: 0.10, gen throughput (token/s): 47.34, #queue-req: 0
  25515. 2025-07-20 17:45:08,686 - __main__ - INFO - sglang running req: 1 queue req: 0
  25516. 2025-07-20 17:45:09,526 - sglang - INFO - [2025-07-20 17:45:09 TP0] Decode batch. #running-req: 1, #token: 4009, token usage: 0.11, gen throughput (token/s): 47.61, #queue-req: 0
  25517. 2025-07-20 17:45:09,526 - __main__ - INFO - sglang running req: 1 queue req: 0
  25518. 2025-07-20 17:45:10,362 - sglang - INFO - [2025-07-20 17:45:10 TP0] Decode batch. #running-req: 1, #token: 4049, token usage: 0.11, gen throughput (token/s): 47.79, #queue-req: 0
  25519. 2025-07-20 17:45:10,363 - __main__ - INFO - sglang running req: 1 queue req: 0
  25520. 2025-07-20 17:45:11,204 - sglang - INFO - [2025-07-20 17:45:11 TP0] Decode batch. #running-req: 1, #token: 4089, token usage: 0.11, gen throughput (token/s): 47.53, #queue-req: 0
  25521. 2025-07-20 17:45:11,204 - __main__ - INFO - sglang running req: 1 queue req: 0
  25522. 2025-07-20 17:45:12,047 - sglang - INFO - [2025-07-20 17:45:12 TP0] Decode batch. #running-req: 1, #token: 4129, token usage: 0.11, gen throughput (token/s): 47.44, #queue-req: 0
  25523. 2025-07-20 17:45:12,047 - __main__ - INFO - sglang running req: 1 queue req: 0
  25524. 2025-07-20 17:45:12,886 - sglang - INFO - [2025-07-20 17:45:12 TP0] Decode batch. #running-req: 1, #token: 4169, token usage: 0.11, gen throughput (token/s): 47.70, #queue-req: 0
  25525. 2025-07-20 17:45:12,886 - __main__ - INFO - sglang running req: 1 queue req: 0
  25526. 2025-07-20 17:45:13,724 - sglang - INFO - [2025-07-20 17:45:13 TP0] Decode batch. #running-req: 1, #token: 4209, token usage: 0.11, gen throughput (token/s): 47.70, #queue-req: 0
  25527. 2025-07-20 17:45:13,725 - __main__ - INFO - sglang running req: 1 queue req: 0
  25528. 2025-07-20 17:45:14,567 - sglang - INFO - [2025-07-20 17:45:14 TP0] Decode batch. #running-req: 1, #token: 4249, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
  25529. 2025-07-20 17:45:14,567 - __main__ - INFO - sglang running req: 1 queue req: 0
  25530. 2025-07-20 17:45:15,416 - sglang - INFO - [2025-07-20 17:45:15 TP0] Decode batch. #running-req: 1, #token: 4289, token usage: 0.11, gen throughput (token/s): 47.12, #queue-req: 0
  25531. 2025-07-20 17:45:15,416 - __main__ - INFO - sglang running req: 1 queue req: 0
  25532. 2025-07-20 17:45:16,258 - sglang - INFO - [2025-07-20 17:45:16 TP0] Decode batch. #running-req: 1, #token: 4329, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
  25533. 2025-07-20 17:45:16,258 - __main__ - INFO - sglang running req: 1 queue req: 0
  25534. 2025-07-20 17:45:16,957 - __main__ - INFO - Queue remaining: 0
  25535. 2025-07-20 17:45:16,957 - __main__ - INFO -
  25536. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25537. ----------------------------------------------------------------------------------
  25538. finished_input_tokens 382.49 214.66
  25539. finished_output_tokens 91.86 55.51
  25540. sglang_input_tokens 407.83 297.38
  25541. sglang_output_tokens 109.57 98.17
  25542. 2025-07-20 17:45:16,957 - __main__ - INFO -
  25543. Worker ID | finished | started
  25544. ----------+----------+--------
  25545. 0 | 8 | 9
  25546. 2025-07-20 17:45:17,096 - sglang - INFO - [2025-07-20 17:45:17 TP0] Decode batch. #running-req: 1, #token: 4369, token usage: 0.12, gen throughput (token/s): 47.77, #queue-req: 0
  25547. 2025-07-20 17:45:17,096 - __main__ - INFO - sglang running req: 1 queue req: 0
  25548. 2025-07-20 17:45:17,938 - sglang - INFO - [2025-07-20 17:45:17 TP0] Decode batch. #running-req: 1, #token: 4409, token usage: 0.12, gen throughput (token/s): 47.48, #queue-req: 0
  25549. 2025-07-20 17:45:17,938 - __main__ - INFO - sglang running req: 1 queue req: 0
  25550. 2025-07-20 17:45:18,785 - sglang - INFO - [2025-07-20 17:45:18 TP0] Decode batch. #running-req: 1, #token: 4449, token usage: 0.12, gen throughput (token/s): 47.26, #queue-req: 0
  25551. 2025-07-20 17:45:18,785 - __main__ - INFO - sglang running req: 1 queue req: 0
  25552. 2025-07-20 17:45:19,624 - sglang - INFO - [2025-07-20 17:45:19 TP0] Decode batch. #running-req: 1, #token: 4489, token usage: 0.12, gen throughput (token/s): 47.65, #queue-req: 0
  25553. 2025-07-20 17:45:19,624 - __main__ - INFO - sglang running req: 1 queue req: 0
  25554. 2025-07-20 17:45:20,462 - sglang - INFO - [2025-07-20 17:45:20 TP0] Decode batch. #running-req: 1, #token: 4529, token usage: 0.12, gen throughput (token/s): 47.71, #queue-req: 0
  25555. 2025-07-20 17:45:20,463 - __main__ - INFO - sglang running req: 1 queue req: 0
  25556. 2025-07-20 17:45:21,301 - sglang - INFO - [2025-07-20 17:45:21 TP0] Decode batch. #running-req: 1, #token: 4569, token usage: 0.12, gen throughput (token/s): 47.67, #queue-req: 0
  25557. 2025-07-20 17:45:21,302 - __main__ - INFO - sglang running req: 1 queue req: 0
  25558. 2025-07-20 17:45:22,148 - sglang - INFO - [2025-07-20 17:45:22 TP0] Decode batch. #running-req: 1, #token: 4609, token usage: 0.12, gen throughput (token/s): 47.25, #queue-req: 0
  25559. 2025-07-20 17:45:22,148 - __main__ - INFO - sglang running req: 1 queue req: 0
  25560. 2025-07-20 17:45:22,991 - sglang - INFO - [2025-07-20 17:45:22 TP0] Decode batch. #running-req: 1, #token: 4649, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
  25561. 2025-07-20 17:45:22,991 - __main__ - INFO - sglang running req: 1 queue req: 0
  25562. 2025-07-20 17:45:23,830 - sglang - INFO - [2025-07-20 17:45:23 TP0] Decode batch. #running-req: 1, #token: 4689, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
  25563. 2025-07-20 17:45:23,830 - __main__ - INFO - sglang running req: 1 queue req: 0
  25564. 2025-07-20 17:45:24,672 - sglang - INFO - [2025-07-20 17:45:24 TP0] Decode batch. #running-req: 1, #token: 4729, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
  25565. 2025-07-20 17:45:24,673 - __main__ - INFO - sglang running req: 1 queue req: 0
  25566. 2025-07-20 17:45:25,521 - sglang - INFO - [2025-07-20 17:45:25 TP0] Decode batch. #running-req: 1, #token: 4769, token usage: 0.13, gen throughput (token/s): 47.11, #queue-req: 0
  25567. 2025-07-20 17:45:25,522 - __main__ - INFO - sglang running req: 1 queue req: 0
  25568. 2025-07-20 17:45:26,362 - sglang - INFO - [2025-07-20 17:45:26 TP0] Decode batch. #running-req: 1, #token: 4809, token usage: 0.13, gen throughput (token/s): 47.59, #queue-req: 0
  25569. 2025-07-20 17:45:26,362 - __main__ - INFO - sglang running req: 1 queue req: 0
  25570. 2025-07-20 17:45:26,959 - __main__ - INFO - Queue remaining: 0
  25571. 2025-07-20 17:45:26,959 - __main__ - INFO -
  25572. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25573. ----------------------------------------------------------------------------------
  25574. finished_input_tokens 379.41 214.66
  25575. finished_output_tokens 91.12 55.51
  25576. sglang_input_tokens 404.54 297.38
  25577. sglang_output_tokens 108.69 98.17
  25578. 2025-07-20 17:45:26,959 - __main__ - INFO -
  25579. Worker ID | finished | started
  25580. ----------+----------+--------
  25581. 0 | 8 | 9
  25582. 2025-07-20 17:45:27,202 - sglang - INFO - [2025-07-20 17:45:27 TP0] Decode batch. #running-req: 1, #token: 4849, token usage: 0.13, gen throughput (token/s): 47.61, #queue-req: 0
  25583. 2025-07-20 17:45:27,203 - __main__ - INFO - sglang running req: 1 queue req: 0
  25584. 2025-07-20 17:45:28,044 - sglang - INFO - [2025-07-20 17:45:28 TP0] Decode batch. #running-req: 1, #token: 4889, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
  25585. 2025-07-20 17:45:28,044 - __main__ - INFO - sglang running req: 1 queue req: 0
  25586. 2025-07-20 17:45:28,894 - sglang - INFO - [2025-07-20 17:45:28 TP0] Decode batch. #running-req: 1, #token: 4929, token usage: 0.13, gen throughput (token/s): 47.05, #queue-req: 0
  25587. 2025-07-20 17:45:28,894 - __main__ - INFO - sglang running req: 1 queue req: 0
  25588. 2025-07-20 17:45:29,745 - sglang - INFO - [2025-07-20 17:45:29 TP0] Decode batch. #running-req: 1, #token: 4969, token usage: 0.13, gen throughput (token/s): 47.02, #queue-req: 0
  25589. 2025-07-20 17:45:29,745 - __main__ - INFO - sglang running req: 1 queue req: 0
  25590. 2025-07-20 17:45:30,587 - sglang - INFO - [2025-07-20 17:45:30 TP0] Decode batch. #running-req: 1, #token: 5009, token usage: 0.13, gen throughput (token/s): 47.52, #queue-req: 0
  25591. 2025-07-20 17:45:30,587 - __main__ - INFO - sglang running req: 1 queue req: 0
  25592. 2025-07-20 17:45:31,428 - sglang - INFO - [2025-07-20 17:45:31 TP0] Decode batch. #running-req: 1, #token: 5049, token usage: 0.13, gen throughput (token/s): 47.53, #queue-req: 0
  25593. 2025-07-20 17:45:31,428 - __main__ - INFO - sglang running req: 1 queue req: 0
  25594. 2025-07-20 17:45:32,274 - sglang - INFO - [2025-07-20 17:45:32 TP0] Decode batch. #running-req: 1, #token: 5089, token usage: 0.13, gen throughput (token/s): 47.26, #queue-req: 0
  25595. 2025-07-20 17:45:32,275 - __main__ - INFO - sglang running req: 1 queue req: 0
  25596. 2025-07-20 17:45:33,120 - sglang - INFO - [2025-07-20 17:45:33 TP0] Decode batch. #running-req: 1, #token: 5129, token usage: 0.14, gen throughput (token/s): 47.32, #queue-req: 0
  25597. 2025-07-20 17:45:33,120 - __main__ - INFO - sglang running req: 1 queue req: 0
  25598. 2025-07-20 17:45:33,965 - sglang - INFO - [2025-07-20 17:45:33 TP0] Decode batch. #running-req: 1, #token: 5169, token usage: 0.14, gen throughput (token/s): 47.31, #queue-req: 0
  25599. 2025-07-20 17:45:33,965 - __main__ - INFO - sglang running req: 1 queue req: 0
  25600. 2025-07-20 17:45:34,812 - sglang - INFO - [2025-07-20 17:45:34 TP0] Decode batch. #running-req: 1, #token: 5209, token usage: 0.14, gen throughput (token/s): 47.22, #queue-req: 0
  25601. 2025-07-20 17:45:34,813 - __main__ - INFO - sglang running req: 1 queue req: 0
  25602. 2025-07-20 17:45:35,664 - sglang - INFO - [2025-07-20 17:45:35 TP0] Decode batch. #running-req: 1, #token: 5249, token usage: 0.14, gen throughput (token/s): 46.99, #queue-req: 0
  25603. 2025-07-20 17:45:35,664 - __main__ - INFO - sglang running req: 1 queue req: 0
  25604. 2025-07-20 17:45:36,519 - sglang - INFO - [2025-07-20 17:45:36 TP0] Decode batch. #running-req: 1, #token: 5289, token usage: 0.14, gen throughput (token/s): 46.74, #queue-req: 0
  25605. 2025-07-20 17:45:36,520 - __main__ - INFO - sglang running req: 1 queue req: 0
  25606. 2025-07-20 17:45:36,960 - __main__ - INFO - Queue remaining: 0
  25607. 2025-07-20 17:45:36,961 - __main__ - INFO -
  25608. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25609. ----------------------------------------------------------------------------------
  25610. finished_input_tokens 376.37 214.66
  25611. finished_output_tokens 90.39 55.51
  25612. sglang_input_tokens 401.31 297.38
  25613. sglang_output_tokens 107.82 98.17
  25614. 2025-07-20 17:45:36,961 - __main__ - INFO -
  25615. Worker ID | finished | started
  25616. ----------+----------+--------
  25617. 0 | 8 | 9
  25618. 2025-07-20 17:45:37,365 - sglang - INFO - [2025-07-20 17:45:37 TP0] Decode batch. #running-req: 1, #token: 5329, token usage: 0.14, gen throughput (token/s): 47.32, #queue-req: 0
  25619. 2025-07-20 17:45:37,365 - __main__ - INFO - sglang running req: 1 queue req: 0
  25620. 2025-07-20 17:45:38,208 - sglang - INFO - [2025-07-20 17:45:38 TP0] Decode batch. #running-req: 1, #token: 5369, token usage: 0.14, gen throughput (token/s): 47.43, #queue-req: 0
  25621. 2025-07-20 17:45:38,208 - __main__ - INFO - sglang running req: 1 queue req: 0
  25622. 2025-07-20 17:45:39,058 - sglang - INFO - [2025-07-20 17:45:39 TP0] Decode batch. #running-req: 1, #token: 5409, token usage: 0.14, gen throughput (token/s): 47.09, #queue-req: 0
  25623. 2025-07-20 17:45:39,058 - __main__ - INFO - sglang running req: 1 queue req: 0
  25624. 2025-07-20 17:45:39,906 - sglang - INFO - [2025-07-20 17:45:39 TP0] Decode batch. #running-req: 1, #token: 5449, token usage: 0.14, gen throughput (token/s): 47.15, #queue-req: 0
  25625. 2025-07-20 17:45:39,906 - __main__ - INFO - sglang running req: 1 queue req: 0
  25626. 2025-07-20 17:45:40,754 - sglang - INFO - [2025-07-20 17:45:40 TP0] Decode batch. #running-req: 1, #token: 5489, token usage: 0.14, gen throughput (token/s): 47.16, #queue-req: 0
  25627. 2025-07-20 17:45:40,754 - __main__ - INFO - sglang running req: 1 queue req: 0
  25628. 2025-07-20 17:45:41,604 - sglang - INFO - [2025-07-20 17:45:41 TP0] Decode batch. #running-req: 1, #token: 5529, token usage: 0.15, gen throughput (token/s): 47.07, #queue-req: 0
  25629. 2025-07-20 17:45:41,604 - __main__ - INFO - sglang running req: 1 queue req: 0
  25630. 2025-07-20 17:45:42,456 - sglang - INFO - [2025-07-20 17:45:42 TP0] Decode batch. #running-req: 1, #token: 5569, token usage: 0.15, gen throughput (token/s): 46.95, #queue-req: 0
  25631. 2025-07-20 17:45:42,456 - __main__ - INFO - sglang running req: 1 queue req: 0
  25632. 2025-07-20 17:45:43,285 - __main__ - WARNING - JSON decode error on attempt 3 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
  25633. 2025-07-20 17:45:43,473 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  25634. 2025-07-20 17:45:43,635 - sglang - INFO - [2025-07-20 17:45:43 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  25635. 2025-07-20 17:45:43,636 - __main__ - INFO - sglang running req: 0 queue req: 0
  25636. 2025-07-20 17:45:44,458 - sglang - INFO - [2025-07-20 17:45:44 TP0] Decode batch. #running-req: 1, #token: 2610, token usage: 0.07, gen throughput (token/s): 19.97, #queue-req: 0
  25637. 2025-07-20 17:45:44,459 - __main__ - INFO - sglang running req: 1 queue req: 0
  25638. 2025-07-20 17:45:45,291 - sglang - INFO - [2025-07-20 17:45:45 TP0] Decode batch. #running-req: 1, #token: 2650, token usage: 0.07, gen throughput (token/s): 48.04, #queue-req: 0
  25639. 2025-07-20 17:45:45,291 - __main__ - INFO - sglang running req: 1 queue req: 0
  25640. 2025-07-20 17:45:46,129 - sglang - INFO - [2025-07-20 17:45:46 TP0] Decode batch. #running-req: 1, #token: 2690, token usage: 0.07, gen throughput (token/s): 47.72, #queue-req: 0
  25641. 2025-07-20 17:45:46,129 - __main__ - INFO - sglang running req: 1 queue req: 0
  25642. 2025-07-20 17:45:46,962 - __main__ - INFO - Queue remaining: 0
  25643. 2025-07-20 17:45:46,962 - __main__ - INFO -
  25644. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25645. ----------------------------------------------------------------------------------
  25646. finished_input_tokens 373.39 121.86
  25647. finished_output_tokens 89.67 31.49
  25648. sglang_input_tokens 400.19 177.49
  25649. sglang_output_tokens 109.34 74.28
  25650. 2025-07-20 17:45:46,962 - __main__ - INFO -
  25651. Worker ID | finished | started
  25652. ----------+----------+--------
  25653. 0 | 8 | 9
  25654. 2025-07-20 17:45:46,966 - sglang - INFO - [2025-07-20 17:45:46 TP0] Decode batch. #running-req: 1, #token: 2730, token usage: 0.07, gen throughput (token/s): 47.78, #queue-req: 0
  25655. 2025-07-20 17:45:46,966 - __main__ - INFO - sglang running req: 1 queue req: 0
  25656. 2025-07-20 17:45:47,804 - sglang - INFO - [2025-07-20 17:45:47 TP0] Decode batch. #running-req: 1, #token: 2770, token usage: 0.07, gen throughput (token/s): 47.73, #queue-req: 0
  25657. 2025-07-20 17:45:47,805 - __main__ - INFO - sglang running req: 1 queue req: 0
  25658. 2025-07-20 17:45:48,644 - sglang - INFO - [2025-07-20 17:45:48 TP0] Decode batch. #running-req: 1, #token: 2810, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
  25659. 2025-07-20 17:45:48,644 - __main__ - INFO - sglang running req: 1 queue req: 0
  25660. 2025-07-20 17:45:49,486 - sglang - INFO - [2025-07-20 17:45:49 TP0] Decode batch. #running-req: 1, #token: 2850, token usage: 0.08, gen throughput (token/s): 47.48, #queue-req: 0
  25661. 2025-07-20 17:45:49,487 - __main__ - INFO - sglang running req: 1 queue req: 0
  25662. 2025-07-20 17:45:50,329 - sglang - INFO - [2025-07-20 17:45:50 TP0] Decode batch. #running-req: 1, #token: 2890, token usage: 0.08, gen throughput (token/s): 47.45, #queue-req: 0
  25663. 2025-07-20 17:45:50,330 - __main__ - INFO - sglang running req: 1 queue req: 0
  25664. 2025-07-20 17:45:51,168 - sglang - INFO - [2025-07-20 17:45:51 TP0] Decode batch. #running-req: 1, #token: 2930, token usage: 0.08, gen throughput (token/s): 47.68, #queue-req: 0
  25665. 2025-07-20 17:45:51,169 - __main__ - INFO - sglang running req: 1 queue req: 0
  25666. 2025-07-20 17:45:52,001 - sglang - INFO - [2025-07-20 17:45:52 TP0] Decode batch. #running-req: 1, #token: 2970, token usage: 0.08, gen throughput (token/s): 48.05, #queue-req: 0
  25667. 2025-07-20 17:45:52,001 - __main__ - INFO - sglang running req: 1 queue req: 0
  25668. 2025-07-20 17:45:52,837 - sglang - INFO - [2025-07-20 17:45:52 TP0] Decode batch. #running-req: 1, #token: 3010, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
  25669. 2025-07-20 17:45:52,838 - __main__ - INFO - sglang running req: 1 queue req: 0
  25670. 2025-07-20 17:45:53,679 - sglang - INFO - [2025-07-20 17:45:53 TP0] Decode batch. #running-req: 1, #token: 3050, token usage: 0.08, gen throughput (token/s): 47.53, #queue-req: 0
  25671. 2025-07-20 17:45:53,679 - __main__ - INFO - sglang running req: 1 queue req: 0
  25672. 2025-07-20 17:45:54,517 - sglang - INFO - [2025-07-20 17:45:54 TP0] Decode batch. #running-req: 1, #token: 3090, token usage: 0.08, gen throughput (token/s): 47.72, #queue-req: 0
  25673. 2025-07-20 17:45:54,517 - __main__ - INFO - sglang running req: 1 queue req: 0
  25674. 2025-07-20 17:45:55,359 - sglang - INFO - [2025-07-20 17:45:55 TP0] Decode batch. #running-req: 1, #token: 3130, token usage: 0.08, gen throughput (token/s): 47.51, #queue-req: 0
  25675. 2025-07-20 17:45:55,359 - __main__ - INFO - sglang running req: 1 queue req: 0
  25676. 2025-07-20 17:45:56,203 - sglang - INFO - [2025-07-20 17:45:56 TP0] Decode batch. #running-req: 1, #token: 3170, token usage: 0.08, gen throughput (token/s): 47.39, #queue-req: 0
  25677. 2025-07-20 17:45:56,203 - __main__ - INFO - sglang running req: 1 queue req: 0
  25678. 2025-07-20 17:45:56,964 - __main__ - INFO - Queue remaining: 0
  25679. 2025-07-20 17:45:56,964 - __main__ - INFO -
  25680. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25681. ----------------------------------------------------------------------------------
  25682. finished_input_tokens 370.45 121.86
  25683. finished_output_tokens 88.97 31.49
  25684. sglang_input_tokens 397.04 177.49
  25685. sglang_output_tokens 108.48 74.28
  25686. 2025-07-20 17:45:56,964 - __main__ - INFO -
  25687. Worker ID | finished | started
  25688. ----------+----------+--------
  25689. 0 | 8 | 9
  25690. 2025-07-20 17:45:57,048 - sglang - INFO - [2025-07-20 17:45:57 TP0] Decode batch. #running-req: 1, #token: 3210, token usage: 0.08, gen throughput (token/s): 47.32, #queue-req: 0
  25691. 2025-07-20 17:45:57,049 - __main__ - INFO - sglang running req: 1 queue req: 0
  25692. 2025-07-20 17:45:57,884 - sglang - INFO - [2025-07-20 17:45:57 TP0] Decode batch. #running-req: 1, #token: 3250, token usage: 0.09, gen throughput (token/s): 47.85, #queue-req: 0
  25693. 2025-07-20 17:45:57,884 - __main__ - INFO - sglang running req: 1 queue req: 0
  25694. 2025-07-20 17:45:58,717 - sglang - INFO - [2025-07-20 17:45:58 TP0] Decode batch. #running-req: 1, #token: 3290, token usage: 0.09, gen throughput (token/s): 48.01, #queue-req: 0
  25695. 2025-07-20 17:45:58,718 - __main__ - INFO - sglang running req: 1 queue req: 0
  25696. 2025-07-20 17:45:59,555 - sglang - INFO - [2025-07-20 17:45:59 TP0] Decode batch. #running-req: 1, #token: 3330, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
  25697. 2025-07-20 17:45:59,555 - __main__ - INFO - sglang running req: 1 queue req: 0
  25698. 2025-07-20 17:46:00,396 - sglang - INFO - [2025-07-20 17:46:00 TP0] Decode batch. #running-req: 1, #token: 3370, token usage: 0.09, gen throughput (token/s): 47.55, #queue-req: 0
  25699. 2025-07-20 17:46:00,396 - __main__ - INFO - sglang running req: 1 queue req: 0
  25700. 2025-07-20 17:46:01,230 - sglang - INFO - [2025-07-20 17:46:01 TP0] Decode batch. #running-req: 1, #token: 3410, token usage: 0.09, gen throughput (token/s): 47.94, #queue-req: 0
  25701. 2025-07-20 17:46:01,231 - __main__ - INFO - sglang running req: 1 queue req: 0
  25702. 2025-07-20 17:46:02,065 - sglang - INFO - [2025-07-20 17:46:02 TP0] Decode batch. #running-req: 1, #token: 3450, token usage: 0.09, gen throughput (token/s): 47.95, #queue-req: 0
  25703. 2025-07-20 17:46:02,065 - __main__ - INFO - sglang running req: 1 queue req: 0
  25704. 2025-07-20 17:46:02,901 - sglang - INFO - [2025-07-20 17:46:02 TP0] Decode batch. #running-req: 1, #token: 3490, token usage: 0.09, gen throughput (token/s): 47.85, #queue-req: 0
  25705. 2025-07-20 17:46:02,901 - __main__ - INFO - sglang running req: 1 queue req: 0
  25706. 2025-07-20 17:46:03,744 - sglang - INFO - [2025-07-20 17:46:03 TP0] Decode batch. #running-req: 1, #token: 3530, token usage: 0.09, gen throughput (token/s): 47.42, #queue-req: 0
  25707. 2025-07-20 17:46:03,744 - __main__ - INFO - sglang running req: 1 queue req: 0
  25708. 2025-07-20 17:46:04,585 - sglang - INFO - [2025-07-20 17:46:04 TP0] Decode batch. #running-req: 1, #token: 3570, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
  25709. 2025-07-20 17:46:04,585 - __main__ - INFO - sglang running req: 1 queue req: 0
  25710. 2025-07-20 17:46:05,419 - sglang - INFO - [2025-07-20 17:46:05 TP0] Decode batch. #running-req: 1, #token: 3610, token usage: 0.10, gen throughput (token/s): 47.99, #queue-req: 0
  25711. 2025-07-20 17:46:05,419 - __main__ - INFO - sglang running req: 1 queue req: 0
  25712. 2025-07-20 17:46:06,254 - sglang - INFO - [2025-07-20 17:46:06 TP0] Decode batch. #running-req: 1, #token: 3650, token usage: 0.10, gen throughput (token/s): 47.90, #queue-req: 0
  25713. 2025-07-20 17:46:06,254 - __main__ - INFO - sglang running req: 1 queue req: 0
  25714. 2025-07-20 17:46:06,965 - __main__ - INFO - Queue remaining: 0
  25715. 2025-07-20 17:46:06,966 - __main__ - INFO -
  25716. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25717. ----------------------------------------------------------------------------------
  25718. finished_input_tokens 367.56 121.86
  25719. finished_output_tokens 88.27 31.49
  25720. sglang_input_tokens 393.94 177.49
  25721. sglang_output_tokens 107.64 74.28
  25722. 2025-07-20 17:46:06,966 - __main__ - INFO -
  25723. Worker ID | finished | started
  25724. ----------+----------+--------
  25725. 0 | 8 | 9
  25726. 2025-07-20 17:46:07,096 - sglang - INFO - [2025-07-20 17:46:07 TP0] Decode batch. #running-req: 1, #token: 3690, token usage: 0.10, gen throughput (token/s): 47.49, #queue-req: 0
  25727. 2025-07-20 17:46:07,096 - __main__ - INFO - sglang running req: 1 queue req: 0
  25728. 2025-07-20 17:46:07,932 - sglang - INFO - [2025-07-20 17:46:07 TP0] Decode batch. #running-req: 1, #token: 3730, token usage: 0.10, gen throughput (token/s): 47.84, #queue-req: 0
  25729. 2025-07-20 17:46:07,933 - __main__ - INFO - sglang running req: 1 queue req: 0
  25730. 2025-07-20 17:46:08,768 - sglang - INFO - [2025-07-20 17:46:08 TP0] Decode batch. #running-req: 1, #token: 3770, token usage: 0.10, gen throughput (token/s): 47.84, #queue-req: 0
  25731. 2025-07-20 17:46:08,769 - __main__ - INFO - sglang running req: 1 queue req: 0
  25732. 2025-07-20 17:46:09,605 - sglang - INFO - [2025-07-20 17:46:09 TP0] Decode batch. #running-req: 1, #token: 3810, token usage: 0.10, gen throughput (token/s): 47.78, #queue-req: 0
  25733. 2025-07-20 17:46:09,606 - __main__ - INFO - sglang running req: 1 queue req: 0
  25734. 2025-07-20 17:46:10,450 - sglang - INFO - [2025-07-20 17:46:10 TP0] Decode batch. #running-req: 1, #token: 3850, token usage: 0.10, gen throughput (token/s): 47.37, #queue-req: 0
  25735. 2025-07-20 17:46:10,450 - __main__ - INFO - sglang running req: 1 queue req: 0
  25736. 2025-07-20 17:46:11,290 - sglang - INFO - [2025-07-20 17:46:11 TP0] Decode batch. #running-req: 1, #token: 3890, token usage: 0.10, gen throughput (token/s): 47.63, #queue-req: 0
  25737. 2025-07-20 17:46:11,290 - __main__ - INFO - sglang running req: 1 queue req: 0
  25738. 2025-07-20 17:46:12,125 - sglang - INFO - [2025-07-20 17:46:12 TP0] Decode batch. #running-req: 1, #token: 3930, token usage: 0.10, gen throughput (token/s): 47.87, #queue-req: 0
  25739. 2025-07-20 17:46:12,125 - __main__ - INFO - sglang running req: 1 queue req: 0
  25740. 2025-07-20 17:46:12,962 - sglang - INFO - [2025-07-20 17:46:12 TP0] Decode batch. #running-req: 1, #token: 3970, token usage: 0.10, gen throughput (token/s): 47.82, #queue-req: 0
  25741. 2025-07-20 17:46:12,962 - __main__ - INFO - sglang running req: 1 queue req: 0
  25742. 2025-07-20 17:46:13,803 - sglang - INFO - [2025-07-20 17:46:13 TP0] Decode batch. #running-req: 1, #token: 4010, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
  25743. 2025-07-20 17:46:13,803 - __main__ - INFO - sglang running req: 1 queue req: 0
  25744. 2025-07-20 17:46:14,640 - sglang - INFO - [2025-07-20 17:46:14 TP0] Decode batch. #running-req: 1, #token: 4050, token usage: 0.11, gen throughput (token/s): 47.75, #queue-req: 0
  25745. 2025-07-20 17:46:14,641 - __main__ - INFO - sglang running req: 1 queue req: 0
  25746. 2025-07-20 17:46:15,475 - sglang - INFO - [2025-07-20 17:46:15 TP0] Decode batch. #running-req: 1, #token: 4090, token usage: 0.11, gen throughput (token/s): 47.93, #queue-req: 0
  25747. 2025-07-20 17:46:15,475 - __main__ - INFO - sglang running req: 1 queue req: 0
  25748. 2025-07-20 17:46:16,310 - sglang - INFO - [2025-07-20 17:46:16 TP0] Decode batch. #running-req: 1, #token: 4130, token usage: 0.11, gen throughput (token/s): 47.90, #queue-req: 0
  25749. 2025-07-20 17:46:16,310 - __main__ - INFO - sglang running req: 1 queue req: 0
  25750. 2025-07-20 17:46:16,967 - __main__ - INFO - Queue remaining: 0
  25751. 2025-07-20 17:46:16,968 - __main__ - INFO -
  25752. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25753. ----------------------------------------------------------------------------------
  25754. finished_input_tokens 364.71 121.86
  25755. finished_output_tokens 87.59 31.49
  25756. sglang_input_tokens 390.89 177.49
  25757. sglang_output_tokens 106.80 74.28
  25758. 2025-07-20 17:46:16,968 - __main__ - INFO -
  25759. Worker ID | finished | started
  25760. ----------+----------+--------
  25761. 0 | 8 | 9
  25762. 2025-07-20 17:46:17,147 - sglang - INFO - [2025-07-20 17:46:17 TP0] Decode batch. #running-req: 1, #token: 4170, token usage: 0.11, gen throughput (token/s): 47.81, #queue-req: 0
  25763. 2025-07-20 17:46:17,147 - __main__ - INFO - sglang running req: 1 queue req: 0
  25764. 2025-07-20 17:46:17,989 - sglang - INFO - [2025-07-20 17:46:17 TP0] Decode batch. #running-req: 1, #token: 4210, token usage: 0.11, gen throughput (token/s): 47.48, #queue-req: 0
  25765. 2025-07-20 17:46:17,989 - __main__ - INFO - sglang running req: 1 queue req: 0
  25766. 2025-07-20 17:46:18,829 - sglang - INFO - [2025-07-20 17:46:18 TP0] Decode batch. #running-req: 1, #token: 4250, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
  25767. 2025-07-20 17:46:18,829 - __main__ - INFO - sglang running req: 1 queue req: 0
  25768. 2025-07-20 17:46:19,669 - sglang - INFO - [2025-07-20 17:46:19 TP0] Decode batch. #running-req: 1, #token: 4290, token usage: 0.11, gen throughput (token/s): 47.63, #queue-req: 0
  25769. 2025-07-20 17:46:19,669 - __main__ - INFO - sglang running req: 1 queue req: 0
  25770. 2025-07-20 17:46:20,510 - sglang - INFO - [2025-07-20 17:46:20 TP0] Decode batch. #running-req: 1, #token: 4330, token usage: 0.11, gen throughput (token/s): 47.52, #queue-req: 0
  25771. 2025-07-20 17:46:20,511 - __main__ - INFO - sglang running req: 1 queue req: 0
  25772. 2025-07-20 17:46:21,346 - sglang - INFO - [2025-07-20 17:46:21 TP0] Decode batch. #running-req: 1, #token: 4370, token usage: 0.12, gen throughput (token/s): 47.88, #queue-req: 0
  25773. 2025-07-20 17:46:21,346 - __main__ - INFO - sglang running req: 1 queue req: 0
  25774. 2025-07-20 17:46:22,184 - sglang - INFO - [2025-07-20 17:46:22 TP0] Decode batch. #running-req: 1, #token: 4410, token usage: 0.12, gen throughput (token/s): 47.69, #queue-req: 0
  25775. 2025-07-20 17:46:22,185 - __main__ - INFO - sglang running req: 1 queue req: 0
  25776. 2025-07-20 17:46:23,021 - sglang - INFO - [2025-07-20 17:46:23 TP0] Decode batch. #running-req: 1, #token: 4450, token usage: 0.12, gen throughput (token/s): 47.80, #queue-req: 0
  25777. 2025-07-20 17:46:23,021 - __main__ - INFO - sglang running req: 1 queue req: 0
  25778. 2025-07-20 17:46:23,865 - sglang - INFO - [2025-07-20 17:46:23 TP0] Decode batch. #running-req: 1, #token: 4490, token usage: 0.12, gen throughput (token/s): 47.41, #queue-req: 0
  25779. 2025-07-20 17:46:23,865 - __main__ - INFO - sglang running req: 1 queue req: 0
  25780. 2025-07-20 17:46:24,712 - sglang - INFO - [2025-07-20 17:46:24 TP0] Decode batch. #running-req: 1, #token: 4530, token usage: 0.12, gen throughput (token/s): 47.24, #queue-req: 0
  25781. 2025-07-20 17:46:24,712 - __main__ - INFO - sglang running req: 1 queue req: 0
  25782. 2025-07-20 17:46:25,549 - sglang - INFO - [2025-07-20 17:46:25 TP0] Decode batch. #running-req: 1, #token: 4570, token usage: 0.12, gen throughput (token/s): 47.74, #queue-req: 0
  25783. 2025-07-20 17:46:25,550 - __main__ - INFO - sglang running req: 1 queue req: 0
  25784. 2025-07-20 17:46:26,387 - sglang - INFO - [2025-07-20 17:46:26 TP0] Decode batch. #running-req: 1, #token: 4610, token usage: 0.12, gen throughput (token/s): 47.74, #queue-req: 0
  25785. 2025-07-20 17:46:26,388 - __main__ - INFO - sglang running req: 1 queue req: 0
  25786. 2025-07-20 17:46:26,969 - __main__ - INFO - Queue remaining: 0
  25787. 2025-07-20 17:46:26,969 - __main__ - INFO -
  25788. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25789. ----------------------------------------------------------------------------------
  25790. finished_input_tokens 361.90 121.86
  25791. finished_output_tokens 86.91 31.49
  25792. sglang_input_tokens 387.88 177.49
  25793. sglang_output_tokens 105.98 74.28
  25794. 2025-07-20 17:46:26,969 - __main__ - INFO -
  25795. Worker ID | finished | started
  25796. ----------+----------+--------
  25797. 0 | 8 | 9
  25798. 2025-07-20 17:46:27,230 - sglang - INFO - [2025-07-20 17:46:27 TP0] Decode batch. #running-req: 1, #token: 4650, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
  25799. 2025-07-20 17:46:27,231 - __main__ - INFO - sglang running req: 1 queue req: 0
  25800. 2025-07-20 17:46:28,072 - sglang - INFO - [2025-07-20 17:46:28 TP0] Decode batch. #running-req: 1, #token: 4690, token usage: 0.12, gen throughput (token/s): 47.51, #queue-req: 0
  25801. 2025-07-20 17:46:28,072 - __main__ - INFO - sglang running req: 1 queue req: 0
  25802. 2025-07-20 17:46:28,915 - sglang - INFO - [2025-07-20 17:46:28 TP0] Decode batch. #running-req: 1, #token: 4730, token usage: 0.12, gen throughput (token/s): 47.45, #queue-req: 0
  25803. 2025-07-20 17:46:28,915 - __main__ - INFO - sglang running req: 1 queue req: 0
  25804. 2025-07-20 17:46:29,758 - sglang - INFO - [2025-07-20 17:46:29 TP0] Decode batch. #running-req: 1, #token: 4770, token usage: 0.13, gen throughput (token/s): 47.45, #queue-req: 0
  25805. 2025-07-20 17:46:29,758 - __main__ - INFO - sglang running req: 1 queue req: 0
  25806. 2025-07-20 17:46:30,606 - sglang - INFO - [2025-07-20 17:46:30 TP0] Decode batch. #running-req: 1, #token: 4810, token usage: 0.13, gen throughput (token/s): 47.17, #queue-req: 0
  25807. 2025-07-20 17:46:30,606 - __main__ - INFO - sglang running req: 1 queue req: 0
  25808. 2025-07-20 17:46:31,457 - sglang - INFO - [2025-07-20 17:46:31 TP0] Decode batch. #running-req: 1, #token: 4850, token usage: 0.13, gen throughput (token/s): 47.01, #queue-req: 0
  25809. 2025-07-20 17:46:31,457 - __main__ - INFO - sglang running req: 1 queue req: 0
  25810. 2025-07-20 17:46:32,301 - sglang - INFO - [2025-07-20 17:46:32 TP0] Decode batch. #running-req: 1, #token: 4890, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
  25811. 2025-07-20 17:46:32,302 - __main__ - INFO - sglang running req: 1 queue req: 0
  25812. 2025-07-20 17:46:33,139 - sglang - INFO - [2025-07-20 17:46:33 TP0] Decode batch. #running-req: 1, #token: 4930, token usage: 0.13, gen throughput (token/s): 47.75, #queue-req: 0
  25813. 2025-07-20 17:46:33,139 - __main__ - INFO - sglang running req: 1 queue req: 0
  25814. 2025-07-20 17:46:33,983 - sglang - INFO - [2025-07-20 17:46:33 TP0] Decode batch. #running-req: 1, #token: 4970, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
  25815. 2025-07-20 17:46:33,984 - __main__ - INFO - sglang running req: 1 queue req: 0
  25816. 2025-07-20 17:46:34,828 - sglang - INFO - [2025-07-20 17:46:34 TP0] Decode batch. #running-req: 1, #token: 5010, token usage: 0.13, gen throughput (token/s): 47.36, #queue-req: 0
  25817. 2025-07-20 17:46:34,828 - __main__ - INFO - sglang running req: 1 queue req: 0
  25818. 2025-07-20 17:46:35,671 - sglang - INFO - [2025-07-20 17:46:35 TP0] Decode batch. #running-req: 1, #token: 5050, token usage: 0.13, gen throughput (token/s): 47.45, #queue-req: 0
  25819. 2025-07-20 17:46:35,671 - __main__ - INFO - sglang running req: 1 queue req: 0
  25820. 2025-07-20 17:46:36,514 - sglang - INFO - [2025-07-20 17:46:36 TP0] Decode batch. #running-req: 1, #token: 5090, token usage: 0.13, gen throughput (token/s): 47.42, #queue-req: 0
  25821. 2025-07-20 17:46:36,515 - __main__ - INFO - sglang running req: 1 queue req: 0
  25822. 2025-07-20 17:46:36,971 - __main__ - INFO - Queue remaining: 0
  25823. 2025-07-20 17:46:36,971 - __main__ - INFO -
  25824. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25825. ----------------------------------------------------------------------------------
  25826. finished_input_tokens 359.14 121.86
  25827. finished_output_tokens 86.25 31.49
  25828. sglang_input_tokens 384.92 177.49
  25829. sglang_output_tokens 105.17 74.28
  25830. 2025-07-20 17:46:36,971 - __main__ - INFO -
  25831. Worker ID | finished | started
  25832. ----------+----------+--------
  25833. 0 | 8 | 9
  25834. 2025-07-20 17:46:37,362 - sglang - INFO - [2025-07-20 17:46:37 TP0] Decode batch. #running-req: 1, #token: 5130, token usage: 0.14, gen throughput (token/s): 47.17, #queue-req: 0
  25835. 2025-07-20 17:46:37,362 - __main__ - INFO - sglang running req: 1 queue req: 0
  25836. 2025-07-20 17:46:38,213 - sglang - INFO - [2025-07-20 17:46:38 TP0] Decode batch. #running-req: 1, #token: 5170, token usage: 0.14, gen throughput (token/s): 46.99, #queue-req: 0
  25837. 2025-07-20 17:46:38,214 - __main__ - INFO - sglang running req: 1 queue req: 0
  25838. 2025-07-20 17:46:39,059 - sglang - INFO - [2025-07-20 17:46:39 TP0] Decode batch. #running-req: 1, #token: 5210, token usage: 0.14, gen throughput (token/s): 47.34, #queue-req: 0
  25839. 2025-07-20 17:46:39,059 - __main__ - INFO - sglang running req: 1 queue req: 0
  25840. 2025-07-20 17:46:39,898 - sglang - INFO - [2025-07-20 17:46:39 TP0] Decode batch. #running-req: 1, #token: 5250, token usage: 0.14, gen throughput (token/s): 47.67, #queue-req: 0
  25841. 2025-07-20 17:46:39,898 - __main__ - INFO - sglang running req: 1 queue req: 0
  25842. 2025-07-20 17:46:40,742 - sglang - INFO - [2025-07-20 17:46:40 TP0] Decode batch. #running-req: 1, #token: 5290, token usage: 0.14, gen throughput (token/s): 47.37, #queue-req: 0
  25843. 2025-07-20 17:46:40,742 - __main__ - INFO - sglang running req: 1 queue req: 0
  25844. 2025-07-20 17:46:41,591 - sglang - INFO - [2025-07-20 17:46:41 TP0] Decode batch. #running-req: 1, #token: 5330, token usage: 0.14, gen throughput (token/s): 47.10, #queue-req: 0
  25845. 2025-07-20 17:46:41,591 - __main__ - INFO - sglang running req: 1 queue req: 0
  25846. 2025-07-20 17:46:42,437 - sglang - INFO - [2025-07-20 17:46:42 TP0] Decode batch. #running-req: 1, #token: 5370, token usage: 0.14, gen throughput (token/s): 47.28, #queue-req: 0
  25847. 2025-07-20 17:46:42,437 - __main__ - INFO - sglang running req: 1 queue req: 0
  25848. 2025-07-20 17:46:43,287 - sglang - INFO - [2025-07-20 17:46:43 TP0] Decode batch. #running-req: 1, #token: 5410, token usage: 0.14, gen throughput (token/s): 47.07, #queue-req: 0
  25849. 2025-07-20 17:46:43,287 - __main__ - INFO - sglang running req: 1 queue req: 0
  25850. 2025-07-20 17:46:44,136 - sglang - INFO - [2025-07-20 17:46:44 TP0] Decode batch. #running-req: 1, #token: 5450, token usage: 0.14, gen throughput (token/s): 47.11, #queue-req: 0
  25851. 2025-07-20 17:46:44,136 - __main__ - INFO - sglang running req: 1 queue req: 0
  25852. 2025-07-20 17:46:44,990 - sglang - INFO - [2025-07-20 17:46:44 TP0] Decode batch. #running-req: 1, #token: 5490, token usage: 0.14, gen throughput (token/s): 46.84, #queue-req: 0
  25853. 2025-07-20 17:46:44,990 - __main__ - INFO - sglang running req: 1 queue req: 0
  25854. 2025-07-20 17:46:45,843 - sglang - INFO - [2025-07-20 17:46:45 TP0] Decode batch. #running-req: 1, #token: 5530, token usage: 0.15, gen throughput (token/s): 46.91, #queue-req: 0
  25855. 2025-07-20 17:46:45,843 - __main__ - INFO - sglang running req: 1 queue req: 0
  25856. 2025-07-20 17:46:46,685 - sglang - INFO - [2025-07-20 17:46:46 TP0] Decode batch. #running-req: 1, #token: 5570, token usage: 0.15, gen throughput (token/s): 47.46, #queue-req: 0
  25857. 2025-07-20 17:46:46,686 - __main__ - INFO - sglang running req: 1 queue req: 0
  25858. 2025-07-20 17:46:46,973 - __main__ - INFO - Queue remaining: 0
  25859. 2025-07-20 17:46:46,974 - __main__ - INFO -
  25860. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25861. ----------------------------------------------------------------------------------
  25862. finished_input_tokens 356.42 121.86
  25863. finished_output_tokens 85.60 31.49
  25864. sglang_input_tokens 382.01 177.49
  25865. sglang_output_tokens 104.38 74.28
  25866. 2025-07-20 17:46:46,974 - __main__ - INFO -
  25867. Worker ID | finished | started
  25868. ----------+----------+--------
  25869. 0 | 8 | 9
  25870. 2025-07-20 17:46:47,471 - __main__ - WARNING - JSON decode error on attempt 4 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
  25871. 2025-07-20 17:46:47,661 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  25872. 2025-07-20 17:46:47,823 - sglang - INFO - [2025-07-20 17:46:47 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  25873. 2025-07-20 17:46:47,823 - __main__ - INFO - sglang running req: 0 queue req: 0
  25874. 2025-07-20 17:46:48,666 - sglang - INFO - [2025-07-20 17:46:48 TP0] Decode batch. #running-req: 1, #token: 2611, token usage: 0.07, gen throughput (token/s): 20.19, #queue-req: 0
  25875. 2025-07-20 17:46:48,666 - __main__ - INFO - sglang running req: 1 queue req: 0
  25876. 2025-07-20 17:46:49,504 - sglang - INFO - [2025-07-20 17:46:49 TP0] Decode batch. #running-req: 1, #token: 2651, token usage: 0.07, gen throughput (token/s): 47.74, #queue-req: 0
  25877. 2025-07-20 17:46:49,504 - __main__ - INFO - sglang running req: 1 queue req: 0
  25878. 2025-07-20 17:46:50,343 - sglang - INFO - [2025-07-20 17:46:50 TP0] Decode batch. #running-req: 1, #token: 2691, token usage: 0.07, gen throughput (token/s): 47.68, #queue-req: 0
  25879. 2025-07-20 17:46:50,343 - __main__ - INFO - sglang running req: 1 queue req: 0
  25880. 2025-07-20 17:46:51,182 - sglang - INFO - [2025-07-20 17:46:51 TP0] Decode batch. #running-req: 1, #token: 2731, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
  25881. 2025-07-20 17:46:51,183 - __main__ - INFO - sglang running req: 1 queue req: 0
  25882. 2025-07-20 17:46:52,024 - sglang - INFO - [2025-07-20 17:46:52 TP0] Decode batch. #running-req: 1, #token: 2771, token usage: 0.07, gen throughput (token/s): 47.51, #queue-req: 0
  25883. 2025-07-20 17:46:52,025 - __main__ - INFO - sglang running req: 1 queue req: 0
  25884. 2025-07-20 17:46:52,864 - sglang - INFO - [2025-07-20 17:46:52 TP0] Decode batch. #running-req: 1, #token: 2811, token usage: 0.07, gen throughput (token/s): 47.65, #queue-req: 0
  25885. 2025-07-20 17:46:52,864 - __main__ - INFO - sglang running req: 1 queue req: 0
  25886. 2025-07-20 17:46:53,695 - sglang - INFO - [2025-07-20 17:46:53 TP0] Decode batch. #running-req: 1, #token: 2851, token usage: 0.08, gen throughput (token/s): 48.13, #queue-req: 0
  25887. 2025-07-20 17:46:53,695 - __main__ - INFO - sglang running req: 1 queue req: 0
  25888. 2025-07-20 17:46:54,527 - sglang - INFO - [2025-07-20 17:46:54 TP0] Decode batch. #running-req: 1, #token: 2891, token usage: 0.08, gen throughput (token/s): 48.09, #queue-req: 0
  25889. 2025-07-20 17:46:54,527 - __main__ - INFO - sglang running req: 1 queue req: 0
  25890. 2025-07-20 17:46:55,365 - sglang - INFO - [2025-07-20 17:46:55 TP0] Decode batch. #running-req: 1, #token: 2931, token usage: 0.08, gen throughput (token/s): 47.70, #queue-req: 0
  25891. 2025-07-20 17:46:55,365 - __main__ - INFO - sglang running req: 1 queue req: 0
  25892. 2025-07-20 17:46:56,201 - sglang - INFO - [2025-07-20 17:46:56 TP0] Decode batch. #running-req: 1, #token: 2971, token usage: 0.08, gen throughput (token/s): 47.85, #queue-req: 0
  25893. 2025-07-20 17:46:56,201 - __main__ - INFO - sglang running req: 1 queue req: 0
  25894. 2025-07-20 17:46:56,976 - __main__ - INFO - Queue remaining: 0
  25895. 2025-07-20 17:46:56,976 - __main__ - INFO -
  25896. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25897. ----------------------------------------------------------------------------------
  25898. finished_input_tokens 353.74 0.00
  25899. finished_output_tokens 84.95 0.00
  25900. sglang_input_tokens 381.09 51.19
  25901. sglang_output_tokens 105.85 52.98
  25902. 2025-07-20 17:46:56,976 - __main__ - INFO -
  25903. Worker ID | finished | started
  25904. ----------+----------+--------
  25905. 0 | 8 | 9
  25906. 2025-07-20 17:46:57,036 - sglang - INFO - [2025-07-20 17:46:57 TP0] Decode batch. #running-req: 1, #token: 3011, token usage: 0.08, gen throughput (token/s): 47.89, #queue-req: 0
  25907. 2025-07-20 17:46:57,037 - __main__ - INFO - sglang running req: 1 queue req: 0
  25908. 2025-07-20 17:46:57,871 - sglang - INFO - [2025-07-20 17:46:57 TP0] Decode batch. #running-req: 1, #token: 3051, token usage: 0.08, gen throughput (token/s): 47.93, #queue-req: 0
  25909. 2025-07-20 17:46:57,871 - __main__ - INFO - sglang running req: 1 queue req: 0
  25910. 2025-07-20 17:46:58,714 - sglang - INFO - [2025-07-20 17:46:58 TP0] Decode batch. #running-req: 1, #token: 3091, token usage: 0.08, gen throughput (token/s): 47.43, #queue-req: 0
  25911. 2025-07-20 17:46:58,715 - __main__ - INFO - sglang running req: 1 queue req: 0
  25912. 2025-07-20 17:46:59,552 - sglang - INFO - [2025-07-20 17:46:59 TP0] Decode batch. #running-req: 1, #token: 3131, token usage: 0.08, gen throughput (token/s): 47.74, #queue-req: 0
  25913. 2025-07-20 17:46:59,552 - __main__ - INFO - sglang running req: 1 queue req: 0
  25914. 2025-07-20 17:47:00,388 - sglang - INFO - [2025-07-20 17:47:00 TP0] Decode batch. #running-req: 1, #token: 3171, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
  25915. 2025-07-20 17:47:00,389 - __main__ - INFO - sglang running req: 1 queue req: 0
  25916. 2025-07-20 17:47:01,220 - sglang - INFO - [2025-07-20 17:47:01 TP0] Decode batch. #running-req: 1, #token: 3211, token usage: 0.08, gen throughput (token/s): 48.08, #queue-req: 0
  25917. 2025-07-20 17:47:01,221 - __main__ - INFO - sglang running req: 1 queue req: 0
  25918. 2025-07-20 17:47:02,061 - sglang - INFO - [2025-07-20 17:47:02 TP0] Decode batch. #running-req: 1, #token: 3251, token usage: 0.09, gen throughput (token/s): 47.58, #queue-req: 0
  25919. 2025-07-20 17:47:02,061 - __main__ - INFO - sglang running req: 1 queue req: 0
  25920. 2025-07-20 17:47:02,902 - sglang - INFO - [2025-07-20 17:47:02 TP0] Decode batch. #running-req: 1, #token: 3291, token usage: 0.09, gen throughput (token/s): 47.58, #queue-req: 0
  25921. 2025-07-20 17:47:02,902 - __main__ - INFO - sglang running req: 1 queue req: 0
  25922. 2025-07-20 17:47:03,737 - sglang - INFO - [2025-07-20 17:47:03 TP0] Decode batch. #running-req: 1, #token: 3331, token usage: 0.09, gen throughput (token/s): 47.88, #queue-req: 0
  25923. 2025-07-20 17:47:03,737 - __main__ - INFO - sglang running req: 1 queue req: 0
  25924. 2025-07-20 17:47:04,571 - sglang - INFO - [2025-07-20 17:47:04 TP0] Decode batch. #running-req: 1, #token: 3371, token usage: 0.09, gen throughput (token/s): 47.99, #queue-req: 0
  25925. 2025-07-20 17:47:04,571 - __main__ - INFO - sglang running req: 1 queue req: 0
  25926. 2025-07-20 17:47:05,412 - sglang - INFO - [2025-07-20 17:47:05 TP0] Decode batch. #running-req: 1, #token: 3411, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
  25927. 2025-07-20 17:47:05,412 - __main__ - INFO - sglang running req: 1 queue req: 0
  25928. 2025-07-20 17:47:06,253 - sglang - INFO - [2025-07-20 17:47:06 TP0] Decode batch. #running-req: 1, #token: 3451, token usage: 0.09, gen throughput (token/s): 47.57, #queue-req: 0
  25929. 2025-07-20 17:47:06,253 - __main__ - INFO - sglang running req: 1 queue req: 0
  25930. 2025-07-20 17:47:06,978 - __main__ - INFO - Queue remaining: 0
  25931. 2025-07-20 17:47:06,978 - __main__ - INFO -
  25932. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25933. ----------------------------------------------------------------------------------
  25934. finished_input_tokens 351.10 0.00
  25935. finished_output_tokens 84.32 0.00
  25936. sglang_input_tokens 378.25 51.19
  25937. sglang_output_tokens 105.06 52.98
  25938. 2025-07-20 17:47:06,979 - __main__ - INFO -
  25939. Worker ID | finished | started
  25940. ----------+----------+--------
  25941. 0 | 8 | 9
  25942. 2025-07-20 17:47:07,086 - sglang - INFO - [2025-07-20 17:47:07 TP0] Decode batch. #running-req: 1, #token: 3491, token usage: 0.09, gen throughput (token/s): 48.00, #queue-req: 0
  25943. 2025-07-20 17:47:07,086 - __main__ - INFO - sglang running req: 1 queue req: 0
  25944. 2025-07-20 17:47:07,918 - sglang - INFO - [2025-07-20 17:47:07 TP0] Decode batch. #running-req: 1, #token: 3531, token usage: 0.09, gen throughput (token/s): 48.07, #queue-req: 0
  25945. 2025-07-20 17:47:07,918 - __main__ - INFO - sglang running req: 1 queue req: 0
  25946. 2025-07-20 17:47:08,757 - sglang - INFO - [2025-07-20 17:47:08 TP0] Decode batch. #running-req: 1, #token: 3571, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
  25947. 2025-07-20 17:47:08,757 - __main__ - INFO - sglang running req: 1 queue req: 0
  25948. 2025-07-20 17:47:09,595 - sglang - INFO - [2025-07-20 17:47:09 TP0] Decode batch. #running-req: 1, #token: 3611, token usage: 0.10, gen throughput (token/s): 47.69, #queue-req: 0
  25949. 2025-07-20 17:47:09,596 - __main__ - INFO - sglang running req: 1 queue req: 0
  25950. 2025-07-20 17:47:10,428 - sglang - INFO - [2025-07-20 17:47:10 TP0] Decode batch. #running-req: 1, #token: 3651, token usage: 0.10, gen throughput (token/s): 48.03, #queue-req: 0
  25951. 2025-07-20 17:47:10,428 - __main__ - INFO - sglang running req: 1 queue req: 0
  25952. 2025-07-20 17:47:11,259 - sglang - INFO - [2025-07-20 17:47:11 TP0] Decode batch. #running-req: 1, #token: 3691, token usage: 0.10, gen throughput (token/s): 48.12, #queue-req: 0
  25953. 2025-07-20 17:47:11,260 - __main__ - INFO - sglang running req: 1 queue req: 0
  25954. 2025-07-20 17:47:12,092 - sglang - INFO - [2025-07-20 17:47:12 TP0] Decode batch. #running-req: 1, #token: 3731, token usage: 0.10, gen throughput (token/s): 48.04, #queue-req: 0
  25955. 2025-07-20 17:47:12,092 - __main__ - INFO - sglang running req: 1 queue req: 0
  25956. 2025-07-20 17:47:12,932 - sglang - INFO - [2025-07-20 17:47:12 TP0] Decode batch. #running-req: 1, #token: 3771, token usage: 0.10, gen throughput (token/s): 47.61, #queue-req: 0
  25957. 2025-07-20 17:47:12,932 - __main__ - INFO - sglang running req: 1 queue req: 0
  25958. 2025-07-20 17:47:13,770 - sglang - INFO - [2025-07-20 17:47:13 TP0] Decode batch. #running-req: 1, #token: 3811, token usage: 0.10, gen throughput (token/s): 47.74, #queue-req: 0
  25959. 2025-07-20 17:47:13,770 - __main__ - INFO - sglang running req: 1 queue req: 0
  25960. 2025-07-20 17:47:14,603 - sglang - INFO - [2025-07-20 17:47:14 TP0] Decode batch. #running-req: 1, #token: 3851, token usage: 0.10, gen throughput (token/s): 48.02, #queue-req: 0
  25961. 2025-07-20 17:47:14,603 - __main__ - INFO - sglang running req: 1 queue req: 0
  25962. 2025-07-20 17:47:15,440 - sglang - INFO - [2025-07-20 17:47:15 TP0] Decode batch. #running-req: 1, #token: 3891, token usage: 0.10, gen throughput (token/s): 47.78, #queue-req: 0
  25963. 2025-07-20 17:47:15,440 - __main__ - INFO - sglang running req: 1 queue req: 0
  25964. 2025-07-20 17:47:16,278 - sglang - INFO - [2025-07-20 17:47:16 TP0] Decode batch. #running-req: 1, #token: 3931, token usage: 0.10, gen throughput (token/s): 47.73, #queue-req: 0
  25965. 2025-07-20 17:47:16,278 - __main__ - INFO - sglang running req: 1 queue req: 0
  25966. 2025-07-20 17:47:16,980 - __main__ - INFO - Queue remaining: 0
  25967. 2025-07-20 17:47:16,980 - __main__ - INFO -
  25968. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  25969. ----------------------------------------------------------------------------------
  25970. finished_input_tokens 348.50 0.00
  25971. finished_output_tokens 83.70 0.00
  25972. sglang_input_tokens 375.45 51.19
  25973. sglang_output_tokens 104.28 52.98
  25974. 2025-07-20 17:47:16,980 - __main__ - INFO -
  25975. Worker ID | finished | started
  25976. ----------+----------+--------
  25977. 0 | 8 | 9
  25978. 2025-07-20 17:47:17,113 - sglang - INFO - [2025-07-20 17:47:17 TP0] Decode batch. #running-req: 1, #token: 3971, token usage: 0.10, gen throughput (token/s): 47.91, #queue-req: 0
  25979. 2025-07-20 17:47:17,114 - __main__ - INFO - sglang running req: 1 queue req: 0
  25980. 2025-07-20 17:47:17,947 - sglang - INFO - [2025-07-20 17:47:17 TP0] Decode batch. #running-req: 1, #token: 4011, token usage: 0.11, gen throughput (token/s): 47.95, #queue-req: 0
  25981. 2025-07-20 17:47:17,948 - __main__ - INFO - sglang running req: 1 queue req: 0
  25982. 2025-07-20 17:47:18,785 - sglang - INFO - [2025-07-20 17:47:18 TP0] Decode batch. #running-req: 1, #token: 4051, token usage: 0.11, gen throughput (token/s): 47.79, #queue-req: 0
  25983. 2025-07-20 17:47:18,785 - __main__ - INFO - sglang running req: 1 queue req: 0
  25984. 2025-07-20 17:47:19,630 - sglang - INFO - [2025-07-20 17:47:19 TP0] Decode batch. #running-req: 1, #token: 4091, token usage: 0.11, gen throughput (token/s): 47.31, #queue-req: 0
  25985. 2025-07-20 17:47:19,630 - __main__ - INFO - sglang running req: 1 queue req: 0
  25986. 2025-07-20 17:47:20,471 - sglang - INFO - [2025-07-20 17:47:20 TP0] Decode batch. #running-req: 1, #token: 4131, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
  25987. 2025-07-20 17:47:20,471 - __main__ - INFO - sglang running req: 1 queue req: 0
  25988. 2025-07-20 17:47:21,304 - sglang - INFO - [2025-07-20 17:47:21 TP0] Decode batch. #running-req: 1, #token: 4171, token usage: 0.11, gen throughput (token/s): 48.00, #queue-req: 0
  25989. 2025-07-20 17:47:21,305 - __main__ - INFO - sglang running req: 1 queue req: 0
  25990. 2025-07-20 17:47:22,142 - sglang - INFO - [2025-07-20 17:47:22 TP0] Decode batch. #running-req: 1, #token: 4211, token usage: 0.11, gen throughput (token/s): 47.75, #queue-req: 0
  25991. 2025-07-20 17:47:22,142 - __main__ - INFO - sglang running req: 1 queue req: 0
  25992. 2025-07-20 17:47:22,985 - sglang - INFO - [2025-07-20 17:47:22 TP0] Decode batch. #running-req: 1, #token: 4251, token usage: 0.11, gen throughput (token/s): 47.44, #queue-req: 0
  25993. 2025-07-20 17:47:22,985 - __main__ - INFO - sglang running req: 1 queue req: 0
  25994. 2025-07-20 17:47:23,825 - sglang - INFO - [2025-07-20 17:47:23 TP0] Decode batch. #running-req: 1, #token: 4291, token usage: 0.11, gen throughput (token/s): 47.61, #queue-req: 0
  25995. 2025-07-20 17:47:23,826 - __main__ - INFO - sglang running req: 1 queue req: 0
  25996. 2025-07-20 17:47:24,666 - sglang - INFO - [2025-07-20 17:47:24 TP0] Decode batch. #running-req: 1, #token: 4331, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
  25997. 2025-07-20 17:47:24,666 - __main__ - INFO - sglang running req: 1 queue req: 0
  25998. 2025-07-20 17:47:25,510 - sglang - INFO - [2025-07-20 17:47:25 TP0] Decode batch. #running-req: 1, #token: 4371, token usage: 0.12, gen throughput (token/s): 47.41, #queue-req: 0
  25999. 2025-07-20 17:47:25,510 - __main__ - INFO - sglang running req: 1 queue req: 0
  26000. 2025-07-20 17:47:26,357 - sglang - INFO - [2025-07-20 17:47:26 TP0] Decode batch. #running-req: 1, #token: 4411, token usage: 0.12, gen throughput (token/s): 47.19, #queue-req: 0
  26001. 2025-07-20 17:47:26,358 - __main__ - INFO - sglang running req: 1 queue req: 0
  26002. 2025-07-20 17:47:26,981 - __main__ - INFO - Queue remaining: 0
  26003. 2025-07-20 17:47:26,981 - __main__ - INFO -
  26004. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26005. ----------------------------------------------------------------------------------
  26006. finished_input_tokens 345.94 0.00
  26007. finished_output_tokens 83.08 0.00
  26008. sglang_input_tokens 372.69 51.19
  26009. sglang_output_tokens 103.51 52.98
  26010. 2025-07-20 17:47:26,982 - __main__ - INFO -
  26011. Worker ID | finished | started
  26012. ----------+----------+--------
  26013. 0 | 8 | 9
  26014. 2025-07-20 17:47:27,201 - sglang - INFO - [2025-07-20 17:47:27 TP0] Decode batch. #running-req: 1, #token: 4451, token usage: 0.12, gen throughput (token/s): 47.45, #queue-req: 0
  26015. 2025-07-20 17:47:27,201 - __main__ - INFO - sglang running req: 1 queue req: 0
  26016. 2025-07-20 17:47:28,036 - sglang - INFO - [2025-07-20 17:47:28 TP0] Decode batch. #running-req: 1, #token: 4491, token usage: 0.12, gen throughput (token/s): 47.89, #queue-req: 0
  26017. 2025-07-20 17:47:28,036 - __main__ - INFO - sglang running req: 1 queue req: 0
  26018. 2025-07-20 17:47:28,872 - sglang - INFO - [2025-07-20 17:47:28 TP0] Decode batch. #running-req: 1, #token: 4531, token usage: 0.12, gen throughput (token/s): 47.82, #queue-req: 0
  26019. 2025-07-20 17:47:28,873 - __main__ - INFO - sglang running req: 1 queue req: 0
  26020. 2025-07-20 17:47:29,716 - sglang - INFO - [2025-07-20 17:47:29 TP0] Decode batch. #running-req: 1, #token: 4571, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
  26021. 2025-07-20 17:47:29,716 - __main__ - INFO - sglang running req: 1 queue req: 0
  26022. 2025-07-20 17:47:30,557 - sglang - INFO - [2025-07-20 17:47:30 TP0] Decode batch. #running-req: 1, #token: 4611, token usage: 0.12, gen throughput (token/s): 47.55, #queue-req: 0
  26023. 2025-07-20 17:47:30,557 - __main__ - INFO - sglang running req: 1 queue req: 0
  26024. 2025-07-20 17:47:31,399 - sglang - INFO - [2025-07-20 17:47:31 TP0] Decode batch. #running-req: 1, #token: 4651, token usage: 0.12, gen throughput (token/s): 47.51, #queue-req: 0
  26025. 2025-07-20 17:47:31,399 - __main__ - INFO - sglang running req: 1 queue req: 0
  26026. 2025-07-20 17:47:32,245 - sglang - INFO - [2025-07-20 17:47:32 TP0] Decode batch. #running-req: 1, #token: 4691, token usage: 0.12, gen throughput (token/s): 47.29, #queue-req: 0
  26027. 2025-07-20 17:47:32,245 - __main__ - INFO - sglang running req: 1 queue req: 0
  26028. 2025-07-20 17:47:33,093 - sglang - INFO - [2025-07-20 17:47:33 TP0] Decode batch. #running-req: 1, #token: 4731, token usage: 0.12, gen throughput (token/s): 47.14, #queue-req: 0
  26029. 2025-07-20 17:47:33,094 - __main__ - INFO - sglang running req: 1 queue req: 0
  26030. 2025-07-20 17:47:33,942 - sglang - INFO - [2025-07-20 17:47:33 TP0] Decode batch. #running-req: 1, #token: 4771, token usage: 0.13, gen throughput (token/s): 47.11, #queue-req: 0
  26031. 2025-07-20 17:47:33,943 - __main__ - INFO - sglang running req: 1 queue req: 0
  26032. 2025-07-20 17:47:34,783 - sglang - INFO - [2025-07-20 17:47:34 TP0] Decode batch. #running-req: 1, #token: 4811, token usage: 0.13, gen throughput (token/s): 47.56, #queue-req: 0
  26033. 2025-07-20 17:47:34,784 - __main__ - INFO - sglang running req: 1 queue req: 0
  26034. 2025-07-20 17:47:35,620 - sglang - INFO - [2025-07-20 17:47:35 TP0] Decode batch. #running-req: 1, #token: 4851, token usage: 0.13, gen throughput (token/s): 47.81, #queue-req: 0
  26035. 2025-07-20 17:47:35,620 - __main__ - INFO - sglang running req: 1 queue req: 0
  26036. 2025-07-20 17:47:36,463 - sglang - INFO - [2025-07-20 17:47:36 TP0] Decode batch. #running-req: 1, #token: 4891, token usage: 0.13, gen throughput (token/s): 47.44, #queue-req: 0
  26037. 2025-07-20 17:47:36,464 - __main__ - INFO - sglang running req: 1 queue req: 0
  26038. 2025-07-20 17:47:36,983 - __main__ - INFO - Queue remaining: 0
  26039. 2025-07-20 17:47:36,983 - __main__ - INFO -
  26040. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26041. ----------------------------------------------------------------------------------
  26042. finished_input_tokens 343.42 0.00
  26043. finished_output_tokens 82.47 0.00
  26044. sglang_input_tokens 369.97 51.19
  26045. sglang_output_tokens 102.76 52.98
  26046. 2025-07-20 17:47:36,983 - __main__ - INFO -
  26047. Worker ID | finished | started
  26048. ----------+----------+--------
  26049. 0 | 8 | 9
  26050. 2025-07-20 17:47:37,305 - sglang - INFO - [2025-07-20 17:47:37 TP0] Decode batch. #running-req: 1, #token: 4931, token usage: 0.13, gen throughput (token/s): 47.52, #queue-req: 0
  26051. 2025-07-20 17:47:37,305 - __main__ - INFO - sglang running req: 1 queue req: 0
  26052. 2025-07-20 17:47:38,151 - sglang - INFO - [2025-07-20 17:47:38 TP0] Decode batch. #running-req: 1, #token: 4971, token usage: 0.13, gen throughput (token/s): 47.28, #queue-req: 0
  26053. 2025-07-20 17:47:38,151 - __main__ - INFO - sglang running req: 1 queue req: 0
  26054. 2025-07-20 17:47:38,996 - sglang - INFO - [2025-07-20 17:47:38 TP0] Decode batch. #running-req: 1, #token: 5011, token usage: 0.13, gen throughput (token/s): 47.32, #queue-req: 0
  26055. 2025-07-20 17:47:38,997 - __main__ - INFO - sglang running req: 1 queue req: 0
  26056. 2025-07-20 17:47:39,845 - sglang - INFO - [2025-07-20 17:47:39 TP0] Decode batch. #running-req: 1, #token: 5051, token usage: 0.13, gen throughput (token/s): 47.14, #queue-req: 0
  26057. 2025-07-20 17:47:39,845 - __main__ - INFO - sglang running req: 1 queue req: 0
  26058. 2025-07-20 17:47:40,695 - sglang - INFO - [2025-07-20 17:47:40 TP0] Decode batch. #running-req: 1, #token: 5091, token usage: 0.13, gen throughput (token/s): 47.06, #queue-req: 0
  26059. 2025-07-20 17:47:40,695 - __main__ - INFO - sglang running req: 1 queue req: 0
  26060. 2025-07-20 17:47:41,536 - sglang - INFO - [2025-07-20 17:47:41 TP0] Decode batch. #running-req: 1, #token: 5131, token usage: 0.14, gen throughput (token/s): 47.57, #queue-req: 0
  26061. 2025-07-20 17:47:41,536 - __main__ - INFO - sglang running req: 1 queue req: 0
  26062. 2025-07-20 17:47:42,373 - sglang - INFO - [2025-07-20 17:47:42 TP0] Decode batch. #running-req: 1, #token: 5171, token usage: 0.14, gen throughput (token/s): 47.77, #queue-req: 0
  26063. 2025-07-20 17:47:42,373 - __main__ - INFO - sglang running req: 1 queue req: 0
  26064. 2025-07-20 17:47:43,217 - sglang - INFO - [2025-07-20 17:47:43 TP0] Decode batch. #running-req: 1, #token: 5211, token usage: 0.14, gen throughput (token/s): 47.39, #queue-req: 0
  26065. 2025-07-20 17:47:43,217 - __main__ - INFO - sglang running req: 1 queue req: 0
  26066. 2025-07-20 17:47:44,060 - sglang - INFO - [2025-07-20 17:47:44 TP0] Decode batch. #running-req: 1, #token: 5251, token usage: 0.14, gen throughput (token/s): 47.43, #queue-req: 0
  26067. 2025-07-20 17:47:44,060 - __main__ - INFO - sglang running req: 1 queue req: 0
  26068. 2025-07-20 17:47:44,908 - sglang - INFO - [2025-07-20 17:47:44 TP0] Decode batch. #running-req: 1, #token: 5291, token usage: 0.14, gen throughput (token/s): 47.19, #queue-req: 0
  26069. 2025-07-20 17:47:44,908 - __main__ - INFO - sglang running req: 1 queue req: 0
  26070. 2025-07-20 17:47:45,756 - sglang - INFO - [2025-07-20 17:47:45 TP0] Decode batch. #running-req: 1, #token: 5331, token usage: 0.14, gen throughput (token/s): 47.16, #queue-req: 0
  26071. 2025-07-20 17:47:45,756 - __main__ - INFO - sglang running req: 1 queue req: 0
  26072. 2025-07-20 17:47:46,607 - sglang - INFO - [2025-07-20 17:47:46 TP0] Decode batch. #running-req: 1, #token: 5371, token usage: 0.14, gen throughput (token/s): 47.00, #queue-req: 0
  26073. 2025-07-20 17:47:46,608 - __main__ - INFO - sglang running req: 1 queue req: 0
  26074. 2025-07-20 17:47:46,985 - __main__ - INFO - Queue remaining: 0
  26075. 2025-07-20 17:47:46,985 - __main__ - INFO -
  26076. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26077. ----------------------------------------------------------------------------------
  26078. finished_input_tokens 340.93 0.00
  26079. finished_output_tokens 81.88 0.00
  26080. sglang_input_tokens 367.29 51.19
  26081. sglang_output_tokens 102.01 52.98
  26082. 2025-07-20 17:47:46,985 - __main__ - INFO -
  26083. Worker ID | finished | started
  26084. ----------+----------+--------
  26085. 0 | 8 | 9
  26086. 2025-07-20 17:47:47,460 - sglang - INFO - [2025-07-20 17:47:47 TP0] Decode batch. #running-req: 1, #token: 5411, token usage: 0.14, gen throughput (token/s): 46.91, #queue-req: 0
  26087. 2025-07-20 17:47:47,460 - __main__ - INFO - sglang running req: 1 queue req: 0
  26088. 2025-07-20 17:47:48,304 - sglang - INFO - [2025-07-20 17:47:48 TP0] Decode batch. #running-req: 1, #token: 5451, token usage: 0.14, gen throughput (token/s): 47.37, #queue-req: 0
  26089. 2025-07-20 17:47:48,305 - __main__ - INFO - sglang running req: 1 queue req: 0
  26090. 2025-07-20 17:47:49,145 - sglang - INFO - [2025-07-20 17:47:49 TP0] Decode batch. #running-req: 1, #token: 5491, token usage: 0.14, gen throughput (token/s): 47.58, #queue-req: 0
  26091. 2025-07-20 17:47:49,145 - __main__ - INFO - sglang running req: 1 queue req: 0
  26092. 2025-07-20 17:47:49,991 - sglang - INFO - [2025-07-20 17:47:49 TP0] Decode batch. #running-req: 1, #token: 5531, token usage: 0.15, gen throughput (token/s): 47.28, #queue-req: 0
  26093. 2025-07-20 17:47:49,991 - __main__ - INFO - sglang running req: 1 queue req: 0
  26094. 2025-07-20 17:47:50,837 - sglang - INFO - [2025-07-20 17:47:50 TP0] Decode batch. #running-req: 1, #token: 5571, token usage: 0.15, gen throughput (token/s): 47.28, #queue-req: 0
  26095. 2025-07-20 17:47:50,837 - __main__ - INFO - sglang running req: 1 queue req: 0
  26096. 2025-07-20 17:47:51,604 - __main__ - WARNING - JSON decode error on attempt 5 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
  26097. 2025-07-20 17:47:51,790 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  26098. 2025-07-20 17:47:51,951 - sglang - INFO - [2025-07-20 17:47:51 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26099. 2025-07-20 17:47:51,951 - __main__ - INFO - sglang running req: 0 queue req: 0
  26100. 2025-07-20 17:47:52,814 - sglang - INFO - [2025-07-20 17:47:52 TP0] Decode batch. #running-req: 1, #token: 2612, token usage: 0.07, gen throughput (token/s): 20.23, #queue-req: 0
  26101. 2025-07-20 17:47:52,815 - __main__ - INFO - sglang running req: 1 queue req: 0
  26102. 2025-07-20 17:47:53,654 - sglang - INFO - [2025-07-20 17:47:53 TP0] Decode batch. #running-req: 1, #token: 2652, token usage: 0.07, gen throughput (token/s): 47.68, #queue-req: 0
  26103. 2025-07-20 17:47:53,654 - __main__ - INFO - sglang running req: 1 queue req: 0
  26104. 2025-07-20 17:47:54,493 - sglang - INFO - [2025-07-20 17:47:54 TP0] Decode batch. #running-req: 1, #token: 2692, token usage: 0.07, gen throughput (token/s): 47.63, #queue-req: 0
  26105. 2025-07-20 17:47:54,494 - __main__ - INFO - sglang running req: 1 queue req: 0
  26106. 2025-07-20 17:47:55,324 - sglang - INFO - [2025-07-20 17:47:55 TP0] Decode batch. #running-req: 1, #token: 2732, token usage: 0.07, gen throughput (token/s): 48.13, #queue-req: 0
  26107. 2025-07-20 17:47:55,325 - __main__ - INFO - sglang running req: 1 queue req: 0
  26108. 2025-07-20 17:47:56,156 - sglang - INFO - [2025-07-20 17:47:56 TP0] Decode batch. #running-req: 1, #token: 2772, token usage: 0.07, gen throughput (token/s): 48.09, #queue-req: 0
  26109. 2025-07-20 17:47:56,157 - __main__ - INFO - sglang running req: 1 queue req: 0
  26110. 2025-07-20 17:47:56,987 - __main__ - INFO - Queue remaining: 0
  26111. 2025-07-20 17:47:56,987 - __main__ - INFO -
  26112. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26113. ----------------------------------------------------------------------------------
  26114. finished_input_tokens 338.48 0.00
  26115. finished_output_tokens 81.29 0.00
  26116. sglang_input_tokens 366.53 43.47
  26117. sglang_output_tokens 103.44 50.00
  26118. 2025-07-20 17:47:56,988 - __main__ - INFO -
  26119. Worker ID | finished | started
  26120. ----------+----------+--------
  26121. 0 | 8 | 9
  26122. 2025-07-20 17:47:56,992 - sglang - INFO - [2025-07-20 17:47:56 TP0] Decode batch. #running-req: 1, #token: 2812, token usage: 0.07, gen throughput (token/s): 47.85, #queue-req: 0
  26123. 2025-07-20 17:47:56,992 - __main__ - INFO - sglang running req: 1 queue req: 0
  26124. 2025-07-20 17:47:57,831 - sglang - INFO - [2025-07-20 17:47:57 TP0] Decode batch. #running-req: 1, #token: 2852, token usage: 0.08, gen throughput (token/s): 47.68, #queue-req: 0
  26125. 2025-07-20 17:47:57,831 - __main__ - INFO - sglang running req: 1 queue req: 0
  26126. 2025-07-20 17:47:58,670 - sglang - INFO - [2025-07-20 17:47:58 TP0] Decode batch. #running-req: 1, #token: 2892, token usage: 0.08, gen throughput (token/s): 47.69, #queue-req: 0
  26127. 2025-07-20 17:47:58,670 - __main__ - INFO - sglang running req: 1 queue req: 0
  26128. 2025-07-20 17:47:59,503 - sglang - INFO - [2025-07-20 17:47:59 TP0] Decode batch. #running-req: 1, #token: 2932, token usage: 0.08, gen throughput (token/s): 48.01, #queue-req: 0
  26129. 2025-07-20 17:47:59,503 - __main__ - INFO - sglang running req: 1 queue req: 0
  26130. 2025-07-20 17:48:00,339 - sglang - INFO - [2025-07-20 17:48:00 TP0] Decode batch. #running-req: 1, #token: 2972, token usage: 0.08, gen throughput (token/s): 47.83, #queue-req: 0
  26131. 2025-07-20 17:48:00,340 - __main__ - INFO - sglang running req: 1 queue req: 0
  26132. 2025-07-20 17:48:01,179 - sglang - INFO - [2025-07-20 17:48:01 TP0] Decode batch. #running-req: 1, #token: 3012, token usage: 0.08, gen throughput (token/s): 47.65, #queue-req: 0
  26133. 2025-07-20 17:48:01,179 - __main__ - INFO - sglang running req: 1 queue req: 0
  26134. 2025-07-20 17:48:02,012 - sglang - INFO - [2025-07-20 17:48:02 TP0] Decode batch. #running-req: 1, #token: 3052, token usage: 0.08, gen throughput (token/s): 47.99, #queue-req: 0
  26135. 2025-07-20 17:48:02,013 - __main__ - INFO - sglang running req: 1 queue req: 0
  26136. 2025-07-20 17:48:02,843 - sglang - INFO - [2025-07-20 17:48:02 TP0] Decode batch. #running-req: 1, #token: 3092, token usage: 0.08, gen throughput (token/s): 48.17, #queue-req: 0
  26137. 2025-07-20 17:48:02,843 - __main__ - INFO - sglang running req: 1 queue req: 0
  26138. 2025-07-20 17:48:03,678 - sglang - INFO - [2025-07-20 17:48:03 TP0] Decode batch. #running-req: 1, #token: 3132, token usage: 0.08, gen throughput (token/s): 47.91, #queue-req: 0
  26139. 2025-07-20 17:48:03,678 - __main__ - INFO - sglang running req: 1 queue req: 0
  26140. 2025-07-20 17:48:04,516 - sglang - INFO - [2025-07-20 17:48:04 TP0] Decode batch. #running-req: 1, #token: 3172, token usage: 0.08, gen throughput (token/s): 47.74, #queue-req: 0
  26141. 2025-07-20 17:48:04,516 - __main__ - INFO - sglang running req: 1 queue req: 0
  26142. 2025-07-20 17:48:05,346 - sglang - INFO - [2025-07-20 17:48:05 TP0] Decode batch. #running-req: 1, #token: 3212, token usage: 0.08, gen throughput (token/s): 48.14, #queue-req: 0
  26143. 2025-07-20 17:48:05,347 - __main__ - INFO - sglang running req: 1 queue req: 0
  26144. 2025-07-20 17:48:06,177 - sglang - INFO - [2025-07-20 17:48:06 TP0] Decode batch. #running-req: 1, #token: 3252, token usage: 0.09, gen throughput (token/s): 48.17, #queue-req: 0
  26145. 2025-07-20 17:48:06,177 - __main__ - INFO - sglang running req: 1 queue req: 0
  26146. 2025-07-20 17:48:06,989 - __main__ - INFO - Queue remaining: 0
  26147. 2025-07-20 17:48:06,989 - __main__ - INFO -
  26148. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26149. ----------------------------------------------------------------------------------
  26150. finished_input_tokens 336.06 0.00
  26151. finished_output_tokens 80.71 0.00
  26152. sglang_input_tokens 363.91 43.47
  26153. sglang_output_tokens 102.70 50.00
  26154. 2025-07-20 17:48:06,989 - __main__ - INFO -
  26155. Worker ID | finished | started
  26156. ----------+----------+--------
  26157. 0 | 8 | 9
  26158. 2025-07-20 17:48:07,007 - sglang - INFO - [2025-07-20 17:48:07 TP0] Decode batch. #running-req: 1, #token: 3292, token usage: 0.09, gen throughput (token/s): 48.19, #queue-req: 0
  26159. 2025-07-20 17:48:07,007 - __main__ - INFO - sglang running req: 1 queue req: 0
  26160. 2025-07-20 17:48:07,843 - sglang - INFO - [2025-07-20 17:48:07 TP0] Decode batch. #running-req: 1, #token: 3332, token usage: 0.09, gen throughput (token/s): 47.86, #queue-req: 0
  26161. 2025-07-20 17:48:07,843 - __main__ - INFO - sglang running req: 1 queue req: 0
  26162. 2025-07-20 17:48:08,680 - sglang - INFO - [2025-07-20 17:48:08 TP0] Decode batch. #running-req: 1, #token: 3372, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
  26163. 2025-07-20 17:48:08,681 - __main__ - INFO - sglang running req: 1 queue req: 0
  26164. 2025-07-20 17:48:09,512 - sglang - INFO - [2025-07-20 17:48:09 TP0] Decode batch. #running-req: 1, #token: 3412, token usage: 0.09, gen throughput (token/s): 48.08, #queue-req: 0
  26165. 2025-07-20 17:48:09,513 - __main__ - INFO - sglang running req: 1 queue req: 0
  26166. 2025-07-20 17:48:10,347 - sglang - INFO - [2025-07-20 17:48:10 TP0] Decode batch. #running-req: 1, #token: 3452, token usage: 0.09, gen throughput (token/s): 47.96, #queue-req: 0
  26167. 2025-07-20 17:48:10,347 - __main__ - INFO - sglang running req: 1 queue req: 0
  26168. 2025-07-20 17:48:11,188 - sglang - INFO - [2025-07-20 17:48:11 TP0] Decode batch. #running-req: 1, #token: 3492, token usage: 0.09, gen throughput (token/s): 47.54, #queue-req: 0
  26169. 2025-07-20 17:48:11,188 - __main__ - INFO - sglang running req: 1 queue req: 0
  26170. 2025-07-20 17:48:12,022 - sglang - INFO - [2025-07-20 17:48:12 TP0] Decode batch. #running-req: 1, #token: 3532, token usage: 0.09, gen throughput (token/s): 47.98, #queue-req: 0
  26171. 2025-07-20 17:48:12,022 - __main__ - INFO - sglang running req: 1 queue req: 0
  26172. 2025-07-20 17:48:12,853 - sglang - INFO - [2025-07-20 17:48:12 TP0] Decode batch. #running-req: 1, #token: 3572, token usage: 0.09, gen throughput (token/s): 48.09, #queue-req: 0
  26173. 2025-07-20 17:48:12,854 - __main__ - INFO - sglang running req: 1 queue req: 0
  26174. 2025-07-20 17:48:13,686 - sglang - INFO - [2025-07-20 17:48:13 TP0] Decode batch. #running-req: 1, #token: 3612, token usage: 0.10, gen throughput (token/s): 48.04, #queue-req: 0
  26175. 2025-07-20 17:48:13,686 - __main__ - INFO - sglang running req: 1 queue req: 0
  26176. 2025-07-20 17:48:14,526 - sglang - INFO - [2025-07-20 17:48:14 TP0] Decode batch. #running-req: 1, #token: 3652, token usage: 0.10, gen throughput (token/s): 47.60, #queue-req: 0
  26177. 2025-07-20 17:48:14,527 - __main__ - INFO - sglang running req: 1 queue req: 0
  26178. 2025-07-20 17:48:15,370 - sglang - INFO - [2025-07-20 17:48:15 TP0] Decode batch. #running-req: 1, #token: 3692, token usage: 0.10, gen throughput (token/s): 47.39, #queue-req: 0
  26179. 2025-07-20 17:48:15,371 - __main__ - INFO - sglang running req: 1 queue req: 0
  26180. 2025-07-20 17:48:16,204 - sglang - INFO - [2025-07-20 17:48:16 TP0] Decode batch. #running-req: 1, #token: 3732, token usage: 0.10, gen throughput (token/s): 48.00, #queue-req: 0
  26181. 2025-07-20 17:48:16,204 - __main__ - INFO - sglang running req: 1 queue req: 0
  26182. 2025-07-20 17:48:16,990 - __main__ - INFO - Queue remaining: 0
  26183. 2025-07-20 17:48:16,991 - __main__ - INFO -
  26184. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26185. ----------------------------------------------------------------------------------
  26186. finished_input_tokens 333.68 0.00
  26187. finished_output_tokens 80.14 0.00
  26188. sglang_input_tokens 361.33 43.47
  26189. sglang_output_tokens 101.97 50.00
  26190. 2025-07-20 17:48:16,991 - __main__ - INFO -
  26191. Worker ID | finished | started
  26192. ----------+----------+--------
  26193. 0 | 8 | 9
  26194. 2025-07-20 17:48:17,036 - sglang - INFO - [2025-07-20 17:48:17 TP0] Decode batch. #running-req: 1, #token: 3772, token usage: 0.10, gen throughput (token/s): 48.04, #queue-req: 0
  26195. 2025-07-20 17:48:17,037 - __main__ - INFO - sglang running req: 1 queue req: 0
  26196. 2025-07-20 17:48:17,875 - sglang - INFO - [2025-07-20 17:48:17 TP0] Decode batch. #running-req: 1, #token: 3812, token usage: 0.10, gen throughput (token/s): 47.69, #queue-req: 0
  26197. 2025-07-20 17:48:17,875 - __main__ - INFO - sglang running req: 1 queue req: 0
  26198. 2025-07-20 17:48:18,713 - sglang - INFO - [2025-07-20 17:48:18 TP0] Decode batch. #running-req: 1, #token: 3852, token usage: 0.10, gen throughput (token/s): 47.72, #queue-req: 0
  26199. 2025-07-20 17:48:18,714 - __main__ - INFO - sglang running req: 1 queue req: 0
  26200. 2025-07-20 17:48:19,555 - sglang - INFO - [2025-07-20 17:48:19 TP0] Decode batch. #running-req: 1, #token: 3892, token usage: 0.10, gen throughput (token/s): 47.50, #queue-req: 0
  26201. 2025-07-20 17:48:19,556 - __main__ - INFO - sglang running req: 1 queue req: 0
  26202. 2025-07-20 17:48:20,395 - sglang - INFO - [2025-07-20 17:48:20 TP0] Decode batch. #running-req: 1, #token: 3932, token usage: 0.10, gen throughput (token/s): 47.65, #queue-req: 0
  26203. 2025-07-20 17:48:20,395 - __main__ - INFO - sglang running req: 1 queue req: 0
  26204. 2025-07-20 17:48:21,238 - sglang - INFO - [2025-07-20 17:48:21 TP0] Decode batch. #running-req: 1, #token: 3972, token usage: 0.10, gen throughput (token/s): 47.42, #queue-req: 0
  26205. 2025-07-20 17:48:21,239 - __main__ - INFO - sglang running req: 1 queue req: 0
  26206. 2025-07-20 17:48:22,085 - sglang - INFO - [2025-07-20 17:48:22 TP0] Decode batch. #running-req: 1, #token: 4012, token usage: 0.11, gen throughput (token/s): 47.26, #queue-req: 0
  26207. 2025-07-20 17:48:22,085 - __main__ - INFO - sglang running req: 1 queue req: 0
  26208. 2025-07-20 17:48:22,924 - sglang - INFO - [2025-07-20 17:48:22 TP0] Decode batch. #running-req: 1, #token: 4052, token usage: 0.11, gen throughput (token/s): 47.67, #queue-req: 0
  26209. 2025-07-20 17:48:22,924 - __main__ - INFO - sglang running req: 1 queue req: 0
  26210. 2025-07-20 17:48:23,757 - sglang - INFO - [2025-07-20 17:48:23 TP0] Decode batch. #running-req: 1, #token: 4092, token usage: 0.11, gen throughput (token/s): 47.99, #queue-req: 0
  26211. 2025-07-20 17:48:23,758 - __main__ - INFO - sglang running req: 1 queue req: 0
  26212. 2025-07-20 17:48:24,596 - sglang - INFO - [2025-07-20 17:48:24 TP0] Decode batch. #running-req: 1, #token: 4132, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
  26213. 2025-07-20 17:48:24,596 - __main__ - INFO - sglang running req: 1 queue req: 0
  26214. 2025-07-20 17:48:25,436 - sglang - INFO - [2025-07-20 17:48:25 TP0] Decode batch. #running-req: 1, #token: 4172, token usage: 0.11, gen throughput (token/s): 47.62, #queue-req: 0
  26215. 2025-07-20 17:48:25,436 - __main__ - INFO - sglang running req: 1 queue req: 0
  26216. 2025-07-20 17:48:26,277 - sglang - INFO - [2025-07-20 17:48:26 TP0] Decode batch. #running-req: 1, #token: 4212, token usage: 0.11, gen throughput (token/s): 47.58, #queue-req: 0
  26217. 2025-07-20 17:48:26,277 - __main__ - INFO - sglang running req: 1 queue req: 0
  26218. 2025-07-20 17:48:26,992 - __main__ - INFO - Queue remaining: 0
  26219. 2025-07-20 17:48:26,993 - __main__ - INFO -
  26220. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26221. ----------------------------------------------------------------------------------
  26222. finished_input_tokens 331.33 0.00
  26223. finished_output_tokens 79.57 0.00
  26224. sglang_input_tokens 358.78 43.47
  26225. sglang_output_tokens 101.25 50.00
  26226. 2025-07-20 17:48:26,993 - __main__ - INFO -
  26227. Worker ID | finished | started
  26228. ----------+----------+--------
  26229. 0 | 8 | 9
  26230. 2025-07-20 17:48:27,119 - sglang - INFO - [2025-07-20 17:48:27 TP0] Decode batch. #running-req: 1, #token: 4252, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
  26231. 2025-07-20 17:48:27,120 - __main__ - INFO - sglang running req: 1 queue req: 0
  26232. 2025-07-20 17:48:27,964 - sglang - INFO - [2025-07-20 17:48:27 TP0] Decode batch. #running-req: 1, #token: 4292, token usage: 0.11, gen throughput (token/s): 47.34, #queue-req: 0
  26233. 2025-07-20 17:48:27,964 - __main__ - INFO - sglang running req: 1 queue req: 0
  26234. 2025-07-20 17:48:28,811 - sglang - INFO - [2025-07-20 17:48:28 TP0] Decode batch. #running-req: 1, #token: 4332, token usage: 0.11, gen throughput (token/s): 47.21, #queue-req: 0
  26235. 2025-07-20 17:48:28,812 - __main__ - INFO - sglang running req: 1 queue req: 0
  26236. 2025-07-20 17:48:29,650 - sglang - INFO - [2025-07-20 17:48:29 TP0] Decode batch. #running-req: 1, #token: 4372, token usage: 0.12, gen throughput (token/s): 47.68, #queue-req: 0
  26237. 2025-07-20 17:48:29,650 - __main__ - INFO - sglang running req: 1 queue req: 0
  26238. 2025-07-20 17:48:30,485 - sglang - INFO - [2025-07-20 17:48:30 TP0] Decode batch. #running-req: 1, #token: 4412, token usage: 0.12, gen throughput (token/s): 47.94, #queue-req: 0
  26239. 2025-07-20 17:48:30,485 - __main__ - INFO - sglang running req: 1 queue req: 0
  26240. 2025-07-20 17:48:31,320 - sglang - INFO - [2025-07-20 17:48:31 TP0] Decode batch. #running-req: 1, #token: 4452, token usage: 0.12, gen throughput (token/s): 47.86, #queue-req: 0
  26241. 2025-07-20 17:48:31,321 - __main__ - INFO - sglang running req: 1 queue req: 0
  26242. 2025-07-20 17:48:32,161 - sglang - INFO - [2025-07-20 17:48:32 TP0] Decode batch. #running-req: 1, #token: 4492, token usage: 0.12, gen throughput (token/s): 47.57, #queue-req: 0
  26243. 2025-07-20 17:48:32,162 - __main__ - INFO - sglang running req: 1 queue req: 0
  26244. 2025-07-20 17:48:33,001 - sglang - INFO - [2025-07-20 17:48:33 TP0] Decode batch. #running-req: 1, #token: 4532, token usage: 0.12, gen throughput (token/s): 47.65, #queue-req: 0
  26245. 2025-07-20 17:48:33,001 - __main__ - INFO - sglang running req: 1 queue req: 0
  26246. 2025-07-20 17:48:33,846 - sglang - INFO - [2025-07-20 17:48:33 TP0] Decode batch. #running-req: 1, #token: 4572, token usage: 0.12, gen throughput (token/s): 47.34, #queue-req: 0
  26247. 2025-07-20 17:48:33,846 - __main__ - INFO - sglang running req: 1 queue req: 0
  26248. 2025-07-20 17:48:34,692 - sglang - INFO - [2025-07-20 17:48:34 TP0] Decode batch. #running-req: 1, #token: 4612, token usage: 0.12, gen throughput (token/s): 47.24, #queue-req: 0
  26249. 2025-07-20 17:48:34,693 - __main__ - INFO - sglang running req: 1 queue req: 0
  26250. 2025-07-20 17:48:35,541 - sglang - INFO - [2025-07-20 17:48:35 TP0] Decode batch. #running-req: 1, #token: 4652, token usage: 0.12, gen throughput (token/s): 47.11, #queue-req: 0
  26251. 2025-07-20 17:48:35,542 - __main__ - INFO - sglang running req: 1 queue req: 0
  26252. 2025-07-20 17:48:36,386 - sglang - INFO - [2025-07-20 17:48:36 TP0] Decode batch. #running-req: 1, #token: 4692, token usage: 0.12, gen throughput (token/s): 47.39, #queue-req: 0
  26253. 2025-07-20 17:48:36,386 - __main__ - INFO - sglang running req: 1 queue req: 0
  26254. 2025-07-20 17:48:36,994 - __main__ - INFO - Queue remaining: 0
  26255. 2025-07-20 17:48:36,994 - __main__ - INFO -
  26256. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26257. ----------------------------------------------------------------------------------
  26258. finished_input_tokens 329.01 0.00
  26259. finished_output_tokens 79.01 0.00
  26260. sglang_input_tokens 356.28 43.47
  26261. sglang_output_tokens 100.54 50.00
  26262. 2025-07-20 17:48:36,995 - __main__ - INFO -
  26263. Worker ID | finished | started
  26264. ----------+----------+--------
  26265. 0 | 8 | 9
  26266. 2025-07-20 17:48:37,223 - sglang - INFO - [2025-07-20 17:48:37 TP0] Decode batch. #running-req: 1, #token: 4732, token usage: 0.12, gen throughput (token/s): 47.74, #queue-req: 0
  26267. 2025-07-20 17:48:37,224 - __main__ - INFO - sglang running req: 1 queue req: 0
  26268. 2025-07-20 17:48:38,060 - sglang - INFO - [2025-07-20 17:48:38 TP0] Decode batch. #running-req: 1, #token: 4772, token usage: 0.13, gen throughput (token/s): 47.82, #queue-req: 0
  26269. 2025-07-20 17:48:38,060 - __main__ - INFO - sglang running req: 1 queue req: 0
  26270. 2025-07-20 17:48:38,904 - sglang - INFO - [2025-07-20 17:48:38 TP0] Decode batch. #running-req: 1, #token: 4812, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
  26271. 2025-07-20 17:48:38,904 - __main__ - INFO - sglang running req: 1 queue req: 0
  26272. 2025-07-20 17:48:39,745 - sglang - INFO - [2025-07-20 17:48:39 TP0] Decode batch. #running-req: 1, #token: 4852, token usage: 0.13, gen throughput (token/s): 47.56, #queue-req: 0
  26273. 2025-07-20 17:48:39,745 - __main__ - INFO - sglang running req: 1 queue req: 0
  26274. 2025-07-20 17:48:40,589 - sglang - INFO - [2025-07-20 17:48:40 TP0] Decode batch. #running-req: 1, #token: 4892, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
  26275. 2025-07-20 17:48:40,589 - __main__ - INFO - sglang running req: 1 queue req: 0
  26276. 2025-07-20 17:48:41,435 - sglang - INFO - [2025-07-20 17:48:41 TP0] Decode batch. #running-req: 1, #token: 4932, token usage: 0.13, gen throughput (token/s): 47.27, #queue-req: 0
  26277. 2025-07-20 17:48:41,435 - __main__ - INFO - sglang running req: 1 queue req: 0
  26278. 2025-07-20 17:48:42,284 - sglang - INFO - [2025-07-20 17:48:42 TP0] Decode batch. #running-req: 1, #token: 4972, token usage: 0.13, gen throughput (token/s): 47.11, #queue-req: 0
  26279. 2025-07-20 17:48:42,284 - __main__ - INFO - sglang running req: 1 queue req: 0
  26280. 2025-07-20 17:48:43,133 - sglang - INFO - [2025-07-20 17:48:43 TP0] Decode batch. #running-req: 1, #token: 5012, token usage: 0.13, gen throughput (token/s): 47.13, #queue-req: 0
  26281. 2025-07-20 17:48:43,133 - __main__ - INFO - sglang running req: 1 queue req: 0
  26282. 2025-07-20 17:48:43,973 - sglang - INFO - [2025-07-20 17:48:43 TP0] Decode batch. #running-req: 1, #token: 5052, token usage: 0.13, gen throughput (token/s): 47.61, #queue-req: 0
  26283. 2025-07-20 17:48:43,973 - __main__ - INFO - sglang running req: 1 queue req: 0
  26284. 2025-07-20 17:48:44,809 - sglang - INFO - [2025-07-20 17:48:44 TP0] Decode batch. #running-req: 1, #token: 5092, token usage: 0.13, gen throughput (token/s): 47.84, #queue-req: 0
  26285. 2025-07-20 17:48:44,809 - __main__ - INFO - sglang running req: 1 queue req: 0
  26286. 2025-07-20 17:48:45,653 - sglang - INFO - [2025-07-20 17:48:45 TP0] Decode batch. #running-req: 1, #token: 5132, token usage: 0.14, gen throughput (token/s): 47.42, #queue-req: 0
  26287. 2025-07-20 17:48:45,653 - __main__ - INFO - sglang running req: 1 queue req: 0
  26288. 2025-07-20 17:48:46,496 - sglang - INFO - [2025-07-20 17:48:46 TP0] Decode batch. #running-req: 1, #token: 5172, token usage: 0.14, gen throughput (token/s): 47.40, #queue-req: 0
  26289. 2025-07-20 17:48:46,497 - __main__ - INFO - sglang running req: 1 queue req: 0
  26290. 2025-07-20 17:48:46,997 - __main__ - INFO - Queue remaining: 0
  26291. 2025-07-20 17:48:46,997 - __main__ - INFO -
  26292. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26293. ----------------------------------------------------------------------------------
  26294. finished_input_tokens 326.73 0.00
  26295. finished_output_tokens 78.47 0.00
  26296. sglang_input_tokens 353.80 43.47
  26297. sglang_output_tokens 99.84 50.00
  26298. 2025-07-20 17:48:46,997 - __main__ - INFO -
  26299. Worker ID | finished | started
  26300. ----------+----------+--------
  26301. 0 | 8 | 9
  26302. 2025-07-20 17:48:47,339 - sglang - INFO - [2025-07-20 17:48:47 TP0] Decode batch. #running-req: 1, #token: 5212, token usage: 0.14, gen throughput (token/s): 47.48, #queue-req: 0
  26303. 2025-07-20 17:48:47,339 - __main__ - INFO - sglang running req: 1 queue req: 0
  26304. 2025-07-20 17:48:48,178 - sglang - INFO - [2025-07-20 17:48:48 TP0] Decode batch. #running-req: 1, #token: 5252, token usage: 0.14, gen throughput (token/s): 47.67, #queue-req: 0
  26305. 2025-07-20 17:48:48,178 - __main__ - INFO - sglang running req: 1 queue req: 0
  26306. 2025-07-20 17:48:49,025 - sglang - INFO - [2025-07-20 17:48:49 TP0] Decode batch. #running-req: 1, #token: 5292, token usage: 0.14, gen throughput (token/s): 47.23, #queue-req: 0
  26307. 2025-07-20 17:48:49,025 - __main__ - INFO - sglang running req: 1 queue req: 0
  26308. 2025-07-20 17:48:49,875 - sglang - INFO - [2025-07-20 17:48:49 TP0] Decode batch. #running-req: 1, #token: 5332, token usage: 0.14, gen throughput (token/s): 47.04, #queue-req: 0
  26309. 2025-07-20 17:48:49,876 - __main__ - INFO - sglang running req: 1 queue req: 0
  26310. 2025-07-20 17:48:50,719 - sglang - INFO - [2025-07-20 17:48:50 TP0] Decode batch. #running-req: 1, #token: 5372, token usage: 0.14, gen throughput (token/s): 47.42, #queue-req: 0
  26311. 2025-07-20 17:48:50,719 - __main__ - INFO - sglang running req: 1 queue req: 0
  26312. 2025-07-20 17:48:51,558 - sglang - INFO - [2025-07-20 17:48:51 TP0] Decode batch. #running-req: 1, #token: 5412, token usage: 0.14, gen throughput (token/s): 47.66, #queue-req: 0
  26313. 2025-07-20 17:48:51,558 - __main__ - INFO - sglang running req: 1 queue req: 0
  26314. 2025-07-20 17:48:52,403 - sglang - INFO - [2025-07-20 17:48:52 TP0] Decode batch. #running-req: 1, #token: 5452, token usage: 0.14, gen throughput (token/s): 47.34, #queue-req: 0
  26315. 2025-07-20 17:48:52,403 - __main__ - INFO - sglang running req: 1 queue req: 0
  26316. 2025-07-20 17:48:53,250 - sglang - INFO - [2025-07-20 17:48:53 TP0] Decode batch. #running-req: 1, #token: 5492, token usage: 0.14, gen throughput (token/s): 47.24, #queue-req: 0
  26317. 2025-07-20 17:48:53,250 - __main__ - INFO - sglang running req: 1 queue req: 0
  26318. 2025-07-20 17:48:54,093 - sglang - INFO - [2025-07-20 17:48:54 TP0] Decode batch. #running-req: 1, #token: 5532, token usage: 0.15, gen throughput (token/s): 47.46, #queue-req: 0
  26319. 2025-07-20 17:48:54,093 - __main__ - INFO - sglang running req: 1 queue req: 0
  26320. 2025-07-20 17:48:54,933 - sglang - INFO - [2025-07-20 17:48:54 TP0] Decode batch. #running-req: 1, #token: 5572, token usage: 0.15, gen throughput (token/s): 47.59, #queue-req: 0
  26321. 2025-07-20 17:48:54,933 - __main__ - INFO - sglang running req: 1 queue req: 0
  26322. 2025-07-20 17:48:55,682 - __main__ - WARNING - JSON decode error on attempt 6 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
  26323. 2025-07-20 17:48:55,868 - __main__ - INFO - Built page query for test_pdf/1144520000702630XG344010604301601.pdf-5
  26324. 2025-07-20 17:48:56,071 - sglang - INFO - [2025-07-20 17:48:56 TP0] Prefill batch. #new-seq: 1, #new-token: 2608, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26325. 2025-07-20 17:48:56,071 - __main__ - INFO - sglang running req: 0 queue req: 0
  26326. 2025-07-20 17:48:56,956 - sglang - INFO - [2025-07-20 17:48:56 TP0] Decode batch. #running-req: 1, #token: 2613, token usage: 0.07, gen throughput (token/s): 19.77, #queue-req: 0
  26327. 2025-07-20 17:48:56,956 - __main__ - INFO - sglang running req: 1 queue req: 0
  26328. 2025-07-20 17:48:56,999 - __main__ - INFO - Queue remaining: 0
  26329. 2025-07-20 17:48:56,999 - __main__ - INFO -
  26330. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26331. ----------------------------------------------------------------------------------
  26332. finished_input_tokens 324.48 0.00
  26333. finished_output_tokens 77.93 0.00
  26334. sglang_input_tokens 353.16 43.47
  26335. sglang_output_tokens 101.22 50.00
  26336. 2025-07-20 17:48:57,000 - __main__ - INFO -
  26337. Worker ID | finished | started
  26338. ----------+----------+--------
  26339. 0 | 8 | 9
  26340. 2025-07-20 17:48:57,788 - sglang - INFO - [2025-07-20 17:48:57 TP0] Decode batch. #running-req: 1, #token: 2653, token usage: 0.07, gen throughput (token/s): 48.06, #queue-req: 0
  26341. 2025-07-20 17:48:57,789 - __main__ - INFO - sglang running req: 1 queue req: 0
  26342. 2025-07-20 17:48:58,621 - sglang - INFO - [2025-07-20 17:48:58 TP0] Decode batch. #running-req: 1, #token: 2693, token usage: 0.07, gen throughput (token/s): 48.03, #queue-req: 0
  26343. 2025-07-20 17:48:58,621 - __main__ - INFO - sglang running req: 1 queue req: 0
  26344. 2025-07-20 17:48:59,459 - sglang - INFO - [2025-07-20 17:48:59 TP0] Decode batch. #running-req: 1, #token: 2733, token usage: 0.07, gen throughput (token/s): 47.75, #queue-req: 0
  26345. 2025-07-20 17:48:59,459 - __main__ - INFO - sglang running req: 1 queue req: 0
  26346. 2025-07-20 17:49:00,293 - sglang - INFO - [2025-07-20 17:49:00 TP0] Decode batch. #running-req: 1, #token: 2773, token usage: 0.07, gen throughput (token/s): 47.95, #queue-req: 0
  26347. 2025-07-20 17:49:00,293 - __main__ - INFO - sglang running req: 1 queue req: 0
  26348. 2025-07-20 17:49:01,122 - sglang - INFO - [2025-07-20 17:49:01 TP0] Decode batch. #running-req: 1, #token: 2813, token usage: 0.07, gen throughput (token/s): 48.23, #queue-req: 0
  26349. 2025-07-20 17:49:01,122 - __main__ - INFO - sglang running req: 1 queue req: 0
  26350. 2025-07-20 17:49:01,952 - sglang - INFO - [2025-07-20 17:49:01 TP0] Decode batch. #running-req: 1, #token: 2853, token usage: 0.08, gen throughput (token/s): 48.19, #queue-req: 0
  26351. 2025-07-20 17:49:01,952 - __main__ - INFO - sglang running req: 1 queue req: 0
  26352. 2025-07-20 17:49:02,782 - sglang - INFO - [2025-07-20 17:49:02 TP0] Decode batch. #running-req: 1, #token: 2893, token usage: 0.08, gen throughput (token/s): 48.18, #queue-req: 0
  26353. 2025-07-20 17:49:02,783 - __main__ - INFO - sglang running req: 1 queue req: 0
  26354. 2025-07-20 17:49:03,619 - sglang - INFO - [2025-07-20 17:49:03 TP0] Decode batch. #running-req: 1, #token: 2933, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
  26355. 2025-07-20 17:49:03,619 - __main__ - INFO - sglang running req: 1 queue req: 0
  26356. 2025-07-20 17:49:04,453 - sglang - INFO - [2025-07-20 17:49:04 TP0] Decode batch. #running-req: 1, #token: 2973, token usage: 0.08, gen throughput (token/s): 47.94, #queue-req: 0
  26357. 2025-07-20 17:49:04,453 - __main__ - INFO - sglang running req: 1 queue req: 0
  26358. 2025-07-20 17:49:05,285 - sglang - INFO - [2025-07-20 17:49:05 TP0] Decode batch. #running-req: 1, #token: 3013, token usage: 0.08, gen throughput (token/s): 48.10, #queue-req: 0
  26359. 2025-07-20 17:49:05,285 - __main__ - INFO - sglang running req: 1 queue req: 0
  26360. 2025-07-20 17:49:06,121 - sglang - INFO - [2025-07-20 17:49:06 TP0] Decode batch. #running-req: 1, #token: 3053, token usage: 0.08, gen throughput (token/s): 47.82, #queue-req: 0
  26361. 2025-07-20 17:49:06,122 - __main__ - INFO - sglang running req: 1 queue req: 0
  26362. 2025-07-20 17:49:06,957 - sglang - INFO - [2025-07-20 17:49:06 TP0] Decode batch. #running-req: 1, #token: 3093, token usage: 0.08, gen throughput (token/s): 47.84, #queue-req: 0
  26363. 2025-07-20 17:49:06,958 - __main__ - INFO - sglang running req: 1 queue req: 0
  26364. 2025-07-20 17:49:07,000 - __main__ - INFO - Queue remaining: 0
  26365. 2025-07-20 17:49:07,001 - __main__ - INFO -
  26366. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26367. ----------------------------------------------------------------------------------
  26368. finished_input_tokens 322.25 0.00
  26369. finished_output_tokens 77.39 0.00
  26370. sglang_input_tokens 350.74 43.47
  26371. sglang_output_tokens 100.53 50.00
  26372. 2025-07-20 17:49:07,001 - __main__ - INFO -
  26373. Worker ID | finished | started
  26374. ----------+----------+--------
  26375. 0 | 8 | 9
  26376. 2025-07-20 17:49:07,789 - sglang - INFO - [2025-07-20 17:49:07 TP0] Decode batch. #running-req: 1, #token: 3133, token usage: 0.08, gen throughput (token/s): 48.11, #queue-req: 0
  26377. 2025-07-20 17:49:07,789 - __main__ - INFO - sglang running req: 1 queue req: 0
  26378. 2025-07-20 17:49:08,621 - sglang - INFO - [2025-07-20 17:49:08 TP0] Decode batch. #running-req: 1, #token: 3173, token usage: 0.08, gen throughput (token/s): 48.09, #queue-req: 0
  26379. 2025-07-20 17:49:08,621 - __main__ - INFO - sglang running req: 1 queue req: 0
  26380. 2025-07-20 17:49:09,457 - sglang - INFO - [2025-07-20 17:49:09 TP0] Decode batch. #running-req: 1, #token: 3213, token usage: 0.08, gen throughput (token/s): 47.85, #queue-req: 0
  26381. 2025-07-20 17:49:09,457 - __main__ - INFO - sglang running req: 1 queue req: 0
  26382. 2025-07-20 17:49:10,298 - sglang - INFO - [2025-07-20 17:49:10 TP0] Decode batch. #running-req: 1, #token: 3253, token usage: 0.09, gen throughput (token/s): 47.52, #queue-req: 0
  26383. 2025-07-20 17:49:10,299 - __main__ - INFO - sglang running req: 1 queue req: 0
  26384. 2025-07-20 17:49:11,142 - sglang - INFO - [2025-07-20 17:49:11 TP0] Decode batch. #running-req: 1, #token: 3293, token usage: 0.09, gen throughput (token/s): 47.42, #queue-req: 0
  26385. 2025-07-20 17:49:11,142 - __main__ - INFO - sglang running req: 1 queue req: 0
  26386. 2025-07-20 17:49:11,975 - sglang - INFO - [2025-07-20 17:49:11 TP0] Decode batch. #running-req: 1, #token: 3333, token usage: 0.09, gen throughput (token/s): 48.05, #queue-req: 0
  26387. 2025-07-20 17:49:11,975 - __main__ - INFO - sglang running req: 1 queue req: 0
  26388. 2025-07-20 17:49:12,809 - sglang - INFO - [2025-07-20 17:49:12 TP0] Decode batch. #running-req: 1, #token: 3373, token usage: 0.09, gen throughput (token/s): 47.95, #queue-req: 0
  26389. 2025-07-20 17:49:12,809 - __main__ - INFO - sglang running req: 1 queue req: 0
  26390. 2025-07-20 17:49:13,648 - sglang - INFO - [2025-07-20 17:49:13 TP0] Decode batch. #running-req: 1, #token: 3413, token usage: 0.09, gen throughput (token/s): 47.67, #queue-req: 0
  26391. 2025-07-20 17:49:13,648 - __main__ - INFO - sglang running req: 1 queue req: 0
  26392. 2025-07-20 17:49:14,488 - sglang - INFO - [2025-07-20 17:49:14 TP0] Decode batch. #running-req: 1, #token: 3453, token usage: 0.09, gen throughput (token/s): 47.58, #queue-req: 0
  26393. 2025-07-20 17:49:14,489 - __main__ - INFO - sglang running req: 1 queue req: 0
  26394. 2025-07-20 17:49:15,332 - sglang - INFO - [2025-07-20 17:49:15 TP0] Decode batch. #running-req: 1, #token: 3493, token usage: 0.09, gen throughput (token/s): 47.40, #queue-req: 0
  26395. 2025-07-20 17:49:15,333 - __main__ - INFO - sglang running req: 1 queue req: 0
  26396. 2025-07-20 17:49:16,173 - sglang - INFO - [2025-07-20 17:49:16 TP0] Decode batch. #running-req: 1, #token: 3533, token usage: 0.09, gen throughput (token/s): 47.61, #queue-req: 0
  26397. 2025-07-20 17:49:16,173 - __main__ - INFO - sglang running req: 1 queue req: 0
  26398. 2025-07-20 17:49:17,003 - __main__ - INFO - Queue remaining: 0
  26399. 2025-07-20 17:49:17,004 - __main__ - INFO -
  26400. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26401. ----------------------------------------------------------------------------------
  26402. finished_input_tokens 320.06 0.00
  26403. finished_output_tokens 76.87 0.00
  26404. sglang_input_tokens 348.36 43.47
  26405. sglang_output_tokens 99.85 50.00
  26406. 2025-07-20 17:49:17,004 - __main__ - INFO -
  26407. Worker ID | finished | started
  26408. ----------+----------+--------
  26409. 0 | 8 | 9
  26410. 2025-07-20 17:49:17,016 - sglang - INFO - [2025-07-20 17:49:17 TP0] Decode batch. #running-req: 1, #token: 3573, token usage: 0.09, gen throughput (token/s): 47.43, #queue-req: 0
  26411. 2025-07-20 17:49:17,016 - __main__ - INFO - sglang running req: 1 queue req: 0
  26412. 2025-07-20 17:49:17,860 - sglang - INFO - [2025-07-20 17:49:17 TP0] Decode batch. #running-req: 1, #token: 3613, token usage: 0.10, gen throughput (token/s): 47.38, #queue-req: 0
  26413. 2025-07-20 17:49:17,860 - __main__ - INFO - sglang running req: 1 queue req: 0
  26414. 2025-07-20 17:49:18,694 - sglang - INFO - [2025-07-20 17:49:18 TP0] Decode batch. #running-req: 1, #token: 3653, token usage: 0.10, gen throughput (token/s): 47.99, #queue-req: 0
  26415. 2025-07-20 17:49:18,694 - __main__ - INFO - sglang running req: 1 queue req: 0
  26416. 2025-07-20 17:49:19,530 - sglang - INFO - [2025-07-20 17:49:19 TP0] Decode batch. #running-req: 1, #token: 3693, token usage: 0.10, gen throughput (token/s): 47.81, #queue-req: 0
  26417. 2025-07-20 17:49:19,530 - __main__ - INFO - sglang running req: 1 queue req: 0
  26418. 2025-07-20 17:49:20,371 - sglang - INFO - [2025-07-20 17:49:20 TP0] Decode batch. #running-req: 1, #token: 3733, token usage: 0.10, gen throughput (token/s): 47.57, #queue-req: 0
  26419. 2025-07-20 17:49:20,371 - __main__ - INFO - sglang running req: 1 queue req: 0
  26420. 2025-07-20 17:49:21,207 - sglang - INFO - [2025-07-20 17:49:21 TP0] Decode batch. #running-req: 1, #token: 3773, token usage: 0.10, gen throughput (token/s): 47.84, #queue-req: 0
  26421. 2025-07-20 17:49:21,207 - __main__ - INFO - sglang running req: 1 queue req: 0
  26422. 2025-07-20 17:49:22,046 - sglang - INFO - [2025-07-20 17:49:22 TP0] Decode batch. #running-req: 1, #token: 3813, token usage: 0.10, gen throughput (token/s): 47.68, #queue-req: 0
  26423. 2025-07-20 17:49:22,046 - __main__ - INFO - sglang running req: 1 queue req: 0
  26424. 2025-07-20 17:49:22,886 - sglang - INFO - [2025-07-20 17:49:22 TP0] Decode batch. #running-req: 1, #token: 3853, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
  26425. 2025-07-20 17:49:22,886 - __main__ - INFO - sglang running req: 1 queue req: 0
  26426. 2025-07-20 17:49:23,730 - sglang - INFO - [2025-07-20 17:49:23 TP0] Decode batch. #running-req: 1, #token: 3893, token usage: 0.10, gen throughput (token/s): 47.37, #queue-req: 0
  26427. 2025-07-20 17:49:23,730 - __main__ - INFO - sglang running req: 1 queue req: 0
  26428. 2025-07-20 17:49:24,576 - sglang - INFO - [2025-07-20 17:49:24 TP0] Decode batch. #running-req: 1, #token: 3933, token usage: 0.10, gen throughput (token/s): 47.32, #queue-req: 0
  26429. 2025-07-20 17:49:24,576 - __main__ - INFO - sglang running req: 1 queue req: 0
  26430. 2025-07-20 17:49:25,410 - sglang - INFO - [2025-07-20 17:49:25 TP0] Decode batch. #running-req: 1, #token: 3973, token usage: 0.10, gen throughput (token/s): 47.95, #queue-req: 0
  26431. 2025-07-20 17:49:25,410 - __main__ - INFO - sglang running req: 1 queue req: 0
  26432. 2025-07-20 17:49:26,244 - sglang - INFO - [2025-07-20 17:49:26 TP0] Decode batch. #running-req: 1, #token: 4013, token usage: 0.11, gen throughput (token/s): 47.97, #queue-req: 0
  26433. 2025-07-20 17:49:26,244 - __main__ - INFO - sglang running req: 1 queue req: 0
  26434. 2025-07-20 17:49:27,005 - __main__ - INFO - Queue remaining: 0
  26435. 2025-07-20 17:49:27,006 - __main__ - INFO -
  26436. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26437. ----------------------------------------------------------------------------------
  26438. finished_input_tokens 317.90 0.00
  26439. finished_output_tokens 76.35 0.00
  26440. sglang_input_tokens 346.00 43.47
  26441. sglang_output_tokens 99.17 50.00
  26442. 2025-07-20 17:49:27,006 - __main__ - INFO -
  26443. Worker ID | finished | started
  26444. ----------+----------+--------
  26445. 0 | 8 | 9
  26446. 2025-07-20 17:49:27,085 - sglang - INFO - [2025-07-20 17:49:27 TP0] Decode batch. #running-req: 1, #token: 4053, token usage: 0.11, gen throughput (token/s): 47.55, #queue-req: 0
  26447. 2025-07-20 17:49:27,085 - __main__ - INFO - sglang running req: 1 queue req: 0
  26448. 2025-07-20 17:49:27,926 - sglang - INFO - [2025-07-20 17:49:27 TP0] Decode batch. #running-req: 1, #token: 4093, token usage: 0.11, gen throughput (token/s): 47.55, #queue-req: 0
  26449. 2025-07-20 17:49:27,926 - __main__ - INFO - sglang running req: 1 queue req: 0
  26450. 2025-07-20 17:49:28,769 - sglang - INFO - [2025-07-20 17:49:28 TP0] Decode batch. #running-req: 1, #token: 4133, token usage: 0.11, gen throughput (token/s): 47.47, #queue-req: 0
  26451. 2025-07-20 17:49:28,769 - __main__ - INFO - sglang running req: 1 queue req: 0
  26452. 2025-07-20 17:49:29,611 - sglang - INFO - [2025-07-20 17:49:29 TP0] Decode batch. #running-req: 1, #token: 4173, token usage: 0.11, gen throughput (token/s): 47.50, #queue-req: 0
  26453. 2025-07-20 17:49:29,611 - __main__ - INFO - sglang running req: 1 queue req: 0
  26454. 2025-07-20 17:49:30,456 - sglang - INFO - [2025-07-20 17:49:30 TP0] Decode batch. #running-req: 1, #token: 4213, token usage: 0.11, gen throughput (token/s): 47.31, #queue-req: 0
  26455. 2025-07-20 17:49:30,457 - __main__ - INFO - sglang running req: 1 queue req: 0
  26456. 2025-07-20 17:49:31,305 - sglang - INFO - [2025-07-20 17:49:31 TP0] Decode batch. #running-req: 1, #token: 4253, token usage: 0.11, gen throughput (token/s): 47.16, #queue-req: 0
  26457. 2025-07-20 17:49:31,305 - __main__ - INFO - sglang running req: 1 queue req: 0
  26458. 2025-07-20 17:49:32,143 - sglang - INFO - [2025-07-20 17:49:32 TP0] Decode batch. #running-req: 1, #token: 4293, token usage: 0.11, gen throughput (token/s): 47.69, #queue-req: 0
  26459. 2025-07-20 17:49:32,143 - __main__ - INFO - sglang running req: 1 queue req: 0
  26460. 2025-07-20 17:49:32,979 - sglang - INFO - [2025-07-20 17:49:32 TP0] Decode batch. #running-req: 1, #token: 4333, token usage: 0.11, gen throughput (token/s): 47.86, #queue-req: 0
  26461. 2025-07-20 17:49:32,979 - __main__ - INFO - sglang running req: 1 queue req: 0
  26462. 2025-07-20 17:49:33,821 - sglang - INFO - [2025-07-20 17:49:33 TP0] Decode batch. #running-req: 1, #token: 4373, token usage: 0.12, gen throughput (token/s): 47.49, #queue-req: 0
  26463. 2025-07-20 17:49:33,821 - __main__ - INFO - sglang running req: 1 queue req: 0
  26464. 2025-07-20 17:49:34,665 - sglang - INFO - [2025-07-20 17:49:34 TP0] Decode batch. #running-req: 1, #token: 4413, token usage: 0.12, gen throughput (token/s): 47.43, #queue-req: 0
  26465. 2025-07-20 17:49:34,665 - __main__ - INFO - sglang running req: 1 queue req: 0
  26466. 2025-07-20 17:49:35,510 - sglang - INFO - [2025-07-20 17:49:35 TP0] Decode batch. #running-req: 1, #token: 4453, token usage: 0.12, gen throughput (token/s): 47.30, #queue-req: 0
  26467. 2025-07-20 17:49:35,510 - __main__ - INFO - sglang running req: 1 queue req: 0
  26468. 2025-07-20 17:49:36,353 - sglang - INFO - [2025-07-20 17:49:36 TP0] Decode batch. #running-req: 1, #token: 4493, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
  26469. 2025-07-20 17:49:36,353 - __main__ - INFO - sglang running req: 1 queue req: 0
  26470. 2025-07-20 17:49:37,007 - __main__ - INFO - Queue remaining: 0
  26471. 2025-07-20 17:49:37,007 - __main__ - INFO -
  26472. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26473. ----------------------------------------------------------------------------------
  26474. finished_input_tokens 315.77 0.00
  26475. finished_output_tokens 75.83 0.00
  26476. sglang_input_tokens 343.68 43.47
  26477. sglang_output_tokens 98.51 50.00
  26478. 2025-07-20 17:49:37,007 - __main__ - INFO -
  26479. Worker ID | finished | started
  26480. ----------+----------+--------
  26481. 0 | 8 | 9
  26482. 2025-07-20 17:49:37,200 - sglang - INFO - [2025-07-20 17:49:37 TP0] Decode batch. #running-req: 1, #token: 4533, token usage: 0.12, gen throughput (token/s): 47.21, #queue-req: 0
  26483. 2025-07-20 17:49:37,201 - __main__ - INFO - sglang running req: 1 queue req: 0
  26484. 2025-07-20 17:49:38,048 - sglang - INFO - [2025-07-20 17:49:38 TP0] Decode batch. #running-req: 1, #token: 4573, token usage: 0.12, gen throughput (token/s): 47.18, #queue-req: 0
  26485. 2025-07-20 17:49:38,049 - __main__ - INFO - sglang running req: 1 queue req: 0
  26486. 2025-07-20 17:49:38,891 - sglang - INFO - [2025-07-20 17:49:38 TP0] Decode batch. #running-req: 1, #token: 4613, token usage: 0.12, gen throughput (token/s): 47.48, #queue-req: 0
  26487. 2025-07-20 17:49:38,891 - __main__ - INFO - sglang running req: 1 queue req: 0
  26488. 2025-07-20 17:49:39,730 - sglang - INFO - [2025-07-20 17:49:39 TP0] Decode batch. #running-req: 1, #token: 4653, token usage: 0.12, gen throughput (token/s): 47.64, #queue-req: 0
  26489. 2025-07-20 17:49:39,730 - __main__ - INFO - sglang running req: 1 queue req: 0
  26490. 2025-07-20 17:49:40,574 - sglang - INFO - [2025-07-20 17:49:40 TP0] Decode batch. #running-req: 1, #token: 4693, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
  26491. 2025-07-20 17:49:40,574 - __main__ - INFO - sglang running req: 1 queue req: 0
  26492. 2025-07-20 17:49:41,418 - sglang - INFO - [2025-07-20 17:49:41 TP0] Decode batch. #running-req: 1, #token: 4733, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
  26493. 2025-07-20 17:49:41,418 - __main__ - INFO - sglang running req: 1 queue req: 0
  26494. 2025-07-20 17:49:42,261 - sglang - INFO - [2025-07-20 17:49:42 TP0] Decode batch. #running-req: 1, #token: 4773, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
  26495. 2025-07-20 17:49:42,262 - __main__ - INFO - sglang running req: 1 queue req: 0
  26496. 2025-07-20 17:49:43,105 - sglang - INFO - [2025-07-20 17:49:43 TP0] Decode batch. #running-req: 1, #token: 4813, token usage: 0.13, gen throughput (token/s): 47.42, #queue-req: 0
  26497. 2025-07-20 17:49:43,105 - __main__ - INFO - sglang running req: 1 queue req: 0
  26498. 2025-07-20 17:49:43,948 - sglang - INFO - [2025-07-20 17:49:43 TP0] Decode batch. #running-req: 1, #token: 4853, token usage: 0.13, gen throughput (token/s): 47.45, #queue-req: 0
  26499. 2025-07-20 17:49:43,948 - __main__ - INFO - sglang running req: 1 queue req: 0
  26500. 2025-07-20 17:49:44,797 - sglang - INFO - [2025-07-20 17:49:44 TP0] Decode batch. #running-req: 1, #token: 4893, token usage: 0.13, gen throughput (token/s): 47.14, #queue-req: 0
  26501. 2025-07-20 17:49:44,797 - __main__ - INFO - sglang running req: 1 queue req: 0
  26502. 2025-07-20 17:49:45,637 - sglang - INFO - [2025-07-20 17:49:45 TP0] Decode batch. #running-req: 1, #token: 4933, token usage: 0.13, gen throughput (token/s): 47.59, #queue-req: 0
  26503. 2025-07-20 17:49:45,637 - __main__ - INFO - sglang running req: 1 queue req: 0
  26504. 2025-07-20 17:49:46,475 - sglang - INFO - [2025-07-20 17:49:46 TP0] Decode batch. #running-req: 1, #token: 4973, token usage: 0.13, gen throughput (token/s): 47.76, #queue-req: 0
  26505. 2025-07-20 17:49:46,475 - __main__ - INFO - sglang running req: 1 queue req: 0
  26506. 2025-07-20 17:49:47,008 - __main__ - INFO - Queue remaining: 0
  26507. 2025-07-20 17:49:47,008 - __main__ - INFO -
  26508. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26509. ----------------------------------------------------------------------------------
  26510. finished_input_tokens 313.66 0.00
  26511. finished_output_tokens 75.33 0.00
  26512. sglang_input_tokens 341.39 43.47
  26513. sglang_output_tokens 97.85 50.00
  26514. 2025-07-20 17:49:47,009 - __main__ - INFO -
  26515. Worker ID | finished | started
  26516. ----------+----------+--------
  26517. 0 | 8 | 9
  26518. 2025-07-20 17:49:47,317 - sglang - INFO - [2025-07-20 17:49:47 TP0] Decode batch. #running-req: 1, #token: 5013, token usage: 0.13, gen throughput (token/s): 47.46, #queue-req: 0
  26519. 2025-07-20 17:49:47,318 - __main__ - INFO - sglang running req: 1 queue req: 0
  26520. 2025-07-20 17:49:48,161 - sglang - INFO - [2025-07-20 17:49:48 TP0] Decode batch. #running-req: 1, #token: 5053, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
  26521. 2025-07-20 17:49:48,162 - __main__ - INFO - sglang running req: 1 queue req: 0
  26522. 2025-07-20 17:49:49,005 - sglang - INFO - [2025-07-20 17:49:49 TP0] Decode batch. #running-req: 1, #token: 5093, token usage: 0.13, gen throughput (token/s): 47.44, #queue-req: 0
  26523. 2025-07-20 17:49:49,005 - __main__ - INFO - sglang running req: 1 queue req: 0
  26524. 2025-07-20 17:49:49,844 - sglang - INFO - [2025-07-20 17:49:49 TP0] Decode batch. #running-req: 1, #token: 5133, token usage: 0.14, gen throughput (token/s): 47.67, #queue-req: 0
  26525. 2025-07-20 17:49:49,844 - __main__ - INFO - sglang running req: 1 queue req: 0
  26526. 2025-07-20 17:49:50,686 - sglang - INFO - [2025-07-20 17:49:50 TP0] Decode batch. #running-req: 1, #token: 5173, token usage: 0.14, gen throughput (token/s): 47.49, #queue-req: 0
  26527. 2025-07-20 17:49:50,686 - __main__ - INFO - sglang running req: 1 queue req: 0
  26528. 2025-07-20 17:49:51,535 - sglang - INFO - [2025-07-20 17:49:51 TP0] Decode batch. #running-req: 1, #token: 5213, token usage: 0.14, gen throughput (token/s): 47.14, #queue-req: 0
  26529. 2025-07-20 17:49:51,535 - __main__ - INFO - sglang running req: 1 queue req: 0
  26530. 2025-07-20 17:49:52,379 - sglang - INFO - [2025-07-20 17:49:52 TP0] Decode batch. #running-req: 1, #token: 5253, token usage: 0.14, gen throughput (token/s): 47.39, #queue-req: 0
  26531. 2025-07-20 17:49:52,379 - __main__ - INFO - sglang running req: 1 queue req: 0
  26532. 2025-07-20 17:49:53,220 - sglang - INFO - [2025-07-20 17:49:53 TP0] Decode batch. #running-req: 1, #token: 5293, token usage: 0.14, gen throughput (token/s): 47.57, #queue-req: 0
  26533. 2025-07-20 17:49:53,220 - __main__ - INFO - sglang running req: 1 queue req: 0
  26534. 2025-07-20 17:49:54,063 - sglang - INFO - [2025-07-20 17:49:54 TP0] Decode batch. #running-req: 1, #token: 5333, token usage: 0.14, gen throughput (token/s): 47.41, #queue-req: 0
  26535. 2025-07-20 17:49:54,064 - __main__ - INFO - sglang running req: 1 queue req: 0
  26536. 2025-07-20 17:49:54,911 - sglang - INFO - [2025-07-20 17:49:54 TP0] Decode batch. #running-req: 1, #token: 5373, token usage: 0.14, gen throughput (token/s): 47.20, #queue-req: 0
  26537. 2025-07-20 17:49:54,911 - __main__ - INFO - sglang running req: 1 queue req: 0
  26538. 2025-07-20 17:49:55,752 - sglang - INFO - [2025-07-20 17:49:55 TP0] Decode batch. #running-req: 1, #token: 5413, token usage: 0.14, gen throughput (token/s): 47.55, #queue-req: 0
  26539. 2025-07-20 17:49:55,752 - __main__ - INFO - sglang running req: 1 queue req: 0
  26540. 2025-07-20 17:49:56,592 - sglang - INFO - [2025-07-20 17:49:56 TP0] Decode batch. #running-req: 1, #token: 5453, token usage: 0.14, gen throughput (token/s): 47.64, #queue-req: 0
  26541. 2025-07-20 17:49:56,592 - __main__ - INFO - sglang running req: 1 queue req: 0
  26542. 2025-07-20 17:49:57,010 - __main__ - INFO - Queue remaining: 0
  26543. 2025-07-20 17:49:57,010 - __main__ - INFO -
  26544. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26545. ----------------------------------------------------------------------------------
  26546. finished_input_tokens 311.59 0.00
  26547. finished_output_tokens 74.83 0.00
  26548. sglang_input_tokens 339.13 43.47
  26549. sglang_output_tokens 97.20 50.00
  26550. 2025-07-20 17:49:57,010 - __main__ - INFO -
  26551. Worker ID | finished | started
  26552. ----------+----------+--------
  26553. 0 | 8 | 9
  26554. 2025-07-20 17:49:57,432 - sglang - INFO - [2025-07-20 17:49:57 TP0] Decode batch. #running-req: 1, #token: 5493, token usage: 0.14, gen throughput (token/s): 47.62, #queue-req: 0
  26555. 2025-07-20 17:49:57,432 - __main__ - INFO - sglang running req: 1 queue req: 0
  26556. 2025-07-20 17:49:58,278 - sglang - INFO - [2025-07-20 17:49:58 TP0] Decode batch. #running-req: 1, #token: 5533, token usage: 0.15, gen throughput (token/s): 47.26, #queue-req: 0
  26557. 2025-07-20 17:49:58,278 - __main__ - INFO - sglang running req: 1 queue req: 0
  26558. 2025-07-20 17:49:59,121 - sglang - INFO - [2025-07-20 17:49:59 TP0] Decode batch. #running-req: 1, #token: 5573, token usage: 0.15, gen throughput (token/s): 47.43, #queue-req: 0
  26559. 2025-07-20 17:49:59,122 - __main__ - INFO - sglang running req: 1 queue req: 0
  26560. 2025-07-20 17:49:59,844 - __main__ - WARNING - JSON decode error on attempt 7 for test_pdf/1144520000702630XG344010604301601.pdf-5: Unterminated string starting at: line 1 column 125 (char 124)
  26561. 2025-07-20 17:49:59,844 - __main__ - ERROR - Failed to process test_pdf/1144520000702630XG344010604301601.pdf-5 after 8 attempts.
  26562. 2025-07-20 17:49:59,857 - __main__ - ERROR - Document test_pdf/1144520000702630XG344010604301601.pdf has 1 fallback pages out of 9 exceeding max_page_error_rate of 0.004, discarding document.
  26563. 2025-07-20 17:49:59,858 - __main__ - INFO - Finished TaskGroup for worker on 21ee5d5d32535bcacd750ef2dace24b98fa42fdb
  26564. 2025-07-20 17:49:59,858 - __main__ - INFO - Got 0 docs for 21ee5d5d32535bcacd750ef2dace24b98fa42fdb
  26565. 2025-07-20 17:49:59,859 - __main__ - INFO - Worker 0 exiting due to empty queue
  26566. 2025-07-20 17:49:59,860 - __main__ - INFO - Work done
  26567. 2025-07-20 17:49:59,860 - __main__ - INFO - Got cancellation request for SGLang server
  26568. 2025-08-24 23:25:02,460 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  26569. 2025-08-24 23:25:02,460 - __main__ - INFO - Loading file at ./workspace/delivery.pdf as PDF document
  26570. 2025-08-24 23:25:02,461 - __main__ - INFO - Found 1 total pdf paths to add
  26571. 2025-08-24 23:25:02,465 - __main__ - INFO - Calculated items_per_group: 2 based on average pages per PDF: 5.00
  26572. 2025-08-24 23:25:02,621 - __main__ - INFO - Starting pipeline with PID 476723
  26573. 2025-08-24 23:25:02,621 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  26574. 2025-08-24 23:25:02,710 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  26575. 2025-08-24 23:25:03,741 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  26576. 2025-08-24 23:25:04,787 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  26577. 2025-08-24 23:25:05,852 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  26578. 2025-08-24 23:25:06,918 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  26579. 2025-08-24 23:25:07,984 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  26580. 2025-08-24 23:25:09,043 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  26581. 2025-08-24 23:25:10,087 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  26582. 2025-08-24 23:25:11,136 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  26583. 2025-08-24 23:25:11,702 - sglang - INFO - [2025-08-24 23:25:11] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=704242960, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  26584. 2025-08-24 23:25:11,702 - __main__ - INFO - [2025-08-24 23:25:11] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=704242960, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  26585. 2025-08-24 23:25:12,199 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  26586. 2025-08-24 23:25:12,632 - sglang - INFO - [2025-08-24 23:25:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
  26587. 2025-08-24 23:25:12,633 - __main__ - INFO - [2025-08-24 23:25:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
  26588. 2025-08-24 23:25:13,252 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  26589. 2025-08-24 23:25:14,323 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  26590. 2025-08-24 23:25:15,395 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  26591. 2025-08-24 23:25:16,466 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  26592. 2025-08-24 23:25:17,528 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  26593. 2025-08-24 23:25:18,595 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  26594. 2025-08-24 23:25:18,874 - sglang - INFO - [2025-08-24 23:25:18 TP0] Overlap scheduler is disabled for multimodal models.
  26595. 2025-08-24 23:25:18,874 - __main__ - INFO - [2025-08-24 23:25:18 TP0] Overlap scheduler is disabled for multimodal models.
  26596. 2025-08-24 23:25:18,876 - sglang - INFO - [2025-08-24 23:25:18 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  26597. 2025-08-24 23:25:18,876 - __main__ - INFO - [2025-08-24 23:25:18 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  26598. 2025-08-24 23:25:18,876 - sglang - INFO - [2025-08-24 23:25:18 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  26599. 2025-08-24 23:25:18,877 - __main__ - INFO - [2025-08-24 23:25:18 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  26600. 2025-08-24 23:25:18,877 - sglang - INFO - [2025-08-24 23:25:18 TP0] Init torch distributed begin.
  26601. 2025-08-24 23:25:18,877 - __main__ - INFO - [2025-08-24 23:25:18 TP0] Init torch distributed begin.
  26602. 2025-08-24 23:25:19,675 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  26603. 2025-08-24 23:25:20,742 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  26604. 2025-08-24 23:25:21,796 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  26605. 2025-08-24 23:25:22,863 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  26606. 2025-08-24 23:25:23,928 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  26607. 2025-08-24 23:25:24,213 - sglang - INFO - [2025-08-24 23:25:24 TP0] Load weight begin. avail mem=23.33 GB
  26608. 2025-08-24 23:25:24,213 - __main__ - INFO - [2025-08-24 23:25:24 TP0] Load weight begin. avail mem=23.33 GB
  26609. 2025-08-24 23:25:24,892 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  26610. 2025-08-24 23:25:24,892 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  26611. 2025-08-24 23:25:25,007 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  26612. 2025-08-24 23:25:25,705 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.23it/s]
  26613. 2025-08-24 23:25:25,705 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.23it/s]
  26614. 2025-08-24 23:25:26,086 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  26615. 2025-08-24 23:25:26,663 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.11it/s]
  26616. 2025-08-24 23:25:26,663 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.11it/s]
  26617. 2025-08-24 23:25:27,166 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  26618. 2025-08-24 23:25:27,593 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.10it/s]
  26619. 2025-08-24 23:25:27,593 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.10it/s]
  26620. 2025-08-24 23:25:28,036 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.37it/s]
  26621. 2025-08-24 23:25:28,036 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.37it/s]
  26622. 2025-08-24 23:25:28,036 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
  26623. 2025-08-24 23:25:28,036 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.27it/s]
  26624. 2025-08-24 23:25:28,037 - sglang - INFO -
  26625. 2025-08-24 23:25:28,037 - __main__ - INFO -
  26626. 2025-08-24 23:25:28,100 - sglang - INFO - [2025-08-24 23:25:28 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  26627. 2025-08-24 23:25:28,100 - __main__ - INFO - [2025-08-24 23:25:28 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  26628. 2025-08-24 23:25:28,105 - sglang - INFO - [2025-08-24 23:25:28 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  26629. 2025-08-24 23:25:28,105 - __main__ - INFO - [2025-08-24 23:25:28 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  26630. 2025-08-24 23:25:28,106 - sglang - INFO - [2025-08-24 23:25:28 TP0] Memory pool end. avail mem=5.30 GB
  26631. 2025-08-24 23:25:28,106 - __main__ - INFO - [2025-08-24 23:25:28 TP0] Memory pool end. avail mem=5.30 GB
  26632. 2025-08-24 23:25:28,247 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  26633. 2025-08-24 23:25:28,284 - sglang - INFO - [2025-08-24 23:25:28 TP0] Capture cuda graph begin. This can take up to several minutes.
  26634. 2025-08-24 23:25:28,284 - __main__ - INFO - [2025-08-24 23:25:28 TP0] Capture cuda graph begin. This can take up to several minutes.
  26635. 2025-08-24 23:25:29,330 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  26636. 2025-08-24 23:25:30,156 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.07s/it] 50%|█████ | 2/4 [00:01<00:01, 1.68it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s] 100%|██████████| 4/4 [00:01<00:00, 2.66it/s] 100%|██████████| 4/4 [00:01<00:00, 2.14it/s]
  26637. 2025-08-24 23:25:30,156 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.07s/it] 50%|█████ | 2/4 [00:01<00:01, 1.68it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s] 100%|██████████| 4/4 [00:01<00:00, 2.66it/s] 100%|██████████| 4/4 [00:01<00:00, 2.14it/s]
  26638. 2025-08-24 23:25:30,156 - sglang - INFO - [2025-08-24 23:25:30 TP0] Capture cuda graph end. Time elapsed: 1.87 s
  26639. 2025-08-24 23:25:30,156 - __main__ - INFO - [2025-08-24 23:25:30 TP0] Capture cuda graph end. Time elapsed: 1.87 s
  26640. 2025-08-24 23:25:30,416 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready...
  26641. 2025-08-24 23:25:30,848 - sglang - INFO - [2025-08-24 23:25:30 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  26642. 2025-08-24 23:25:30,849 - __main__ - INFO - [2025-08-24 23:25:30 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  26643. 2025-08-24 23:25:31,509 - __main__ - INFO - sglang server is ready.
  26644. 2025-08-24 23:25:31,509 - __main__ - INFO - Queue remaining: 1
  26645. 2025-08-24 23:25:31,509 - __main__ - INFO -
  26646. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26647. ----------------------------------------------------------------------------------
  26648. 2025-08-24 23:25:31,509 - __main__ - INFO -
  26649. Worker ID
  26650. ---------
  26651. 2025-08-24 23:25:31,510 - __main__ - INFO - Worker 0 processing work item c8e80875b3bd75cd2f1ae72e45733f15ee7f5b3e
  26652. 2025-08-24 23:25:31,510 - __main__ - INFO - Created all tasks for c8e80875b3bd75cd2f1ae72e45733f15ee7f5b3e
  26653. 2025-08-24 23:25:31,516 - __main__ - INFO - Got 5 pages to do for ./workspace/delivery.pdf in worker 0
  26654. 2025-08-24 23:25:31,925 - sglang - INFO - [2025-08-24 23:25:31 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26655. 2025-08-24 23:25:31,926 - __main__ - INFO - [2025-08-24 23:25:31 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26656. 2025-08-24 23:25:31,926 - __main__ - INFO - sglang running req: 0 queue req: 0
  26657. 2025-08-24 23:25:33,074 - sglang - INFO - [2025-08-24 23:25:33] The server is fired up and ready to roll!
  26658. 2025-08-24 23:25:33,075 - __main__ - INFO - [2025-08-24 23:25:33] The server is fired up and ready to roll!
  26659. 2025-08-24 23:25:37,800 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-1
  26660. 2025-08-24 23:25:37,839 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-2
  26661. 2025-08-24 23:25:37,875 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-3
  26662. 2025-08-24 23:25:37,886 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-4
  26663. 2025-08-24 23:25:37,907 - __main__ - INFO - Built page query for ./workspace/delivery.pdf-5
  26664. 2025-08-24 23:25:41,532 - __main__ - INFO - Queue remaining: 0
  26665. 2025-08-24 23:25:41,533 - __main__ - INFO -
  26666. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26667. ----------------------------------------------------------------------------------
  26668. 2025-08-24 23:25:41,533 - __main__ - INFO -
  26669. Worker ID | started
  26670. ----------+--------
  26671. 0 | 5
  26672. 2025-08-24 23:25:51,534 - __main__ - INFO - Queue remaining: 0
  26673. 2025-08-24 23:25:51,534 - __main__ - INFO -
  26674. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26675. ----------------------------------------------------------------------------------
  26676. 2025-08-24 23:25:51,535 - __main__ - INFO -
  26677. Worker ID | started
  26678. ----------+--------
  26679. 0 | 5
  26680. 2025-08-24 23:25:54,865 - sglang - INFO - [2025-08-24 23:25:54 TP0] Prefill batch. #new-seq: 1, #new-token: 2017, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26681. 2025-08-24 23:25:54,866 - __main__ - INFO - sglang running req: 0 queue req: 0
  26682. 2025-08-24 23:25:55,873 - sglang - INFO - [2025-08-24 23:25:55 TP0] Prefill batch. #new-seq: 4, #new-token: 8308, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.05, #running-req: 1, #queue-req: 0
  26683. 2025-08-24 23:25:55,874 - __main__ - INFO - sglang running req: 1 queue req: 0
  26684. 2025-08-24 23:25:59,272 - sglang - INFO - [2025-08-24 23:25:59 TP0] Decode batch. #running-req: 5, #token: 10490, token usage: 0.28, gen throughput (token/s): 6.05, #queue-req: 0
  26685. 2025-08-24 23:25:59,272 - __main__ - INFO - sglang running req: 5 queue req: 0
  26686. 2025-08-24 23:26:00,139 - sglang - INFO - [2025-08-24 23:26:00 TP0] Decode batch. #running-req: 5, #token: 10690, token usage: 0.28, gen throughput (token/s): 230.55, #queue-req: 0
  26687. 2025-08-24 23:26:00,140 - __main__ - INFO - sglang running req: 5 queue req: 0
  26688. 2025-08-24 23:26:01,008 - sglang - INFO - [2025-08-24 23:26:01 TP0] Decode batch. #running-req: 5, #token: 10890, token usage: 0.29, gen throughput (token/s): 230.35, #queue-req: 0
  26689. 2025-08-24 23:26:01,008 - __main__ - INFO - sglang running req: 5 queue req: 0
  26690. 2025-08-24 23:26:01,536 - __main__ - INFO - Queue remaining: 0
  26691. 2025-08-24 23:26:01,536 - __main__ - INFO -
  26692. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26693. ----------------------------------------------------------------------------------
  26694. 2025-08-24 23:26:01,537 - __main__ - INFO -
  26695. Worker ID | started
  26696. ----------+--------
  26697. 0 | 5
  26698. 2025-08-24 23:26:01,875 - sglang - INFO - [2025-08-24 23:26:01 TP0] Decode batch. #running-req: 5, #token: 11090, token usage: 0.29, gen throughput (token/s): 230.48, #queue-req: 0
  26699. 2025-08-24 23:26:01,876 - __main__ - INFO - sglang running req: 5 queue req: 0
  26700. 2025-08-24 23:26:02,745 - sglang - INFO - [2025-08-24 23:26:02 TP0] Decode batch. #running-req: 5, #token: 11290, token usage: 0.30, gen throughput (token/s): 229.99, #queue-req: 0
  26701. 2025-08-24 23:26:02,745 - __main__ - INFO - sglang running req: 5 queue req: 0
  26702. 2025-08-24 23:26:03,617 - sglang - INFO - [2025-08-24 23:26:03 TP0] Decode batch. #running-req: 5, #token: 11490, token usage: 0.30, gen throughput (token/s): 229.50, #queue-req: 0
  26703. 2025-08-24 23:26:03,617 - __main__ - INFO - sglang running req: 5 queue req: 0
  26704. 2025-08-24 23:26:03,857 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  26705. 2025-08-24 23:26:04,491 - sglang - INFO - [2025-08-24 23:26:04 TP0] Decode batch. #running-req: 5, #token: 11690, token usage: 0.31, gen throughput (token/s): 228.69, #queue-req: 0
  26706. 2025-08-24 23:26:04,491 - __main__ - INFO - sglang running req: 5 queue req: 0
  26707. 2025-08-24 23:26:05,366 - sglang - INFO - [2025-08-24 23:26:05 TP0] Decode batch. #running-req: 5, #token: 11890, token usage: 0.31, gen throughput (token/s): 228.53, #queue-req: 0
  26708. 2025-08-24 23:26:05,367 - __main__ - INFO - sglang running req: 5 queue req: 0
  26709. 2025-08-24 23:26:06,239 - sglang - INFO - [2025-08-24 23:26:06 TP0] Decode batch. #running-req: 3, #token: 7360, token usage: 0.19, gen throughput (token/s): 213.17, #queue-req: 0
  26710. 2025-08-24 23:26:06,239 - __main__ - INFO - sglang running req: 3 queue req: 0
  26711. 2025-08-24 23:26:07,093 - sglang - INFO - [2025-08-24 23:26:07 TP0] Decode batch. #running-req: 3, #token: 7480, token usage: 0.20, gen throughput (token/s): 140.51, #queue-req: 0
  26712. 2025-08-24 23:26:07,093 - __main__ - INFO - sglang running req: 3 queue req: 0
  26713. 2025-08-24 23:26:07,935 - sglang - INFO - [2025-08-24 23:26:07 TP0] Decode batch. #running-req: 1, #token: 2985, token usage: 0.08, gen throughput (token/s): 73.62, #queue-req: 0
  26714. 2025-08-24 23:26:07,935 - __main__ - INFO - sglang running req: 1 queue req: 0
  26715. 2025-08-24 23:26:08,770 - sglang - INFO - [2025-08-24 23:26:08 TP0] Decode batch. #running-req: 1, #token: 3025, token usage: 0.08, gen throughput (token/s): 47.92, #queue-req: 0
  26716. 2025-08-24 23:26:08,770 - __main__ - INFO - sglang running req: 1 queue req: 0
  26717. 2025-08-24 23:26:09,175 - __main__ - INFO - Finished TaskGroup for worker on c8e80875b3bd75cd2f1ae72e45733f15ee7f5b3e
  26718. 2025-08-24 23:26:09,175 - __main__ - INFO - Got 1 docs for c8e80875b3bd75cd2f1ae72e45733f15ee7f5b3e
  26719. 2025-08-24 23:26:09,176 - __main__ - INFO - Worker 0 exiting due to empty queue
  26720. 2025-08-24 23:26:09,177 - __main__ - INFO - Work done
  26721. 2025-08-24 23:26:09,177 - __main__ - INFO - Got cancellation request for SGLang server
  26722. 2025-08-24 23:39:48,280 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  26723. 2025-08-24 23:39:48,280 - __main__ - INFO - Loading file at ./workspace/delivery.pdf as PDF document
  26724. 2025-08-24 23:39:48,280 - __main__ - INFO - Found 1 total pdf paths to add
  26725. 2025-08-24 23:39:48,284 - __main__ - INFO - Calculated items_per_group: 2 based on average pages per PDF: 5.00
  26726. 2025-08-24 23:39:48,490 - __main__ - INFO - Starting pipeline with PID 478445
  26727. 2025-08-24 23:39:48,490 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  26728. 2025-08-24 23:39:48,491 - __main__ - INFO - No work to do, exiting
  26729. 2025-08-24 23:44:29,672 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  26730. 2025-08-24 23:44:29,672 - __main__ - INFO - Loading file at ./workspace/delivery.pdf as PDF document
  26731. 2025-08-24 23:44:29,672 - __main__ - INFO - Found 1 total pdf paths to add
  26732. 2025-08-24 23:44:29,674 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
  26733. 2025-08-24 23:44:29,847 - __main__ - INFO - Starting pipeline with PID 478740
  26734. 2025-08-24 23:44:29,848 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  26735. 2025-08-24 23:44:29,849 - __main__ - INFO - No work to do, exiting
  26736. 2025-08-24 23:45:28,141 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  26737. 2025-08-24 23:45:28,142 - __main__ - INFO - Loading file at ./workspace/ambiguous.pdf as PDF document
  26738. 2025-08-24 23:45:28,142 - __main__ - INFO - Found 1 total pdf paths to add
  26739. 2025-08-24 23:45:28,144 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
  26740. 2025-08-24 23:45:28,267 - __main__ - INFO - Starting pipeline with PID 478956
  26741. 2025-08-24 23:45:28,267 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  26742. 2025-08-24 23:45:28,343 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  26743. 2025-08-24 23:45:29,372 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  26744. 2025-08-24 23:45:30,404 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  26745. 2025-08-24 23:45:31,455 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  26746. 2025-08-24 23:45:32,522 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  26747. 2025-08-24 23:45:33,591 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  26748. 2025-08-24 23:45:34,642 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  26749. 2025-08-24 23:45:34,880 - sglang - INFO - [2025-08-24 23:45:34] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=725225729, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  26750. 2025-08-24 23:45:34,881 - __main__ - INFO - [2025-08-24 23:45:34] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=725225729, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  26751. 2025-08-24 23:45:35,694 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  26752. 2025-08-24 23:45:35,839 - sglang - INFO - [2025-08-24 23:45:35] Use chat template for the OpenAI-compatible API server: qwen2-vl
  26753. 2025-08-24 23:45:35,839 - __main__ - INFO - [2025-08-24 23:45:35] Use chat template for the OpenAI-compatible API server: qwen2-vl
  26754. 2025-08-24 23:45:36,745 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  26755. 2025-08-24 23:45:37,815 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  26756. 2025-08-24 23:45:38,888 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  26757. 2025-08-24 23:45:39,962 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  26758. 2025-08-24 23:45:41,012 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  26759. 2025-08-24 23:45:41,949 - sglang - INFO - [2025-08-24 23:45:41 TP0] Overlap scheduler is disabled for multimodal models.
  26760. 2025-08-24 23:45:41,949 - __main__ - INFO - [2025-08-24 23:45:41 TP0] Overlap scheduler is disabled for multimodal models.
  26761. 2025-08-24 23:45:41,951 - sglang - INFO - [2025-08-24 23:45:41 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  26762. 2025-08-24 23:45:41,951 - __main__ - INFO - [2025-08-24 23:45:41 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  26763. 2025-08-24 23:45:41,952 - sglang - INFO - [2025-08-24 23:45:41 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  26764. 2025-08-24 23:45:41,952 - __main__ - INFO - [2025-08-24 23:45:41 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  26765. 2025-08-24 23:45:41,952 - sglang - INFO - [2025-08-24 23:45:41 TP0] Init torch distributed begin.
  26766. 2025-08-24 23:45:41,952 - __main__ - INFO - [2025-08-24 23:45:41 TP0] Init torch distributed begin.
  26767. 2025-08-24 23:45:42,096 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  26768. 2025-08-24 23:45:43,131 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  26769. 2025-08-24 23:45:44,196 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  26770. 2025-08-24 23:45:45,274 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  26771. 2025-08-24 23:45:46,343 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  26772. 2025-08-24 23:45:47,280 - sglang - INFO - [2025-08-24 23:45:47 TP0] Load weight begin. avail mem=23.33 GB
  26773. 2025-08-24 23:45:47,281 - __main__ - INFO - [2025-08-24 23:45:47 TP0] Load weight begin. avail mem=23.33 GB
  26774. 2025-08-24 23:45:47,386 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  26775. 2025-08-24 23:45:47,790 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  26776. 2025-08-24 23:45:47,790 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  26777. 2025-08-24 23:45:48,442 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  26778. 2025-08-24 23:45:48,788 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.00it/s]
  26779. 2025-08-24 23:45:48,788 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.00it/s]
  26780. 2025-08-24 23:45:49,522 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  26781. 2025-08-24 23:45:49,887 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.06s/it]
  26782. 2025-08-24 23:45:49,887 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:02<00:02, 1.06s/it]
  26783. 2025-08-24 23:45:50,601 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  26784. 2025-08-24 23:45:50,977 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.07s/it]
  26785. 2025-08-24 23:45:50,977 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:03<00:01, 1.07s/it]
  26786. 2025-08-24 23:45:51,448 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  26787. 2025-08-24 23:45:51,448 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.20it/s]
  26788. 2025-08-24 23:45:51,448 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.09it/s]
  26789. 2025-08-24 23:45:51,448 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.09it/s]
  26790. 2025-08-24 23:45:51,448 - sglang - INFO -
  26791. 2025-08-24 23:45:51,448 - __main__ - INFO -
  26792. 2025-08-24 23:45:51,511 - sglang - INFO - [2025-08-24 23:45:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  26793. 2025-08-24 23:45:51,511 - __main__ - INFO - [2025-08-24 23:45:51 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  26794. 2025-08-24 23:45:51,517 - sglang - INFO - [2025-08-24 23:45:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  26795. 2025-08-24 23:45:51,517 - __main__ - INFO - [2025-08-24 23:45:51 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  26796. 2025-08-24 23:45:51,517 - sglang - INFO - [2025-08-24 23:45:51 TP0] Memory pool end. avail mem=5.30 GB
  26797. 2025-08-24 23:45:51,518 - __main__ - INFO - [2025-08-24 23:45:51 TP0] Memory pool end. avail mem=5.30 GB
  26798. 2025-08-24 23:45:51,663 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  26799. 2025-08-24 23:45:51,666 - sglang - INFO - [2025-08-24 23:45:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  26800. 2025-08-24 23:45:51,666 - __main__ - INFO - [2025-08-24 23:45:51 TP0] Capture cuda graph begin. This can take up to several minutes.
  26801. 2025-08-24 23:45:52,737 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  26802. 2025-08-24 23:45:53,508 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.01it/s] 50%|█████ | 2/4 [00:01<00:01, 1.74it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.27it/s] 100%|██████████| 4/4 [00:01<00:00, 2.64it/s] 100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
  26803. 2025-08-24 23:45:53,509 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:00<00:02, 1.01it/s] 50%|█████ | 2/4 [00:01<00:01, 1.74it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.27it/s] 100%|██████████| 4/4 [00:01<00:00, 2.64it/s] 100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
  26804. 2025-08-24 23:45:53,509 - sglang - INFO - [2025-08-24 23:45:53 TP0] Capture cuda graph end. Time elapsed: 1.84 s
  26805. 2025-08-24 23:45:53,509 - __main__ - INFO - [2025-08-24 23:45:53 TP0] Capture cuda graph end. Time elapsed: 1.84 s
  26806. 2025-08-24 23:45:53,815 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  26807. 2025-08-24 23:45:54,222 - sglang - INFO - [2025-08-24 23:45:54 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  26808. 2025-08-24 23:45:54,223 - __main__ - INFO - [2025-08-24 23:45:54 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  26809. 2025-08-24 23:45:54,914 - __main__ - INFO - sglang server is ready.
  26810. 2025-08-24 23:45:54,914 - __main__ - INFO - Queue remaining: 1
  26811. 2025-08-24 23:45:54,914 - __main__ - INFO -
  26812. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26813. ----------------------------------------------------------------------------------
  26814. 2025-08-24 23:45:54,914 - __main__ - INFO -
  26815. Worker ID
  26816. ---------
  26817. 2025-08-24 23:45:54,915 - __main__ - INFO - Worker 0 processing work item 0dd0e3c651bff849c06afd78369d2b942edcf042
  26818. 2025-08-24 23:45:54,915 - __main__ - INFO - Created all tasks for 0dd0e3c651bff849c06afd78369d2b942edcf042
  26819. 2025-08-24 23:45:54,916 - __main__ - INFO - Got 1 pages to do for ./workspace/ambiguous.pdf in worker 0
  26820. 2025-08-24 23:45:55,309 - sglang - INFO - [2025-08-24 23:45:55 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26821. 2025-08-24 23:45:55,309 - __main__ - INFO - [2025-08-24 23:45:55 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26822. 2025-08-24 23:45:55,310 - __main__ - INFO - sglang running req: 0 queue req: 0
  26823. 2025-08-24 23:45:55,965 - sglang - INFO - [2025-08-24 23:45:55] The server is fired up and ready to roll!
  26824. 2025-08-24 23:45:55,965 - __main__ - INFO - [2025-08-24 23:45:55] The server is fired up and ready to roll!
  26825. 2025-08-24 23:46:01,367 - __main__ - INFO - Built page query for ./workspace/ambiguous.pdf-1
  26826. 2025-08-24 23:46:04,916 - __main__ - INFO - Queue remaining: 0
  26827. 2025-08-24 23:46:04,916 - __main__ - INFO -
  26828. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26829. ----------------------------------------------------------------------------------
  26830. 2025-08-24 23:46:04,916 - __main__ - INFO -
  26831. Worker ID | started
  26832. ----------+--------
  26833. 0 | 1
  26834. 2025-08-24 23:46:14,932 - __main__ - INFO - Queue remaining: 0
  26835. 2025-08-24 23:46:14,933 - __main__ - INFO -
  26836. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26837. ----------------------------------------------------------------------------------
  26838. 2025-08-24 23:46:14,933 - __main__ - INFO -
  26839. Worker ID | started
  26840. ----------+--------
  26841. 0 | 1
  26842. 2025-08-24 23:46:18,474 - sglang - INFO - [2025-08-24 23:46:18 TP0] Prefill batch. #new-seq: 1, #new-token: 1156, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26843. 2025-08-24 23:46:18,474 - __main__ - INFO - sglang running req: 0 queue req: 0
  26844. 2025-08-24 23:46:19,964 - sglang - INFO - [2025-08-24 23:46:19 TP0] Decode batch. #running-req: 1, #token: 1189, token usage: 0.03, gen throughput (token/s): 1.55, #queue-req: 0
  26845. 2025-08-24 23:46:19,964 - __main__ - INFO - sglang running req: 1 queue req: 0
  26846. 2025-08-24 23:46:20,322 - __main__ - INFO - Finished TaskGroup for worker on 0dd0e3c651bff849c06afd78369d2b942edcf042
  26847. 2025-08-24 23:46:20,323 - __main__ - INFO - Got 1 docs for 0dd0e3c651bff849c06afd78369d2b942edcf042
  26848. 2025-08-24 23:46:20,324 - __main__ - INFO - Worker 0 exiting due to empty queue
  26849. 2025-08-24 23:46:20,324 - __main__ - INFO - Work done
  26850. 2025-08-24 23:46:20,325 - __main__ - INFO - Got cancellation request for SGLang server
  26851. 2025-08-24 23:47:59,474 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  26852. 2025-08-24 23:47:59,475 - __main__ - INFO - Loading file at ./workspace/ambiguous.pdf as PDF document
  26853. 2025-08-24 23:47:59,475 - __main__ - INFO - Found 1 total pdf paths to add
  26854. 2025-08-24 23:47:59,476 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
  26855. 2025-08-24 23:47:59,645 - __main__ - INFO - Starting pipeline with PID 480164
  26856. 2025-08-24 23:47:59,645 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  26857. 2025-08-24 23:47:59,646 - __main__ - INFO - No work to do, exiting
  26858. 2025-08-24 23:53:01,828 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  26859. 2025-08-24 23:53:01,829 - __main__ - INFO - Loading file at ./workspace/ambiguous.pdf as PDF document
  26860. 2025-08-24 23:53:01,829 - __main__ - INFO - Found 1 total pdf paths to add
  26861. 2025-08-24 23:53:01,830 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
  26862. 2025-08-24 23:53:02,019 - __main__ - INFO - Starting pipeline with PID 480504
  26863. 2025-08-24 23:53:02,019 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  26864. 2025-08-24 23:53:02,114 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  26865. 2025-08-24 23:53:03,152 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  26866. 2025-08-24 23:53:04,196 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  26867. 2025-08-24 23:53:05,257 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  26868. 2025-08-24 23:53:06,322 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  26869. 2025-08-24 23:53:07,392 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  26870. 2025-08-24 23:53:08,457 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  26871. 2025-08-24 23:53:09,199 - sglang - INFO - [2025-08-24 23:53:09] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=901603346, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  26872. 2025-08-24 23:53:09,199 - __main__ - INFO - [2025-08-24 23:53:09] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=901603346, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  26873. 2025-08-24 23:53:09,537 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  26874. 2025-08-24 23:53:10,229 - sglang - INFO - [2025-08-24 23:53:10] Use chat template for the OpenAI-compatible API server: qwen2-vl
  26875. 2025-08-24 23:53:10,229 - __main__ - INFO - [2025-08-24 23:53:10] Use chat template for the OpenAI-compatible API server: qwen2-vl
  26876. 2025-08-24 23:53:10,573 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  26877. 2025-08-24 23:53:11,635 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  26878. 2025-08-24 23:53:12,701 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  26879. 2025-08-24 23:53:13,772 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  26880. 2025-08-24 23:53:14,843 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  26881. 2025-08-24 23:53:15,913 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  26882. 2025-08-24 23:53:16,982 - sglang - INFO - [2025-08-24 23:53:16 TP0] Overlap scheduler is disabled for multimodal models.
  26883. 2025-08-24 23:53:16,982 - __main__ - INFO - [2025-08-24 23:53:16 TP0] Overlap scheduler is disabled for multimodal models.
  26884. 2025-08-24 23:53:16,982 - sglang - INFO - [2025-08-24 23:53:16 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  26885. 2025-08-24 23:53:16,982 - __main__ - INFO - [2025-08-24 23:53:16 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  26886. 2025-08-24 23:53:16,982 - sglang - INFO - [2025-08-24 23:53:16 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  26887. 2025-08-24 23:53:16,982 - __main__ - INFO - [2025-08-24 23:53:16 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  26888. 2025-08-24 23:53:16,982 - sglang - INFO - [2025-08-24 23:53:16 TP0] Init torch distributed begin.
  26889. 2025-08-24 23:53:16,982 - __main__ - INFO - [2025-08-24 23:53:16 TP0] Init torch distributed begin.
  26890. 2025-08-24 23:53:16,983 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  26891. 2025-08-24 23:53:18,059 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  26892. 2025-08-24 23:53:19,130 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  26893. 2025-08-24 23:53:20,202 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  26894. 2025-08-24 23:53:21,262 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  26895. 2025-08-24 23:53:22,249 - sglang - INFO - [2025-08-24 23:53:22 TP0] Load weight begin. avail mem=23.33 GB
  26896. 2025-08-24 23:53:22,249 - __main__ - INFO - [2025-08-24 23:53:22 TP0] Load weight begin. avail mem=23.33 GB
  26897. 2025-08-24 23:53:22,337 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  26898. 2025-08-24 23:53:22,777 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  26899. 2025-08-24 23:53:22,777 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  26900. 2025-08-24 23:53:23,418 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  26901. 2025-08-24 23:53:23,722 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.06it/s]
  26902. 2025-08-24 23:53:23,722 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.06it/s]
  26903. 2025-08-24 23:53:24,492 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  26904. 2025-08-24 23:53:24,711 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.03it/s]
  26905. 2025-08-24 23:53:24,711 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.03it/s]
  26906. 2025-08-24 23:53:25,578 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  26907. 2025-08-24 23:53:25,679 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.03it/s]
  26908. 2025-08-24 23:53:25,679 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.03it/s]
  26909. 2025-08-24 23:53:26,073 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.35it/s]
  26910. 2025-08-24 23:53:26,073 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.35it/s]
  26911. 2025-08-24 23:53:26,073 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
  26912. 2025-08-24 23:53:26,073 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.21it/s]
  26913. 2025-08-24 23:53:26,074 - sglang - INFO -
  26914. 2025-08-24 23:53:26,074 - __main__ - INFO -
  26915. 2025-08-24 23:53:26,119 - sglang - INFO - [2025-08-24 23:53:26 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  26916. 2025-08-24 23:53:26,119 - __main__ - INFO - [2025-08-24 23:53:26 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  26917. 2025-08-24 23:53:26,125 - sglang - INFO - [2025-08-24 23:53:26 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  26918. 2025-08-24 23:53:26,125 - __main__ - INFO - [2025-08-24 23:53:26 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  26919. 2025-08-24 23:53:26,125 - sglang - INFO - [2025-08-24 23:53:26 TP0] Memory pool end. avail mem=5.30 GB
  26920. 2025-08-24 23:53:26,125 - __main__ - INFO - [2025-08-24 23:53:26 TP0] Memory pool end. avail mem=5.30 GB
  26921. 2025-08-24 23:53:26,277 - sglang - INFO - [2025-08-24 23:53:26 TP0] Capture cuda graph begin. This can take up to several minutes.
  26922. 2025-08-24 23:53:26,278 - __main__ - INFO - [2025-08-24 23:53:26 TP0] Capture cuda graph begin. This can take up to several minutes.
  26923. 2025-08-24 23:53:26,662 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  26924. 2025-08-24 23:53:27,742 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  26925. 2025-08-24 23:53:28,128 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.00s/it] 50%|█████ | 2/4 [00:01<00:01, 1.74it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.26it/s] 100%|██████████| 4/4 [00:01<00:00, 2.63it/s] 100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
  26926. 2025-08-24 23:53:28,128 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.00s/it] 50%|█████ | 2/4 [00:01<00:01, 1.74it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.26it/s] 100%|██████████| 4/4 [00:01<00:00, 2.63it/s] 100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
  26927. 2025-08-24 23:53:28,128 - sglang - INFO - [2025-08-24 23:53:28 TP0] Capture cuda graph end. Time elapsed: 1.85 s
  26928. 2025-08-24 23:53:28,128 - __main__ - INFO - [2025-08-24 23:53:28 TP0] Capture cuda graph end. Time elapsed: 1.85 s
  26929. 2025-08-24 23:53:28,822 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  26930. 2025-08-24 23:53:28,850 - sglang - INFO - [2025-08-24 23:53:28 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  26931. 2025-08-24 23:53:28,850 - __main__ - INFO - [2025-08-24 23:53:28 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  26932. 2025-08-24 23:53:29,924 - __main__ - INFO - sglang server is ready.
  26933. 2025-08-24 23:53:29,925 - __main__ - INFO - Queue remaining: 1
  26934. 2025-08-24 23:53:29,925 - __main__ - INFO -
  26935. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26936. ----------------------------------------------------------------------------------
  26937. 2025-08-24 23:53:29,925 - __main__ - INFO -
  26938. Worker ID
  26939. ---------
  26940. 2025-08-24 23:53:29,925 - __main__ - INFO - Worker 0 processing work item 0dd0e3c651bff849c06afd78369d2b942edcf042
  26941. 2025-08-24 23:53:29,925 - __main__ - INFO - Created all tasks for 0dd0e3c651bff849c06afd78369d2b942edcf042
  26942. 2025-08-24 23:53:29,927 - __main__ - INFO - Got 1 pages to do for ./workspace/ambiguous.pdf in worker 0
  26943. 2025-08-24 23:53:29,938 - sglang - INFO - [2025-08-24 23:53:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26944. 2025-08-24 23:53:29,939 - __main__ - INFO - [2025-08-24 23:53:29 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26945. 2025-08-24 23:53:29,939 - __main__ - INFO - sglang running req: 0 queue req: 0
  26946. 2025-08-24 23:53:30,519 - sglang - INFO - [2025-08-24 23:53:30] The server is fired up and ready to roll!
  26947. 2025-08-24 23:53:30,519 - __main__ - INFO - [2025-08-24 23:53:30] The server is fired up and ready to roll!
  26948. 2025-08-24 23:53:36,342 - __main__ - INFO - Built page query for ./workspace/ambiguous.pdf-1
  26949. 2025-08-24 23:53:39,932 - __main__ - INFO - Queue remaining: 0
  26950. 2025-08-24 23:53:39,933 - __main__ - INFO -
  26951. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26952. ----------------------------------------------------------------------------------
  26953. 2025-08-24 23:53:39,933 - __main__ - INFO -
  26954. Worker ID | started
  26955. ----------+--------
  26956. 0 | 1
  26957. 2025-08-24 23:53:49,934 - __main__ - INFO - Queue remaining: 0
  26958. 2025-08-24 23:53:49,935 - __main__ - INFO -
  26959. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  26960. ----------------------------------------------------------------------------------
  26961. 2025-08-24 23:53:49,935 - __main__ - INFO -
  26962. Worker ID | started
  26963. ----------+--------
  26964. 0 | 1
  26965. 2025-08-24 23:53:53,350 - sglang - INFO - [2025-08-24 23:53:53 TP0] Prefill batch. #new-seq: 1, #new-token: 1156, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  26966. 2025-08-24 23:53:53,351 - __main__ - INFO - sglang running req: 0 queue req: 0
  26967. 2025-08-24 23:53:54,801 - sglang - INFO - [2025-08-24 23:53:54 TP0] Decode batch. #running-req: 1, #token: 0, token usage: 0.00, gen throughput (token/s): 1.54, #queue-req: 0
  26968. 2025-08-24 23:53:54,802 - __main__ - INFO - sglang running req: 1 queue req: 0
  26969. 2025-08-24 23:53:54,811 - __main__ - INFO - Finished TaskGroup for worker on 0dd0e3c651bff849c06afd78369d2b942edcf042
  26970. 2025-08-24 23:53:54,811 - __main__ - INFO - Got 1 docs for 0dd0e3c651bff849c06afd78369d2b942edcf042
  26971. 2025-08-24 23:53:54,812 - __main__ - INFO - Worker 0 exiting due to empty queue
  26972. 2025-08-24 23:53:54,813 - __main__ - INFO - Work done
  26973. 2025-08-24 23:53:54,813 - __main__ - INFO - Got cancellation request for SGLang server
  26974. 2025-08-24 23:55:05,116 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  26975. 2025-08-24 23:55:05,116 - __main__ - INFO - Loading file at ./workspace/map1.pdf as PDF document
  26976. 2025-08-24 23:55:05,116 - __main__ - INFO - Found 1 total pdf paths to add
  26977. 2025-08-24 23:55:05,120 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
  26978. 2025-08-24 23:55:05,301 - __main__ - INFO - Starting pipeline with PID 481640
  26979. 2025-08-24 23:55:05,301 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  26980. 2025-08-24 23:55:05,382 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  26981. 2025-08-24 23:55:06,416 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  26982. 2025-08-24 23:55:07,473 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  26983. 2025-08-24 23:55:08,516 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  26984. 2025-08-24 23:55:09,556 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  26985. 2025-08-24 23:55:10,609 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  26986. 2025-08-24 23:55:11,668 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  26987. 2025-08-24 23:55:12,409 - sglang - INFO - [2025-08-24 23:55:12] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=677962409, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  26988. 2025-08-24 23:55:12,410 - __main__ - INFO - [2025-08-24 23:55:12] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=677962409, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  26989. 2025-08-24 23:55:12,736 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  26990. 2025-08-24 23:55:13,443 - sglang - INFO - [2025-08-24 23:55:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
  26991. 2025-08-24 23:55:13,443 - __main__ - INFO - [2025-08-24 23:55:13] Use chat template for the OpenAI-compatible API server: qwen2-vl
  26992. 2025-08-24 23:55:13,816 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  26993. 2025-08-24 23:55:14,848 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  26994. 2025-08-24 23:55:15,910 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  26995. 2025-08-24 23:55:16,976 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  26996. 2025-08-24 23:55:18,042 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  26997. 2025-08-24 23:55:19,107 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  26998. 2025-08-24 23:55:19,664 - sglang - INFO - [2025-08-24 23:55:19 TP0] Overlap scheduler is disabled for multimodal models.
  26999. 2025-08-24 23:55:19,664 - __main__ - INFO - [2025-08-24 23:55:19 TP0] Overlap scheduler is disabled for multimodal models.
  27000. 2025-08-24 23:55:19,666 - sglang - INFO - [2025-08-24 23:55:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  27001. 2025-08-24 23:55:19,666 - __main__ - INFO - [2025-08-24 23:55:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  27002. 2025-08-24 23:55:19,666 - sglang - INFO - [2025-08-24 23:55:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  27003. 2025-08-24 23:55:19,666 - __main__ - INFO - [2025-08-24 23:55:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  27004. 2025-08-24 23:55:19,666 - sglang - INFO - [2025-08-24 23:55:19 TP0] Init torch distributed begin.
  27005. 2025-08-24 23:55:19,666 - __main__ - INFO - [2025-08-24 23:55:19 TP0] Init torch distributed begin.
  27006. 2025-08-24 23:55:20,191 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  27007. 2025-08-24 23:55:21,264 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  27008. 2025-08-24 23:55:22,326 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  27009. 2025-08-24 23:55:23,394 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  27010. 2025-08-24 23:55:24,439 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  27011. 2025-08-24 23:55:24,996 - sglang - INFO - [2025-08-24 23:55:24 TP0] Load weight begin. avail mem=23.33 GB
  27012. 2025-08-24 23:55:24,996 - __main__ - INFO - [2025-08-24 23:55:24 TP0] Load weight begin. avail mem=23.33 GB
  27013. 2025-08-24 23:55:25,484 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  27014. 2025-08-24 23:55:25,535 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  27015. 2025-08-24 23:55:25,535 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  27016. 2025-08-24 23:55:26,374 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.19it/s]
  27017. 2025-08-24 23:55:26,374 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.19it/s]
  27018. 2025-08-24 23:55:26,530 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  27019. 2025-08-24 23:55:27,269 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.15it/s]
  27020. 2025-08-24 23:55:27,269 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.15it/s]
  27021. 2025-08-24 23:55:27,577 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  27022. 2025-08-24 23:55:28,240 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.09it/s]
  27023. 2025-08-24 23:55:28,240 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.09it/s]
  27024. 2025-08-24 23:55:28,622 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  27025. 2025-08-24 23:55:28,666 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.38it/s]
  27026. 2025-08-24 23:55:28,666 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.38it/s]
  27027. 2025-08-24 23:55:28,666 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.28it/s]
  27028. 2025-08-24 23:55:28,666 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:03<00:00, 1.28it/s]
  27029. 2025-08-24 23:55:28,666 - sglang - INFO -
  27030. 2025-08-24 23:55:28,666 - __main__ - INFO -
  27031. 2025-08-24 23:55:28,721 - sglang - INFO - [2025-08-24 23:55:28 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  27032. 2025-08-24 23:55:28,722 - __main__ - INFO - [2025-08-24 23:55:28 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  27033. 2025-08-24 23:55:28,730 - sglang - INFO - [2025-08-24 23:55:28 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  27034. 2025-08-24 23:55:28,730 - __main__ - INFO - [2025-08-24 23:55:28 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  27035. 2025-08-24 23:55:28,730 - sglang - INFO - [2025-08-24 23:55:28 TP0] Memory pool end. avail mem=5.30 GB
  27036. 2025-08-24 23:55:28,730 - __main__ - INFO - [2025-08-24 23:55:28 TP0] Memory pool end. avail mem=5.30 GB
  27037. 2025-08-24 23:55:28,916 - sglang - INFO - [2025-08-24 23:55:28 TP0] Capture cuda graph begin. This can take up to several minutes.
  27038. 2025-08-24 23:55:28,916 - __main__ - INFO - [2025-08-24 23:55:28 TP0] Capture cuda graph begin. This can take up to several minutes.
  27039. 2025-08-24 23:55:29,667 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  27040. 2025-08-24 23:55:30,715 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  27041. 2025-08-24 23:55:30,964 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.19s/it] 50%|█████ | 2/4 [00:01<00:01, 1.50it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.04it/s] 100%|██████████| 4/4 [00:02<00:00, 2.47it/s] 100%|██████████| 4/4 [00:02<00:00, 1.96it/s]
  27042. 2025-08-24 23:55:30,964 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.19s/it] 50%|█████ | 2/4 [00:01<00:01, 1.50it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.04it/s] 100%|██████████| 4/4 [00:02<00:00, 2.47it/s] 100%|██████████| 4/4 [00:02<00:00, 1.96it/s]
  27043. 2025-08-24 23:55:30,964 - sglang - INFO - [2025-08-24 23:55:30 TP0] Capture cuda graph end. Time elapsed: 2.05 s
  27044. 2025-08-24 23:55:30,964 - __main__ - INFO - [2025-08-24 23:55:30 TP0] Capture cuda graph end. Time elapsed: 2.05 s
  27045. 2025-08-24 23:55:31,675 - sglang - INFO - [2025-08-24 23:55:31 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  27046. 2025-08-24 23:55:31,676 - __main__ - INFO - [2025-08-24 23:55:31 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  27047. 2025-08-24 23:55:31,760 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready...
  27048. 2025-08-24 23:55:32,804 - sglang - INFO - [2025-08-24 23:55:32 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  27049. 2025-08-24 23:55:32,804 - __main__ - INFO - [2025-08-24 23:55:32 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  27050. 2025-08-24 23:55:32,804 - __main__ - INFO - sglang running req: 0 queue req: 0
  27051. 2025-08-24 23:55:32,854 - __main__ - INFO - sglang server is ready.
  27052. 2025-08-24 23:55:32,854 - __main__ - INFO - Queue remaining: 1
  27053. 2025-08-24 23:55:32,854 - __main__ - INFO -
  27054. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27055. ----------------------------------------------------------------------------------
  27056. 2025-08-24 23:55:32,854 - __main__ - INFO -
  27057. Worker ID
  27058. ---------
  27059. 2025-08-24 23:55:32,855 - __main__ - INFO - Worker 0 processing work item 064eedd4edcd817030605d106353694b3e3ec8b1
  27060. 2025-08-24 23:55:32,855 - __main__ - INFO - Created all tasks for 064eedd4edcd817030605d106353694b3e3ec8b1
  27061. 2025-08-24 23:55:32,863 - __main__ - INFO - Got 1 pages to do for ./workspace/map1.pdf in worker 0
  27062. 2025-08-24 23:55:33,327 - sglang - INFO - [2025-08-24 23:55:33] The server is fired up and ready to roll!
  27063. 2025-08-24 23:55:33,327 - __main__ - INFO - [2025-08-24 23:55:33] The server is fired up and ready to roll!
  27064. 2025-08-24 23:55:42,855 - __main__ - INFO - Queue remaining: 0
  27065. 2025-08-24 23:55:42,855 - __main__ - INFO -
  27066. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27067. ----------------------------------------------------------------------------------
  27068. 2025-08-24 23:55:42,855 - __main__ - INFO -
  27069. Worker ID | started
  27070. ----------+--------
  27071. 0 | 1
  27072. 2025-08-24 23:55:44,265 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27073. 2025-08-24 23:55:44,435 - sglang - INFO - Token indices sequence length is longer than the specified maximum sequence length for this model (78749 > 32768). Running this sequence through the model will result in indexing errors
  27074. 2025-08-24 23:55:52,933 - __main__ - INFO - Queue remaining: 0
  27075. 2025-08-24 23:55:52,933 - __main__ - INFO -
  27076. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27077. ----------------------------------------------------------------------------------
  27078. 2025-08-24 23:55:52,933 - __main__ - INFO -
  27079. Worker ID | started
  27080. ----------+--------
  27081. 0 | 1
  27082. 2025-08-24 23:55:58,157 - __main__ - WARNING - ValueError on attempt 0 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27083. 2025-08-24 23:56:02,935 - __main__ - INFO - Queue remaining: 0
  27084. 2025-08-24 23:56:02,935 - __main__ - INFO -
  27085. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27086. ----------------------------------------------------------------------------------
  27087. 2025-08-24 23:56:02,935 - __main__ - INFO -
  27088. Worker ID | started
  27089. ----------+--------
  27090. 0 | 1
  27091. 2025-08-24 23:56:03,558 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  27092. 2025-08-24 23:56:05,826 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27093. 2025-08-24 23:56:06,133 - __main__ - WARNING - ValueError on attempt 1 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27094. 2025-08-24 23:56:10,964 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27095. 2025-08-24 23:56:11,286 - __main__ - WARNING - ValueError on attempt 2 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27096. 2025-08-24 23:56:12,936 - __main__ - INFO - Queue remaining: 0
  27097. 2025-08-24 23:56:12,937 - __main__ - INFO -
  27098. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27099. ----------------------------------------------------------------------------------
  27100. 2025-08-24 23:56:12,937 - __main__ - INFO -
  27101. Worker ID | started
  27102. ----------+--------
  27103. 0 | 1
  27104. 2025-08-24 23:56:15,671 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27105. 2025-08-24 23:56:16,009 - __main__ - WARNING - ValueError on attempt 3 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27106. 2025-08-24 23:56:20,410 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27107. 2025-08-24 23:56:20,768 - __main__ - WARNING - ValueError on attempt 4 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27108. 2025-08-24 23:56:22,938 - __main__ - INFO - Queue remaining: 0
  27109. 2025-08-24 23:56:22,938 - __main__ - INFO -
  27110. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27111. ----------------------------------------------------------------------------------
  27112. 2025-08-24 23:56:22,939 - __main__ - INFO -
  27113. Worker ID | started
  27114. ----------+--------
  27115. 0 | 1
  27116. 2025-08-24 23:56:25,382 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27117. 2025-08-24 23:56:25,713 - __main__ - WARNING - ValueError on attempt 5 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27118. 2025-08-24 23:56:29,904 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27119. 2025-08-24 23:56:30,241 - __main__ - WARNING - ValueError on attempt 6 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27120. 2025-08-24 23:56:32,940 - __main__ - INFO - Queue remaining: 0
  27121. 2025-08-24 23:56:32,940 - __main__ - INFO -
  27122. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27123. ----------------------------------------------------------------------------------
  27124. 2025-08-24 23:56:32,940 - __main__ - INFO -
  27125. Worker ID | started
  27126. ----------+--------
  27127. 0 | 1
  27128. 2025-08-24 23:56:34,598 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27129. 2025-08-24 23:56:34,937 - __main__ - WARNING - ValueError on attempt 7 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27130. 2025-08-24 23:56:34,937 - __main__ - ERROR - Failed to process ./workspace/map1.pdf-1 after 8 attempts.
  27131. 2025-08-24 23:56:35,309 - __main__ - ERROR - Document ./workspace/map1.pdf has 1 fallback pages out of 1 exceeding max_page_error_rate of 0.004, discarding document.
  27132. 2025-08-24 23:56:35,310 - __main__ - INFO - Finished TaskGroup for worker on 064eedd4edcd817030605d106353694b3e3ec8b1
  27133. 2025-08-24 23:56:35,310 - __main__ - INFO - Got 0 docs for 064eedd4edcd817030605d106353694b3e3ec8b1
  27134. 2025-08-24 23:56:35,311 - __main__ - INFO - Worker 0 exiting due to empty queue
  27135. 2025-08-24 23:56:35,312 - __main__ - INFO - Work done
  27136. 2025-08-24 23:56:35,312 - __main__ - INFO - Got cancellation request for SGLang server
  27137. 2025-08-24 23:57:04,821 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  27138. 2025-08-24 23:57:04,821 - __main__ - INFO - Loading file at ./workspace/map1.pdf as PDF document
  27139. 2025-08-24 23:57:04,821 - __main__ - INFO - Found 1 total pdf paths to add
  27140. 2025-08-24 23:57:04,825 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
  27141. 2025-08-24 23:57:05,000 - __main__ - INFO - Starting pipeline with PID 482844
  27142. 2025-08-24 23:57:05,000 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  27143. 2025-08-24 23:57:05,073 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  27144. 2025-08-24 23:57:06,104 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  27145. 2025-08-24 23:57:07,161 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  27146. 2025-08-24 23:57:08,218 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  27147. 2025-08-24 23:57:09,267 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  27148. 2025-08-24 23:57:10,371 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  27149. 2025-08-24 23:57:11,124 - sglang - INFO - [2025-08-24 23:57:11] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=608298291, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  27150. 2025-08-24 23:57:11,124 - __main__ - INFO - [2025-08-24 23:57:11] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=608298291, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  27151. 2025-08-24 23:57:11,431 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  27152. 2025-08-24 23:57:12,069 - sglang - INFO - [2025-08-24 23:57:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
  27153. 2025-08-24 23:57:12,069 - __main__ - INFO - [2025-08-24 23:57:12] Use chat template for the OpenAI-compatible API server: qwen2-vl
  27154. 2025-08-24 23:57:12,504 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  27155. 2025-08-24 23:57:13,550 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  27156. 2025-08-24 23:57:14,596 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  27157. 2025-08-24 23:57:15,643 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  27158. 2025-08-24 23:57:16,689 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  27159. 2025-08-24 23:57:17,735 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  27160. 2025-08-24 23:57:18,780 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  27161. 2025-08-24 23:57:19,094 - sglang - INFO - [2025-08-24 23:57:19 TP0] Overlap scheduler is disabled for multimodal models.
  27162. 2025-08-24 23:57:19,094 - __main__ - INFO - [2025-08-24 23:57:19 TP0] Overlap scheduler is disabled for multimodal models.
  27163. 2025-08-24 23:57:19,097 - sglang - INFO - [2025-08-24 23:57:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  27164. 2025-08-24 23:57:19,097 - __main__ - INFO - [2025-08-24 23:57:19 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  27165. 2025-08-24 23:57:19,097 - sglang - INFO - [2025-08-24 23:57:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  27166. 2025-08-24 23:57:19,097 - __main__ - INFO - [2025-08-24 23:57:19 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  27167. 2025-08-24 23:57:19,098 - sglang - INFO - [2025-08-24 23:57:19 TP0] Init torch distributed begin.
  27168. 2025-08-24 23:57:19,098 - __main__ - INFO - [2025-08-24 23:57:19 TP0] Init torch distributed begin.
  27169. 2025-08-24 23:57:19,859 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  27170. 2025-08-24 23:57:20,488 - __main__ - INFO - Got cancellation request for SGLang server
  27171. 2025-08-24 23:57:42,899 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  27172. 2025-08-24 23:57:42,900 - __main__ - INFO - Loading file at ./workspace/map1.pdf as PDF document
  27173. 2025-08-24 23:57:42,900 - __main__ - INFO - Found 1 total pdf paths to add
  27174. 2025-08-24 23:57:42,904 - __main__ - INFO - Calculated items_per_group: 10 based on average pages per PDF: 1.00
  27175. 2025-08-24 23:57:43,097 - __main__ - INFO - Starting pipeline with PID 483718
  27176. 2025-08-24 23:57:43,097 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  27177. 2025-08-24 23:57:43,196 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  27178. 2025-08-24 23:57:44,232 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  27179. 2025-08-24 23:57:45,288 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  27180. 2025-08-24 23:57:46,333 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  27181. 2025-08-24 23:57:47,364 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  27182. 2025-08-24 23:57:48,478 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  27183. 2025-08-24 23:57:49,539 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  27184. 2025-08-24 23:57:49,692 - sglang - INFO - [2025-08-24 23:57:49] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1010487791, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  27185. 2025-08-24 23:57:49,692 - __main__ - INFO - [2025-08-24 23:57:49] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1010487791, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  27186. 2025-08-24 23:57:50,619 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  27187. 2025-08-24 23:57:50,815 - sglang - INFO - [2025-08-24 23:57:50] Use chat template for the OpenAI-compatible API server: qwen2-vl
  27188. 2025-08-24 23:57:50,816 - __main__ - INFO - [2025-08-24 23:57:50] Use chat template for the OpenAI-compatible API server: qwen2-vl
  27189. 2025-08-24 23:57:51,700 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  27190. 2025-08-24 23:57:52,767 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  27191. 2025-08-24 23:57:53,831 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  27192. 2025-08-24 23:57:54,956 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  27193. 2025-08-24 23:57:56,012 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  27194. 2025-08-24 23:57:57,079 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  27195. 2025-08-24 23:57:57,540 - sglang - INFO - [2025-08-24 23:57:57 TP0] Overlap scheduler is disabled for multimodal models.
  27196. 2025-08-24 23:57:57,540 - __main__ - INFO - [2025-08-24 23:57:57 TP0] Overlap scheduler is disabled for multimodal models.
  27197. 2025-08-24 23:57:57,544 - sglang - INFO - [2025-08-24 23:57:57 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  27198. 2025-08-24 23:57:57,544 - __main__ - INFO - [2025-08-24 23:57:57 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  27199. 2025-08-24 23:57:57,544 - sglang - INFO - [2025-08-24 23:57:57 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  27200. 2025-08-24 23:57:57,544 - __main__ - INFO - [2025-08-24 23:57:57 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  27201. 2025-08-24 23:57:57,544 - sglang - INFO - [2025-08-24 23:57:57 TP0] Init torch distributed begin.
  27202. 2025-08-24 23:57:57,544 - __main__ - INFO - [2025-08-24 23:57:57 TP0] Init torch distributed begin.
  27203. 2025-08-24 23:57:58,147 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  27204. 2025-08-24 23:57:59,210 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  27205. 2025-08-24 23:58:00,268 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  27206. 2025-08-24 23:58:01,321 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  27207. 2025-08-24 23:58:02,367 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  27208. 2025-08-24 23:58:02,851 - sglang - INFO - [2025-08-24 23:58:02 TP0] Load weight begin. avail mem=23.33 GB
  27209. 2025-08-24 23:58:02,851 - __main__ - INFO - [2025-08-24 23:58:02 TP0] Load weight begin. avail mem=23.33 GB
  27210. 2025-08-24 23:58:03,420 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  27211. 2025-08-24 23:58:03,420 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  27212. 2025-08-24 23:58:03,421 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  27213. 2025-08-24 23:58:04,199 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.24it/s]
  27214. 2025-08-24 23:58:04,199 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.24it/s]
  27215. 2025-08-24 23:58:04,476 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  27216. 2025-08-24 23:58:05,056 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.19it/s]
  27217. 2025-08-24 23:58:05,056 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.19it/s]
  27218. 2025-08-24 23:58:05,519 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  27219. 2025-08-24 23:58:05,872 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.21it/s]
  27220. 2025-08-24 23:58:05,872 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.21it/s]
  27221. 2025-08-24 23:58:06,216 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.57it/s]
  27222. 2025-08-24 23:58:06,216 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.57it/s]
  27223. 2025-08-24 23:58:06,216 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.42it/s]
  27224. 2025-08-24 23:58:06,216 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.42it/s]
  27225. 2025-08-24 23:58:06,216 - sglang - INFO -
  27226. 2025-08-24 23:58:06,216 - __main__ - INFO -
  27227. 2025-08-24 23:58:06,262 - sglang - INFO - [2025-08-24 23:58:06 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  27228. 2025-08-24 23:58:06,262 - __main__ - INFO - [2025-08-24 23:58:06 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  27229. 2025-08-24 23:58:06,268 - sglang - INFO - [2025-08-24 23:58:06 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  27230. 2025-08-24 23:58:06,268 - __main__ - INFO - [2025-08-24 23:58:06 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  27231. 2025-08-24 23:58:06,268 - sglang - INFO - [2025-08-24 23:58:06 TP0] Memory pool end. avail mem=5.30 GB
  27232. 2025-08-24 23:58:06,268 - __main__ - INFO - [2025-08-24 23:58:06 TP0] Memory pool end. avail mem=5.30 GB
  27233. 2025-08-24 23:58:06,416 - sglang - INFO - [2025-08-24 23:58:06 TP0] Capture cuda graph begin. This can take up to several minutes.
  27234. 2025-08-24 23:58:06,416 - __main__ - INFO - [2025-08-24 23:58:06 TP0] Capture cuda graph begin. This can take up to several minutes.
  27235. 2025-08-24 23:58:06,567 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  27236. 2025-08-24 23:58:07,607 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  27237. 2025-08-24 23:58:08,260 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.02s/it] 50%|█████ | 2/4 [00:01<00:01, 1.71it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.26it/s] 100%|██████████| 4/4 [00:01<00:00, 2.68it/s] 100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
  27238. 2025-08-24 23:58:08,260 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.02s/it] 50%|█████ | 2/4 [00:01<00:01, 1.71it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.26it/s] 100%|██████████| 4/4 [00:01<00:00, 2.68it/s] 100%|██████████| 4/4 [00:01<00:00, 2.17it/s]
  27239. 2025-08-24 23:58:08,260 - sglang - INFO - [2025-08-24 23:58:08 TP0] Capture cuda graph end. Time elapsed: 1.84 s
  27240. 2025-08-24 23:58:08,260 - __main__ - INFO - [2025-08-24 23:58:08 TP0] Capture cuda graph end. Time elapsed: 1.84 s
  27241. 2025-08-24 23:58:08,651 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  27242. 2025-08-24 23:58:08,965 - sglang - INFO - [2025-08-24 23:58:08 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  27243. 2025-08-24 23:58:08,965 - __main__ - INFO - [2025-08-24 23:58:08 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  27244. 2025-08-24 23:58:09,708 - __main__ - INFO - sglang server is ready.
  27245. 2025-08-24 23:58:09,709 - __main__ - INFO - Queue remaining: 1
  27246. 2025-08-24 23:58:09,709 - __main__ - INFO -
  27247. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27248. ----------------------------------------------------------------------------------
  27249. 2025-08-24 23:58:09,709 - __main__ - INFO -
  27250. Worker ID
  27251. ---------
  27252. 2025-08-24 23:58:09,709 - __main__ - INFO - Worker 0 processing work item 064eedd4edcd817030605d106353694b3e3ec8b1
  27253. 2025-08-24 23:58:09,709 - __main__ - INFO - Created all tasks for 064eedd4edcd817030605d106353694b3e3ec8b1
  27254. 2025-08-24 23:58:09,717 - __main__ - INFO - Got 1 pages to do for ./workspace/map1.pdf in worker 0
  27255. 2025-08-24 23:58:10,044 - sglang - INFO - [2025-08-24 23:58:10 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  27256. 2025-08-24 23:58:10,044 - __main__ - INFO - [2025-08-24 23:58:10 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  27257. 2025-08-24 23:58:10,045 - __main__ - INFO - sglang running req: 0 queue req: 0
  27258. 2025-08-24 23:58:10,677 - sglang - INFO - [2025-08-24 23:58:10] The server is fired up and ready to roll!
  27259. 2025-08-24 23:58:10,678 - __main__ - INFO - [2025-08-24 23:58:10] The server is fired up and ready to roll!
  27260. 2025-08-24 23:58:19,710 - __main__ - INFO - Queue remaining: 0
  27261. 2025-08-24 23:58:19,710 - __main__ - INFO -
  27262. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27263. ----------------------------------------------------------------------------------
  27264. 2025-08-24 23:58:19,711 - __main__ - INFO -
  27265. Worker ID | started
  27266. ----------+--------
  27267. 0 | 1
  27268. 2025-08-24 23:58:21,237 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27269. 2025-08-24 23:58:21,388 - sglang - INFO - Token indices sequence length is longer than the specified maximum sequence length for this model (78749 > 32768). Running this sequence through the model will result in indexing errors
  27270. 2025-08-24 23:58:29,733 - __main__ - INFO - Queue remaining: 0
  27271. 2025-08-24 23:58:29,733 - __main__ - INFO -
  27272. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27273. ----------------------------------------------------------------------------------
  27274. 2025-08-24 23:58:29,733 - __main__ - INFO -
  27275. Worker ID | started
  27276. ----------+--------
  27277. 0 | 1
  27278. 2025-08-24 23:58:34,535 - __main__ - WARNING - ValueError on attempt 0 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27279. 2025-08-24 23:58:39,734 - __main__ - INFO - Queue remaining: 0
  27280. 2025-08-24 23:58:39,735 - __main__ - INFO -
  27281. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27282. ----------------------------------------------------------------------------------
  27283. 2025-08-24 23:58:39,735 - __main__ - INFO -
  27284. Worker ID | started
  27285. ----------+--------
  27286. 0 | 1
  27287. 2025-08-24 23:58:41,440 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  27288. 2025-08-24 23:58:42,288 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27289. 2025-08-24 23:58:42,596 - __main__ - WARNING - ValueError on attempt 1 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27290. 2025-08-24 23:58:47,737 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27291. 2025-08-24 23:58:48,060 - __main__ - WARNING - ValueError on attempt 2 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27292. 2025-08-24 23:58:49,737 - __main__ - INFO - Queue remaining: 0
  27293. 2025-08-24 23:58:49,738 - __main__ - INFO -
  27294. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27295. ----------------------------------------------------------------------------------
  27296. 2025-08-24 23:58:49,738 - __main__ - INFO -
  27297. Worker ID | started
  27298. ----------+--------
  27299. 0 | 1
  27300. 2025-08-24 23:58:52,705 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27301. 2025-08-24 23:58:53,022 - __main__ - WARNING - ValueError on attempt 3 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27302. 2025-08-24 23:58:57,211 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27303. 2025-08-24 23:58:57,582 - __main__ - WARNING - ValueError on attempt 4 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27304. 2025-08-24 23:58:59,739 - __main__ - INFO - Queue remaining: 0
  27305. 2025-08-24 23:58:59,739 - __main__ - INFO -
  27306. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27307. ----------------------------------------------------------------------------------
  27308. 2025-08-24 23:58:59,739 - __main__ - INFO -
  27309. Worker ID | started
  27310. ----------+--------
  27311. 0 | 1
  27312. 2025-08-24 23:59:01,984 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27313. 2025-08-24 23:59:02,323 - __main__ - WARNING - ValueError on attempt 5 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27314. 2025-08-24 23:59:06,369 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27315. 2025-08-24 23:59:06,681 - __main__ - WARNING - ValueError on attempt 6 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27316. 2025-08-24 23:59:09,741 - __main__ - INFO - Queue remaining: 0
  27317. 2025-08-24 23:59:09,741 - __main__ - INFO -
  27318. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27319. ----------------------------------------------------------------------------------
  27320. 2025-08-24 23:59:09,741 - __main__ - INFO -
  27321. Worker ID | started
  27322. ----------+--------
  27323. 0 | 1
  27324. 2025-08-24 23:59:10,935 - __main__ - INFO - Built page query for ./workspace/map1.pdf-1
  27325. 2025-08-24 23:59:11,295 - __main__ - WARNING - ValueError on attempt 7 for ./workspace/map1.pdf-1: <class 'ValueError'> - Got BadRequestError from server: b'{"object":"error","message":"The input (78749 tokens) is longer than the model\'s context length (32768 tokens).","type":"BadRequestError","param":null,"code":400}', skipping this response
  27326. 2025-08-24 23:59:11,296 - __main__ - ERROR - Failed to process ./workspace/map1.pdf-1 after 8 attempts.
  27327. 2025-08-24 23:59:11,666 - __main__ - ERROR - Document ./workspace/map1.pdf has 1 fallback pages out of 1 exceeding max_page_error_rate of 0.004, discarding document.
  27328. 2025-08-24 23:59:11,667 - __main__ - INFO - Finished TaskGroup for worker on 064eedd4edcd817030605d106353694b3e3ec8b1
  27329. 2025-08-24 23:59:11,667 - __main__ - INFO - Got 0 docs for 064eedd4edcd817030605d106353694b3e3ec8b1
  27330. 2025-08-24 23:59:11,668 - __main__ - INFO - Worker 0 exiting due to empty queue
  27331. 2025-08-24 23:59:11,668 - __main__ - INFO - Work done
  27332. 2025-08-24 23:59:11,669 - __main__ - INFO - Got cancellation request for SGLang server
  27333. 2025-08-24 23:59:37,224 - __main__ - INFO - Got --pdfs argument, going to add to the work queue
  27334. 2025-08-24 23:59:37,224 - __main__ - INFO - Loading file at ./workspace/UNETR.pdf as PDF document
  27335. 2025-08-24 23:59:37,224 - __main__ - INFO - Found 1 total pdf paths to add
  27336. 2025-08-24 23:59:37,230 - __main__ - INFO - Calculated items_per_group: 1 based on average pages per PDF: 11.00
  27337. 2025-08-24 23:59:37,413 - __main__ - INFO - Starting pipeline with PID 484898
  27338. 2025-08-24 23:59:37,413 - __main__ - INFO - Using local model path at '/root/llm/olmOCR-7B-0225-preview'
  27339. 2025-08-24 23:59:37,499 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready...
  27340. 2025-08-24 23:59:38,535 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready...
  27341. 2025-08-24 23:59:39,594 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready...
  27342. 2025-08-24 23:59:40,666 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready...
  27343. 2025-08-24 23:59:41,733 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready...
  27344. 2025-08-24 23:59:42,799 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready...
  27345. 2025-08-24 23:59:43,852 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready...
  27346. 2025-08-24 23:59:44,133 - sglang - INFO - [2025-08-24 23:59:44] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1008456358, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  27347. 2025-08-24 23:59:44,133 - __main__ - INFO - [2025-08-24 23:59:44] server_args=ServerArgs(model_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_path='/root/llm/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='/root/llm/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30026, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=2048, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1008456358, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=8, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None)
  27348. 2025-08-24 23:59:44,921 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready...
  27349. 2025-08-24 23:59:45,150 - sglang - INFO - [2025-08-24 23:59:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
  27350. 2025-08-24 23:59:45,151 - __main__ - INFO - [2025-08-24 23:59:45] Use chat template for the OpenAI-compatible API server: qwen2-vl
  27351. 2025-08-24 23:59:45,968 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready...
  27352. 2025-08-24 23:59:47,035 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready...
  27353. 2025-08-24 23:59:48,100 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready...
  27354. 2025-08-24 23:59:49,173 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready...
  27355. 2025-08-24 23:59:50,243 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready...
  27356. 2025-08-24 23:59:51,314 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready...
  27357. 2025-08-24 23:59:51,579 - sglang - INFO - [2025-08-24 23:59:51 TP0] Overlap scheduler is disabled for multimodal models.
  27358. 2025-08-24 23:59:51,579 - __main__ - INFO - [2025-08-24 23:59:51 TP0] Overlap scheduler is disabled for multimodal models.
  27359. 2025-08-24 23:59:51,581 - sglang - INFO - [2025-08-24 23:59:51 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  27360. 2025-08-24 23:59:51,581 - __main__ - INFO - [2025-08-24 23:59:51 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model.
  27361. 2025-08-24 23:59:51,581 - sglang - INFO - [2025-08-24 23:59:51 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  27362. 2025-08-24 23:59:51,581 - __main__ - INFO - [2025-08-24 23:59:51 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl.
  27363. 2025-08-24 23:59:51,581 - sglang - INFO - [2025-08-24 23:59:51 TP0] Init torch distributed begin.
  27364. 2025-08-24 23:59:51,581 - __main__ - INFO - [2025-08-24 23:59:51 TP0] Init torch distributed begin.
  27365. 2025-08-24 23:59:52,346 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready...
  27366. 2025-08-24 23:59:53,409 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready...
  27367. 2025-08-24 23:59:54,483 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready...
  27368. 2025-08-24 23:59:55,552 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready...
  27369. 2025-08-24 23:59:56,603 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready...
  27370. 2025-08-24 23:59:56,899 - sglang - INFO - [2025-08-24 23:59:56 TP0] Load weight begin. avail mem=23.33 GB
  27371. 2025-08-24 23:59:56,900 - __main__ - INFO - [2025-08-24 23:59:56 TP0] Load weight begin. avail mem=23.33 GB
  27372. 2025-08-24 23:59:57,458 - sglang - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  27373. 2025-08-24 23:59:57,458 - __main__ - INFO - Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
  27374. 2025-08-24 23:59:57,678 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready...
  27375. 2025-08-24 23:59:58,298 - sglang - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.19it/s]
  27376. 2025-08-24 23:59:58,298 - __main__ - INFO - Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:02, 1.19it/s]
  27377. 2025-08-24 23:59:58,753 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready...
  27378. 2025-08-24 23:59:59,177 - sglang - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.16it/s]
  27379. 2025-08-24 23:59:59,177 - __main__ - INFO - Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:01<00:01, 1.16it/s]
  27380. 2025-08-24 23:59:59,821 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready...
  27381. 2025-08-25 00:00:00,029 - sglang - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.17it/s]
  27382. 2025-08-25 00:00:00,029 - __main__ - INFO - Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:02<00:00, 1.17it/s]
  27383. 2025-08-25 00:00:00,388 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.51it/s]
  27384. 2025-08-25 00:00:00,388 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.51it/s]
  27385. 2025-08-25 00:00:00,388 - sglang - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.37it/s]
  27386. 2025-08-25 00:00:00,388 - __main__ - INFO - Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.37it/s]
  27387. 2025-08-25 00:00:00,388 - sglang - INFO -
  27388. 2025-08-25 00:00:00,388 - __main__ - INFO -
  27389. 2025-08-25 00:00:00,434 - sglang - INFO - [2025-08-25 00:00:00 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  27390. 2025-08-25 00:00:00,435 - __main__ - INFO - [2025-08-25 00:00:00 TP0] Load weight end. type=Qwen2VLForConditionalGeneration, dtype=torch.bfloat16, avail mem=7.63 GB
  27391. 2025-08-25 00:00:00,440 - sglang - INFO - [2025-08-25 00:00:00 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  27392. 2025-08-25 00:00:00,440 - __main__ - INFO - [2025-08-25 00:00:00 TP0] KV Cache is allocated. K size: 1.01 GB, V size: 1.01 GB.
  27393. 2025-08-25 00:00:00,441 - sglang - INFO - [2025-08-25 00:00:00 TP0] Memory pool end. avail mem=5.30 GB
  27394. 2025-08-25 00:00:00,441 - __main__ - INFO - [2025-08-25 00:00:00 TP0] Memory pool end. avail mem=5.30 GB
  27395. 2025-08-25 00:00:00,590 - sglang - INFO - [2025-08-25 00:00:00 TP0] Capture cuda graph begin. This can take up to several minutes.
  27396. 2025-08-25 00:00:00,590 - __main__ - INFO - [2025-08-25 00:00:00 TP0] Capture cuda graph begin. This can take up to several minutes.
  27397. 2025-08-25 00:00:00,881 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready...
  27398. 2025-08-25 00:00:01,968 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready...
  27399. 2025-08-25 00:00:02,463 - sglang - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.03s/it] 50%|█████ | 2/4 [00:01<00:01, 1.70it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s] 100%|██████████| 4/4 [00:01<00:00, 2.61it/s] 100%|██████████| 4/4 [00:01<00:00, 2.14it/s]
  27400. 2025-08-25 00:00:02,463 - __main__ - INFO - 0%| | 0/4 [00:00<?, ?it/s] 25%|██▌ | 1/4 [00:01<00:03, 1.03s/it] 50%|█████ | 2/4 [00:01<00:01, 1.70it/s] 75%|███████▌ | 3/4 [00:01<00:00, 2.25it/s] 100%|██████████| 4/4 [00:01<00:00, 2.61it/s] 100%|██████████| 4/4 [00:01<00:00, 2.14it/s]
  27401. 2025-08-25 00:00:02,463 - sglang - INFO - [2025-08-25 00:00:02 TP0] Capture cuda graph end. Time elapsed: 1.87 s
  27402. 2025-08-25 00:00:02,463 - __main__ - INFO - [2025-08-25 00:00:02 TP0] Capture cuda graph end. Time elapsed: 1.87 s
  27403. 2025-08-25 00:00:03,057 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready...
  27404. 2025-08-25 00:00:03,168 - sglang - INFO - [2025-08-25 00:00:03 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  27405. 2025-08-25 00:00:03,169 - __main__ - INFO - [2025-08-25 00:00:03 TP0] max_total_num_tokens=37987, chunked_prefill_size=-1, max_prefill_tokens=16384, max_running_requests=2049, context_len=32768
  27406. 2025-08-25 00:00:04,142 - __main__ - INFO - sglang server is ready.
  27407. 2025-08-25 00:00:04,143 - __main__ - INFO - Queue remaining: 1
  27408. 2025-08-25 00:00:04,143 - __main__ - INFO -
  27409. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27410. ----------------------------------------------------------------------------------
  27411. 2025-08-25 00:00:04,143 - __main__ - INFO -
  27412. Worker ID
  27413. ---------
  27414. 2025-08-25 00:00:04,143 - __main__ - INFO - Worker 0 processing work item 73c9399482ed5cf37e1888c000e49ef82a30c10d
  27415. 2025-08-25 00:00:04,143 - __main__ - INFO - Created all tasks for 73c9399482ed5cf37e1888c000e49ef82a30c10d
  27416. 2025-08-25 00:00:04,153 - __main__ - INFO - Got 11 pages to do for ./workspace/UNETR.pdf in worker 0
  27417. 2025-08-25 00:00:04,247 - sglang - INFO - [2025-08-25 00:00:04 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  27418. 2025-08-25 00:00:04,247 - __main__ - INFO - [2025-08-25 00:00:04 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  27419. 2025-08-25 00:00:04,247 - __main__ - INFO - sglang running req: 0 queue req: 0
  27420. 2025-08-25 00:00:04,964 - sglang - INFO - [2025-08-25 00:00:04] The server is fired up and ready to roll!
  27421. 2025-08-25 00:00:04,964 - __main__ - INFO - [2025-08-25 00:00:04] The server is fired up and ready to roll!
  27422. 2025-08-25 00:00:11,073 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-2
  27423. 2025-08-25 00:00:11,082 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-1
  27424. 2025-08-25 00:00:11,124 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-3
  27425. 2025-08-25 00:00:11,142 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-6
  27426. 2025-08-25 00:00:11,144 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-5
  27427. 2025-08-25 00:00:11,154 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-7
  27428. 2025-08-25 00:00:11,165 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-11
  27429. 2025-08-25 00:00:11,175 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-8
  27430. 2025-08-25 00:00:11,178 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-9
  27431. 2025-08-25 00:00:11,179 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-10
  27432. 2025-08-25 00:00:11,250 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-4
  27433. 2025-08-25 00:00:14,144 - __main__ - INFO - Queue remaining: 0
  27434. 2025-08-25 00:00:14,145 - __main__ - INFO -
  27435. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27436. ----------------------------------------------------------------------------------
  27437. 2025-08-25 00:00:14,145 - __main__ - INFO -
  27438. Worker ID | started
  27439. ----------+--------
  27440. 0 | 11
  27441. 2025-08-25 00:00:24,147 - __main__ - INFO - Queue remaining: 0
  27442. 2025-08-25 00:00:24,147 - __main__ - INFO -
  27443. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27444. ----------------------------------------------------------------------------------
  27445. 2025-08-25 00:00:24,147 - __main__ - INFO -
  27446. Worker ID | started
  27447. ----------+--------
  27448. 0 | 11
  27449. 2025-08-25 00:00:28,213 - sglang - INFO - [2025-08-25 00:00:28 TP0] Prefill batch. #new-seq: 1, #new-token: 3390, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  27450. 2025-08-25 00:00:28,213 - __main__ - INFO - sglang running req: 0 queue req: 0
  27451. 2025-08-25 00:00:29,414 - sglang - INFO - [2025-08-25 00:00:29 TP0] Prefill batch. #new-seq: 4, #new-token: 14323, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.09, #running-req: 1, #queue-req: 6
  27452. 2025-08-25 00:00:29,414 - __main__ - INFO - sglang running req: 1 queue req: 6
  27453. 2025-08-25 00:00:34,149 - __main__ - INFO - Queue remaining: 0
  27454. 2025-08-25 00:00:34,149 - __main__ - INFO -
  27455. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27456. ----------------------------------------------------------------------------------
  27457. 2025-08-25 00:00:34,149 - __main__ - INFO -
  27458. Worker ID | started
  27459. ----------+--------
  27460. 0 | 11
  27461. 2025-08-25 00:00:34,174 - sglang - INFO - [2025-08-25 00:00:34 TP0] Decode batch. #running-req: 5, #token: 17878, token usage: 0.47, gen throughput (token/s): 5.55, #queue-req: 6
  27462. 2025-08-25 00:00:34,174 - __main__ - INFO - sglang running req: 5 queue req: 6
  27463. 2025-08-25 00:00:35,058 - sglang - INFO - [2025-08-25 00:00:35 TP0] Decode batch. #running-req: 5, #token: 18078, token usage: 0.48, gen throughput (token/s): 226.02, #queue-req: 6
  27464. 2025-08-25 00:00:35,059 - __main__ - INFO - sglang running req: 5 queue req: 6
  27465. 2025-08-25 00:00:35,945 - sglang - INFO - [2025-08-25 00:00:35 TP0] Decode batch. #running-req: 5, #token: 18278, token usage: 0.48, gen throughput (token/s): 225.66, #queue-req: 6
  27466. 2025-08-25 00:00:35,945 - __main__ - INFO - sglang running req: 5 queue req: 6
  27467. 2025-08-25 00:00:36,831 - sglang - INFO - [2025-08-25 00:00:36 TP0] Decode batch. #running-req: 5, #token: 18478, token usage: 0.49, gen throughput (token/s): 225.59, #queue-req: 6
  27468. 2025-08-25 00:00:36,832 - __main__ - INFO - sglang running req: 5 queue req: 6
  27469. 2025-08-25 00:00:37,718 - sglang - INFO - [2025-08-25 00:00:37 TP0] Decode batch. #running-req: 5, #token: 18678, token usage: 0.49, gen throughput (token/s): 225.66, #queue-req: 6
  27470. 2025-08-25 00:00:37,718 - __main__ - INFO - sglang running req: 5 queue req: 6
  27471. 2025-08-25 00:00:38,476 - sglang - INFO - [2025-08-25 00:00:38 TP0] Prefill batch. #new-seq: 2, #new-token: 7744, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.45, #running-req: 4, #queue-req: 4
  27472. 2025-08-25 00:00:38,476 - __main__ - INFO - sglang running req: 4 queue req: 4
  27473. 2025-08-25 00:00:40,686 - sglang - INFO - [2025-08-25 00:00:40 TP0] Decode batch. #running-req: 6, #token: 24906, token usage: 0.66, gen throughput (token/s): 69.07, #queue-req: 4
  27474. 2025-08-25 00:00:40,686 - __main__ - INFO - sglang running req: 6 queue req: 4
  27475. 2025-08-25 00:00:41,595 - sglang - INFO - [2025-08-25 00:00:41 TP0] Decode batch. #running-req: 6, #token: 25146, token usage: 0.66, gen throughput (token/s): 263.89, #queue-req: 4
  27476. 2025-08-25 00:00:41,596 - __main__ - INFO - sglang running req: 6 queue req: 4
  27477. 2025-08-25 00:00:42,508 - sglang - INFO - [2025-08-25 00:00:42 TP0] Decode batch. #running-req: 6, #token: 25386, token usage: 0.67, gen throughput (token/s): 263.09, #queue-req: 4
  27478. 2025-08-25 00:00:42,508 - __main__ - INFO - sglang running req: 6 queue req: 4
  27479. 2025-08-25 00:00:43,420 - sglang - INFO - [2025-08-25 00:00:43 TP0] Decode batch. #running-req: 6, #token: 25626, token usage: 0.67, gen throughput (token/s): 262.92, #queue-req: 4
  27480. 2025-08-25 00:00:43,421 - __main__ - INFO - sglang running req: 6 queue req: 4
  27481. 2025-08-25 00:00:44,151 - __main__ - INFO - Queue remaining: 0
  27482. 2025-08-25 00:00:44,151 - __main__ - INFO -
  27483. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27484. ----------------------------------------------------------------------------------
  27485. sglang_input_tokens 22.33 22.33
  27486. sglang_output_tokens 3.39 3.39
  27487. 2025-08-25 00:00:44,152 - __main__ - INFO -
  27488. Worker ID | finished | started
  27489. ----------+----------+--------
  27490. 0 | 1 | 11
  27491. 2025-08-25 00:00:44,333 - sglang - INFO - [2025-08-25 00:00:44 TP0] Decode batch. #running-req: 6, #token: 25866, token usage: 0.68, gen throughput (token/s): 263.04, #queue-req: 4
  27492. 2025-08-25 00:00:44,333 - __main__ - INFO - sglang running req: 6 queue req: 4
  27493. 2025-08-25 00:00:45,247 - sglang - INFO - [2025-08-25 00:00:45 TP0] Decode batch. #running-req: 6, #token: 26106, token usage: 0.69, gen throughput (token/s): 262.55, #queue-req: 4
  27494. 2025-08-25 00:00:45,247 - __main__ - INFO - sglang running req: 6 queue req: 4
  27495. 2025-08-25 00:00:46,161 - sglang - INFO - [2025-08-25 00:00:46 TP0] Decode batch. #running-req: 6, #token: 26346, token usage: 0.69, gen throughput (token/s): 262.55, #queue-req: 4
  27496. 2025-08-25 00:00:46,161 - __main__ - INFO - sglang running req: 6 queue req: 4
  27497. 2025-08-25 00:00:47,076 - sglang - INFO - [2025-08-25 00:00:47 TP0] Decode batch. #running-req: 6, #token: 26586, token usage: 0.70, gen throughput (token/s): 262.17, #queue-req: 4
  27498. 2025-08-25 00:00:47,077 - __main__ - INFO - sglang running req: 6 queue req: 4
  27499. 2025-08-25 00:00:47,992 - sglang - INFO - [2025-08-25 00:00:47 TP0] Decode batch. #running-req: 6, #token: 26826, token usage: 0.71, gen throughput (token/s): 262.07, #queue-req: 4
  27500. 2025-08-25 00:00:47,992 - __main__ - INFO - sglang running req: 6 queue req: 4
  27501. 2025-08-25 00:00:48,909 - sglang - INFO - [2025-08-25 00:00:48 TP0] Decode batch. #running-req: 6, #token: 27066, token usage: 0.71, gen throughput (token/s): 261.91, #queue-req: 4
  27502. 2025-08-25 00:00:48,909 - __main__ - INFO - sglang running req: 6 queue req: 4
  27503. 2025-08-25 00:00:49,826 - sglang - INFO - [2025-08-25 00:00:49 TP0] Decode batch. #running-req: 6, #token: 27306, token usage: 0.72, gen throughput (token/s): 261.67, #queue-req: 4
  27504. 2025-08-25 00:00:49,826 - __main__ - INFO - sglang running req: 6 queue req: 4
  27505. 2025-08-25 00:00:50,744 - sglang - INFO - [2025-08-25 00:00:50 TP0] Decode batch. #running-req: 6, #token: 27546, token usage: 0.73, gen throughput (token/s): 261.37, #queue-req: 4
  27506. 2025-08-25 00:00:50,744 - __main__ - INFO - sglang running req: 6 queue req: 4
  27507. 2025-08-25 00:00:51,640 - sglang - INFO - [2025-08-25 00:00:51 TP0] Prefill batch. #new-seq: 1, #new-token: 3935, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.63, #running-req: 5, #queue-req: 3
  27508. 2025-08-25 00:00:51,640 - __main__ - INFO - sglang running req: 5 queue req: 3
  27509. 2025-08-25 00:00:52,715 - sglang - INFO - [2025-08-25 00:00:52 TP0] Decode batch. #running-req: 6, #token: 27740, token usage: 0.73, gen throughput (token/s): 121.25, #queue-req: 3
  27510. 2025-08-25 00:00:52,715 - __main__ - INFO - sglang running req: 6 queue req: 3
  27511. 2025-08-25 00:00:53,630 - sglang - INFO - [2025-08-25 00:00:53 TP0] Decode batch. #running-req: 6, #token: 27980, token usage: 0.74, gen throughput (token/s): 262.42, #queue-req: 3
  27512. 2025-08-25 00:00:53,630 - __main__ - INFO - sglang running req: 6 queue req: 3
  27513. 2025-08-25 00:00:54,154 - __main__ - INFO - Queue remaining: 0
  27514. 2025-08-25 00:00:54,154 - __main__ - INFO -
  27515. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27516. ----------------------------------------------------------------------------------
  27517. sglang_input_tokens 61.92 61.92
  27518. sglang_output_tokens 12.20 12.20
  27519. 2025-08-25 00:00:54,155 - __main__ - INFO -
  27520. Worker ID | finished | started
  27521. ----------+----------+--------
  27522. 0 | 2 | 11
  27523. 2025-08-25 00:00:54,545 - sglang - INFO - [2025-08-25 00:00:54 TP0] Decode batch. #running-req: 6, #token: 28220, token usage: 0.74, gen throughput (token/s): 262.13, #queue-req: 3
  27524. 2025-08-25 00:00:54,545 - __main__ - INFO - sglang running req: 6 queue req: 3
  27525. 2025-08-25 00:00:54,706 - sglang - INFO - [2025-08-25 00:00:54 TP0] Prefill batch. #new-seq: 1, #new-token: 3827, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.66, #running-req: 5, #queue-req: 2
  27526. 2025-08-25 00:00:54,706 - __main__ - INFO - sglang running req: 5 queue req: 2
  27527. 2025-08-25 00:00:56,499 - sglang - INFO - [2025-08-25 00:00:56 TP0] Decode batch. #running-req: 6, #token: 28948, token usage: 0.76, gen throughput (token/s): 122.33, #queue-req: 2
  27528. 2025-08-25 00:00:56,499 - __main__ - INFO - sglang running req: 6 queue req: 2
  27529. 2025-08-25 00:00:57,418 - sglang - INFO - [2025-08-25 00:00:57 TP0] Decode batch. #running-req: 6, #token: 29188, token usage: 0.77, gen throughput (token/s): 261.19, #queue-req: 2
  27530. 2025-08-25 00:00:57,418 - __main__ - INFO - sglang running req: 6 queue req: 2
  27531. 2025-08-25 00:00:58,338 - sglang - INFO - [2025-08-25 00:00:58 TP0] Decode batch. #running-req: 6, #token: 29428, token usage: 0.77, gen throughput (token/s): 260.81, #queue-req: 2
  27532. 2025-08-25 00:00:58,338 - __main__ - INFO - sglang running req: 6 queue req: 2
  27533. 2025-08-25 00:00:59,258 - sglang - INFO - [2025-08-25 00:00:59 TP0] Decode batch. #running-req: 6, #token: 29668, token usage: 0.78, gen throughput (token/s): 260.89, #queue-req: 2
  27534. 2025-08-25 00:00:59,258 - __main__ - INFO - sglang running req: 6 queue req: 2
  27535. 2025-08-25 00:00:59,625 - sglang - INFO - [2025-08-25 00:00:59 TP0] Prefill batch. #new-seq: 1, #new-token: 3860, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.63, #running-req: 5, #queue-req: 1
  27536. 2025-08-25 00:00:59,625 - __main__ - INFO - sglang running req: 5 queue req: 1
  27537. 2025-08-25 00:01:01,222 - sglang - INFO - [2025-08-25 00:01:01 TP0] Decode batch. #running-req: 6, #token: 28048, token usage: 0.74, gen throughput (token/s): 121.67, #queue-req: 1
  27538. 2025-08-25 00:01:01,223 - __main__ - INFO - sglang running req: 6 queue req: 1
  27539. 2025-08-25 00:01:02,141 - sglang - INFO - [2025-08-25 00:01:02 TP0] Decode batch. #running-req: 6, #token: 28288, token usage: 0.74, gen throughput (token/s): 261.19, #queue-req: 1
  27540. 2025-08-25 00:01:02,141 - __main__ - INFO - sglang running req: 6 queue req: 1
  27541. 2025-08-25 00:01:03,059 - sglang - INFO - [2025-08-25 00:01:03 TP0] Decode batch. #running-req: 6, #token: 28528, token usage: 0.75, gen throughput (token/s): 261.54, #queue-req: 1
  27542. 2025-08-25 00:01:03,059 - __main__ - INFO - sglang running req: 6 queue req: 1
  27543. 2025-08-25 00:01:03,977 - sglang - INFO - [2025-08-25 00:01:03 TP0] Decode batch. #running-req: 6, #token: 28768, token usage: 0.76, gen throughput (token/s): 261.29, #queue-req: 1
  27544. 2025-08-25 00:01:03,978 - __main__ - INFO - sglang running req: 6 queue req: 1
  27545. 2025-08-25 00:01:04,156 - __main__ - INFO - Queue remaining: 0
  27546. 2025-08-25 00:01:04,156 - __main__ - INFO -
  27547. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27548. ----------------------------------------------------------------------------------
  27549. sglang_input_tokens 143.86 143.86
  27550. sglang_output_tokens 25.93 25.93
  27551. 2025-08-25 00:01:04,157 - __main__ - INFO -
  27552. Worker ID | finished | started
  27553. ----------+----------+--------
  27554. 0 | 4 | 11
  27555. 2025-08-25 00:01:04,895 - sglang - INFO - [2025-08-25 00:01:04 TP0] Decode batch. #running-req: 6, #token: 29008, token usage: 0.76, gen throughput (token/s): 261.60, #queue-req: 1
  27556. 2025-08-25 00:01:04,895 - __main__ - INFO - sglang running req: 6 queue req: 1
  27557. 2025-08-25 00:01:05,812 - sglang - INFO - [2025-08-25 00:01:05 TP0] Decode batch. #running-req: 6, #token: 29248, token usage: 0.77, gen throughput (token/s): 261.74, #queue-req: 1
  27558. 2025-08-25 00:01:05,812 - __main__ - INFO - sglang running req: 6 queue req: 1
  27559. 2025-08-25 00:01:05,996 - sglang - INFO - [2025-08-25 00:01:05 TP0] Prefill batch. #new-seq: 1, #new-token: 3792, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.65, #running-req: 5, #queue-req: 0
  27560. 2025-08-25 00:01:05,996 - __main__ - INFO - sglang running req: 5 queue req: 0
  27561. 2025-08-25 00:01:06,698 - __main__ - INFO - Semaphore released, allowing a worker to proceed.
  27562. 2025-08-25 00:01:07,768 - sglang - INFO - [2025-08-25 00:01:07 TP0] Decode batch. #running-req: 6, #token: 28689, token usage: 0.76, gen throughput (token/s): 122.17, #queue-req: 0
  27563. 2025-08-25 00:01:07,768 - __main__ - INFO - sglang running req: 6 queue req: 0
  27564. 2025-08-25 00:01:08,687 - sglang - INFO - [2025-08-25 00:01:08 TP0] Decode batch. #running-req: 6, #token: 28929, token usage: 0.76, gen throughput (token/s): 261.04, #queue-req: 0
  27565. 2025-08-25 00:01:08,688 - __main__ - INFO - sglang running req: 6 queue req: 0
  27566. 2025-08-25 00:01:09,605 - sglang - INFO - [2025-08-25 00:01:09 TP0] Decode batch. #running-req: 6, #token: 29169, token usage: 0.77, gen throughput (token/s): 261.46, #queue-req: 0
  27567. 2025-08-25 00:01:09,606 - __main__ - INFO - sglang running req: 6 queue req: 0
  27568. 2025-08-25 00:01:10,525 - sglang - INFO - [2025-08-25 00:01:10 TP0] Decode batch. #running-req: 6, #token: 29409, token usage: 0.77, gen throughput (token/s): 261.02, #queue-req: 0
  27569. 2025-08-25 00:01:10,525 - __main__ - INFO - sglang running req: 6 queue req: 0
  27570. 2025-08-25 00:01:11,444 - sglang - INFO - [2025-08-25 00:01:11 TP0] Decode batch. #running-req: 6, #token: 29649, token usage: 0.78, gen throughput (token/s): 261.16, #queue-req: 0
  27571. 2025-08-25 00:01:11,444 - __main__ - INFO - sglang running req: 6 queue req: 0
  27572. 2025-08-25 00:01:12,364 - sglang - INFO - [2025-08-25 00:01:12 TP0] Decode batch. #running-req: 6, #token: 29889, token usage: 0.79, gen throughput (token/s): 260.81, #queue-req: 0
  27573. 2025-08-25 00:01:12,364 - __main__ - INFO - sglang running req: 6 queue req: 0
  27574. 2025-08-25 00:01:13,286 - sglang - INFO - [2025-08-25 00:01:13 TP0] Decode batch. #running-req: 6, #token: 30129, token usage: 0.79, gen throughput (token/s): 260.49, #queue-req: 0
  27575. 2025-08-25 00:01:13,286 - __main__ - INFO - sglang running req: 6 queue req: 0
  27576. 2025-08-25 00:01:14,158 - __main__ - INFO - Queue remaining: 0
  27577. 2025-08-25 00:01:14,159 - __main__ - INFO -
  27578. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27579. ----------------------------------------------------------------------------------
  27580. sglang_input_tokens 163.99 163.99
  27581. sglang_output_tokens 35.64 35.64
  27582. 2025-08-25 00:01:14,159 - __main__ - INFO -
  27583. Worker ID | finished | started
  27584. ----------+----------+--------
  27585. 0 | 5 | 11
  27586. 2025-08-25 00:01:14,207 - sglang - INFO - [2025-08-25 00:01:14 TP0] Decode batch. #running-req: 6, #token: 30369, token usage: 0.80, gen throughput (token/s): 260.52, #queue-req: 0
  27587. 2025-08-25 00:01:14,207 - __main__ - INFO - sglang running req: 6 queue req: 0
  27588. 2025-08-25 00:01:15,128 - sglang - INFO - [2025-08-25 00:01:15 TP0] Decode batch. #running-req: 5, #token: 24750, token usage: 0.65, gen throughput (token/s): 254.11, #queue-req: 0
  27589. 2025-08-25 00:01:15,128 - __main__ - INFO - sglang running req: 5 queue req: 0
  27590. 2025-08-25 00:01:16,033 - sglang - INFO - [2025-08-25 00:01:16 TP0] Decode batch. #running-req: 5, #token: 24950, token usage: 0.66, gen throughput (token/s): 220.87, #queue-req: 0
  27591. 2025-08-25 00:01:16,033 - __main__ - INFO - sglang running req: 5 queue req: 0
  27592. 2025-08-25 00:01:16,941 - sglang - INFO - [2025-08-25 00:01:16 TP0] Decode batch. #running-req: 5, #token: 25150, token usage: 0.66, gen throughput (token/s): 220.33, #queue-req: 0
  27593. 2025-08-25 00:01:16,941 - __main__ - INFO - sglang running req: 5 queue req: 0
  27594. 2025-08-25 00:01:17,830 - sglang - INFO - [2025-08-25 00:01:17 TP0] Decode batch. #running-req: 4, #token: 20786, token usage: 0.55, gen throughput (token/s): 180.98, #queue-req: 0
  27595. 2025-08-25 00:01:17,831 - __main__ - INFO - sglang running req: 4 queue req: 0
  27596. 2025-08-25 00:01:18,707 - sglang - INFO - [2025-08-25 00:01:18 TP0] Decode batch. #running-req: 3, #token: 16010, token usage: 0.42, gen throughput (token/s): 137.99, #queue-req: 0
  27597. 2025-08-25 00:01:18,707 - __main__ - INFO - sglang running req: 3 queue req: 0
  27598. 2025-08-25 00:01:19,584 - sglang - INFO - [2025-08-25 00:01:19 TP0] Decode batch. #running-req: 3, #token: 16130, token usage: 0.42, gen throughput (token/s): 136.93, #queue-req: 0
  27599. 2025-08-25 00:01:19,584 - __main__ - INFO - sglang running req: 3 queue req: 0
  27600. 2025-08-25 00:01:20,460 - sglang - INFO - [2025-08-25 00:01:20 TP0] Decode batch. #running-req: 3, #token: 16250, token usage: 0.43, gen throughput (token/s): 136.88, #queue-req: 0
  27601. 2025-08-25 00:01:20,460 - __main__ - INFO - sglang running req: 3 queue req: 0
  27602. 2025-08-25 00:01:21,338 - sglang - INFO - [2025-08-25 00:01:21 TP0] Decode batch. #running-req: 3, #token: 16370, token usage: 0.43, gen throughput (token/s): 136.79, #queue-req: 0
  27603. 2025-08-25 00:01:21,338 - __main__ - INFO - sglang running req: 3 queue req: 0
  27604. 2025-08-25 00:01:22,215 - sglang - INFO - [2025-08-25 00:01:22 TP0] Decode batch. #running-req: 3, #token: 16490, token usage: 0.43, gen throughput (token/s): 136.80, #queue-req: 0
  27605. 2025-08-25 00:01:22,215 - __main__ - INFO - sglang running req: 3 queue req: 0
  27606. 2025-08-25 00:01:23,091 - sglang - INFO - [2025-08-25 00:01:23 TP0] Decode batch. #running-req: 3, #token: 16610, token usage: 0.44, gen throughput (token/s): 136.97, #queue-req: 0
  27607. 2025-08-25 00:01:23,091 - __main__ - INFO - sglang running req: 3 queue req: 0
  27608. 2025-08-25 00:01:23,970 - sglang - INFO - [2025-08-25 00:01:23 TP0] Decode batch. #running-req: 3, #token: 16730, token usage: 0.44, gen throughput (token/s): 136.56, #queue-req: 0
  27609. 2025-08-25 00:01:23,970 - __main__ - INFO - sglang running req: 3 queue req: 0
  27610. 2025-08-25 00:01:24,160 - __main__ - INFO - Queue remaining: 0
  27611. 2025-08-25 00:01:24,160 - __main__ - INFO -
  27612. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27613. ----------------------------------------------------------------------------------
  27614. sglang_input_tokens 261.80 261.80
  27615. sglang_output_tokens 62.01 62.01
  27616. 2025-08-25 00:01:24,160 - __main__ - INFO -
  27617. Worker ID | finished | started
  27618. ----------+----------+--------
  27619. 0 | 8 | 11
  27620. 2025-08-25 00:01:24,848 - sglang - INFO - [2025-08-25 00:01:24 TP0] Decode batch. #running-req: 3, #token: 16850, token usage: 0.44, gen throughput (token/s): 136.64, #queue-req: 0
  27621. 2025-08-25 00:01:24,848 - __main__ - INFO - sglang running req: 3 queue req: 0
  27622. 2025-08-25 00:01:25,728 - sglang - INFO - [2025-08-25 00:01:25 TP0] Decode batch. #running-req: 3, #token: 16970, token usage: 0.45, gen throughput (token/s): 136.30, #queue-req: 0
  27623. 2025-08-25 00:01:25,728 - __main__ - INFO - sglang running req: 3 queue req: 0
  27624. 2025-08-25 00:01:26,606 - sglang - INFO - [2025-08-25 00:01:26 TP0] Decode batch. #running-req: 3, #token: 17090, token usage: 0.45, gen throughput (token/s): 136.68, #queue-req: 0
  27625. 2025-08-25 00:01:26,607 - __main__ - INFO - sglang running req: 3 queue req: 0
  27626. 2025-08-25 00:01:27,486 - sglang - INFO - [2025-08-25 00:01:27 TP0] Decode batch. #running-req: 3, #token: 17210, token usage: 0.45, gen throughput (token/s): 136.40, #queue-req: 0
  27627. 2025-08-25 00:01:27,486 - __main__ - INFO - sglang running req: 3 queue req: 0
  27628. 2025-08-25 00:01:28,367 - sglang - INFO - [2025-08-25 00:01:28 TP0] Decode batch. #running-req: 3, #token: 17330, token usage: 0.46, gen throughput (token/s): 136.26, #queue-req: 0
  27629. 2025-08-25 00:01:28,367 - __main__ - INFO - sglang running req: 3 queue req: 0
  27630. 2025-08-25 00:01:29,247 - sglang - INFO - [2025-08-25 00:01:29 TP0] Decode batch. #running-req: 3, #token: 17450, token usage: 0.46, gen throughput (token/s): 136.26, #queue-req: 0
  27631. 2025-08-25 00:01:29,248 - __main__ - INFO - sglang running req: 3 queue req: 0
  27632. 2025-08-25 00:01:30,128 - sglang - INFO - [2025-08-25 00:01:30 TP0] Decode batch. #running-req: 3, #token: 17570, token usage: 0.46, gen throughput (token/s): 136.21, #queue-req: 0
  27633. 2025-08-25 00:01:30,129 - __main__ - INFO - sglang running req: 3 queue req: 0
  27634. 2025-08-25 00:01:31,009 - sglang - INFO - [2025-08-25 00:01:31 TP0] Decode batch. #running-req: 3, #token: 17690, token usage: 0.47, gen throughput (token/s): 136.25, #queue-req: 0
  27635. 2025-08-25 00:01:31,009 - __main__ - INFO - sglang running req: 3 queue req: 0
  27636. 2025-08-25 00:01:31,890 - sglang - INFO - [2025-08-25 00:01:31 TP0] Decode batch. #running-req: 3, #token: 17810, token usage: 0.47, gen throughput (token/s): 136.24, #queue-req: 0
  27637. 2025-08-25 00:01:31,890 - __main__ - INFO - sglang running req: 3 queue req: 0
  27638. 2025-08-25 00:01:32,772 - sglang - INFO - [2025-08-25 00:01:32 TP0] Decode batch. #running-req: 3, #token: 17930, token usage: 0.47, gen throughput (token/s): 136.04, #queue-req: 0
  27639. 2025-08-25 00:01:32,772 - __main__ - INFO - sglang running req: 3 queue req: 0
  27640. 2025-08-25 00:01:33,656 - sglang - INFO - [2025-08-25 00:01:33 TP0] Decode batch. #running-req: 3, #token: 18050, token usage: 0.48, gen throughput (token/s): 135.74, #queue-req: 0
  27641. 2025-08-25 00:01:33,656 - __main__ - INFO - sglang running req: 3 queue req: 0
  27642. 2025-08-25 00:01:34,162 - __main__ - INFO - Queue remaining: 0
  27643. 2025-08-25 00:01:34,162 - __main__ - INFO -
  27644. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27645. ----------------------------------------------------------------------------------
  27646. sglang_input_tokens 239.41 239.41
  27647. sglang_output_tokens 56.71 56.71
  27648. 2025-08-25 00:01:34,162 - __main__ - INFO -
  27649. Worker ID | finished | started
  27650. ----------+----------+--------
  27651. 0 | 8 | 11
  27652. 2025-08-25 00:01:34,539 - sglang - INFO - [2025-08-25 00:01:34 TP0] Decode batch. #running-req: 3, #token: 18170, token usage: 0.48, gen throughput (token/s): 135.86, #queue-req: 0
  27653. 2025-08-25 00:01:34,540 - __main__ - INFO - sglang running req: 3 queue req: 0
  27654. 2025-08-25 00:01:35,424 - sglang - INFO - [2025-08-25 00:01:35 TP0] Decode batch. #running-req: 3, #token: 18290, token usage: 0.48, gen throughput (token/s): 135.70, #queue-req: 0
  27655. 2025-08-25 00:01:35,424 - __main__ - INFO - sglang running req: 3 queue req: 0
  27656. 2025-08-25 00:01:36,308 - sglang - INFO - [2025-08-25 00:01:36 TP0] Decode batch. #running-req: 3, #token: 18410, token usage: 0.48, gen throughput (token/s): 135.77, #queue-req: 0
  27657. 2025-08-25 00:01:36,308 - __main__ - INFO - sglang running req: 3 queue req: 0
  27658. 2025-08-25 00:01:37,191 - sglang - INFO - [2025-08-25 00:01:37 TP0] Decode batch. #running-req: 3, #token: 18530, token usage: 0.49, gen throughput (token/s): 135.82, #queue-req: 0
  27659. 2025-08-25 00:01:37,191 - __main__ - INFO - sglang running req: 3 queue req: 0
  27660. 2025-08-25 00:01:38,075 - sglang - INFO - [2025-08-25 00:01:38 TP0] Decode batch. #running-req: 3, #token: 18650, token usage: 0.49, gen throughput (token/s): 135.72, #queue-req: 0
  27661. 2025-08-25 00:01:38,075 - __main__ - INFO - sglang running req: 3 queue req: 0
  27662. 2025-08-25 00:01:38,960 - sglang - INFO - [2025-08-25 00:01:38 TP0] Decode batch. #running-req: 3, #token: 18770, token usage: 0.49, gen throughput (token/s): 135.66, #queue-req: 0
  27663. 2025-08-25 00:01:38,960 - __main__ - INFO - sglang running req: 3 queue req: 0
  27664. 2025-08-25 00:01:39,843 - sglang - INFO - [2025-08-25 00:01:39 TP0] Decode batch. #running-req: 3, #token: 18890, token usage: 0.50, gen throughput (token/s): 135.77, #queue-req: 0
  27665. 2025-08-25 00:01:39,844 - __main__ - INFO - sglang running req: 3 queue req: 0
  27666. 2025-08-25 00:01:40,729 - sglang - INFO - [2025-08-25 00:01:40 TP0] Decode batch. #running-req: 3, #token: 19010, token usage: 0.50, gen throughput (token/s): 135.56, #queue-req: 0
  27667. 2025-08-25 00:01:40,729 - __main__ - INFO - sglang running req: 3 queue req: 0
  27668. 2025-08-25 00:01:41,615 - sglang - INFO - [2025-08-25 00:01:41 TP0] Decode batch. #running-req: 3, #token: 19130, token usage: 0.50, gen throughput (token/s): 135.41, #queue-req: 0
  27669. 2025-08-25 00:01:41,615 - __main__ - INFO - sglang running req: 3 queue req: 0
  27670. 2025-08-25 00:01:42,500 - sglang - INFO - [2025-08-25 00:01:42 TP0] Decode batch. #running-req: 3, #token: 19250, token usage: 0.51, gen throughput (token/s): 135.50, #queue-req: 0
  27671. 2025-08-25 00:01:42,501 - __main__ - INFO - sglang running req: 3 queue req: 0
  27672. 2025-08-25 00:01:43,387 - sglang - INFO - [2025-08-25 00:01:43 TP0] Decode batch. #running-req: 3, #token: 19370, token usage: 0.51, gen throughput (token/s): 135.36, #queue-req: 0
  27673. 2025-08-25 00:01:43,387 - __main__ - INFO - sglang running req: 3 queue req: 0
  27674. 2025-08-25 00:01:44,164 - __main__ - INFO - Queue remaining: 0
  27675. 2025-08-25 00:01:44,164 - __main__ - INFO -
  27676. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27677. ----------------------------------------------------------------------------------
  27678. sglang_input_tokens 220.55 220.55
  27679. sglang_output_tokens 52.24 52.24
  27680. 2025-08-25 00:01:44,164 - __main__ - INFO -
  27681. Worker ID | finished | started
  27682. ----------+----------+--------
  27683. 0 | 8 | 11
  27684. 2025-08-25 00:01:44,274 - sglang - INFO - [2025-08-25 00:01:44 TP0] Decode batch. #running-req: 3, #token: 19490, token usage: 0.51, gen throughput (token/s): 135.32, #queue-req: 0
  27685. 2025-08-25 00:01:44,274 - __main__ - INFO - sglang running req: 3 queue req: 0
  27686. 2025-08-25 00:01:45,160 - sglang - INFO - [2025-08-25 00:01:45 TP0] Decode batch. #running-req: 3, #token: 19610, token usage: 0.52, gen throughput (token/s): 135.43, #queue-req: 0
  27687. 2025-08-25 00:01:45,160 - __main__ - INFO - sglang running req: 3 queue req: 0
  27688. 2025-08-25 00:01:46,047 - sglang - INFO - [2025-08-25 00:01:46 TP0] Decode batch. #running-req: 3, #token: 19730, token usage: 0.52, gen throughput (token/s): 135.35, #queue-req: 0
  27689. 2025-08-25 00:01:46,047 - __main__ - INFO - sglang running req: 3 queue req: 0
  27690. 2025-08-25 00:01:46,924 - sglang - INFO - [2025-08-25 00:01:46 TP0] Decode batch. #running-req: 2, #token: 13830, token usage: 0.36, gen throughput (token/s): 116.28, #queue-req: 0
  27691. 2025-08-25 00:01:46,924 - __main__ - INFO - sglang running req: 2 queue req: 0
  27692. 2025-08-25 00:01:47,058 - __main__ - INFO - Reducing anchor text len to 3000 for ./workspace/UNETR.pdf-5
  27693. 2025-08-25 00:01:47,059 - __main__ - WARNING - ValueError on attempt 0 for ./workspace/UNETR.pdf-5: <class 'ValueError'> - Response exceeded model_max_context, cannot use this response
  27694. 2025-08-25 00:01:47,389 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-5
  27695. 2025-08-25 00:01:47,611 - sglang - INFO - [2025-08-25 00:01:47 TP0] Prefill batch. #new-seq: 1, #new-token: 3206, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.15, #running-req: 1, #queue-req: 0
  27696. 2025-08-25 00:01:47,611 - __main__ - INFO - sglang running req: 1 queue req: 0
  27697. 2025-08-25 00:01:48,704 - sglang - INFO - [2025-08-25 00:01:48 TP0] Decode batch. #running-req: 2, #token: 8838, token usage: 0.23, gen throughput (token/s): 30.33, #queue-req: 0
  27698. 2025-08-25 00:01:48,704 - __main__ - INFO - sglang running req: 2 queue req: 0
  27699. 2025-08-25 00:01:49,557 - sglang - INFO - [2025-08-25 00:01:49 TP0] Decode batch. #running-req: 2, #token: 8918, token usage: 0.23, gen throughput (token/s): 93.81, #queue-req: 0
  27700. 2025-08-25 00:01:49,557 - __main__ - INFO - sglang running req: 2 queue req: 0
  27701. 2025-08-25 00:01:50,409 - sglang - INFO - [2025-08-25 00:01:50 TP0] Decode batch. #running-req: 2, #token: 8998, token usage: 0.24, gen throughput (token/s): 93.85, #queue-req: 0
  27702. 2025-08-25 00:01:50,410 - __main__ - INFO - sglang running req: 2 queue req: 0
  27703. 2025-08-25 00:01:51,262 - sglang - INFO - [2025-08-25 00:01:51 TP0] Decode batch. #running-req: 2, #token: 9078, token usage: 0.24, gen throughput (token/s): 93.76, #queue-req: 0
  27704. 2025-08-25 00:01:51,263 - __main__ - INFO - sglang running req: 2 queue req: 0
  27705. 2025-08-25 00:01:52,116 - sglang - INFO - [2025-08-25 00:01:52 TP0] Decode batch. #running-req: 2, #token: 9158, token usage: 0.24, gen throughput (token/s): 93.73, #queue-req: 0
  27706. 2025-08-25 00:01:52,116 - __main__ - INFO - sglang running req: 2 queue req: 0
  27707. 2025-08-25 00:01:52,964 - sglang - INFO - [2025-08-25 00:01:52 TP0] Decode batch. #running-req: 1, #token: 3414, token usage: 0.09, gen throughput (token/s): 76.63, #queue-req: 0
  27708. 2025-08-25 00:01:52,965 - __main__ - INFO - sglang running req: 1 queue req: 0
  27709. 2025-08-25 00:01:53,802 - sglang - INFO - [2025-08-25 00:01:53 TP0] Decode batch. #running-req: 1, #token: 3454, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
  27710. 2025-08-25 00:01:53,802 - __main__ - INFO - sglang running req: 1 queue req: 0
  27711. 2025-08-25 00:01:54,166 - __main__ - INFO - Queue remaining: 0
  27712. 2025-08-25 00:01:54,166 - __main__ - INFO -
  27713. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27714. ----------------------------------------------------------------------------------
  27715. sglang_input_tokens 260.08 260.08
  27716. sglang_output_tokens 79.05 79.05
  27717. 2025-08-25 00:01:54,166 - __main__ - INFO -
  27718. Worker ID | finished | started
  27719. ----------+----------+--------
  27720. 0 | 10 | 11
  27721. 2025-08-25 00:01:54,639 - sglang - INFO - [2025-08-25 00:01:54 TP0] Decode batch. #running-req: 1, #token: 3494, token usage: 0.09, gen throughput (token/s): 47.78, #queue-req: 0
  27722. 2025-08-25 00:01:54,639 - __main__ - INFO - sglang running req: 1 queue req: 0
  27723. 2025-08-25 00:01:55,477 - sglang - INFO - [2025-08-25 00:01:55 TP0] Decode batch. #running-req: 1, #token: 3534, token usage: 0.09, gen throughput (token/s): 47.75, #queue-req: 0
  27724. 2025-08-25 00:01:55,477 - __main__ - INFO - sglang running req: 1 queue req: 0
  27725. 2025-08-25 00:01:56,316 - sglang - INFO - [2025-08-25 00:01:56 TP0] Decode batch. #running-req: 1, #token: 3574, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
  27726. 2025-08-25 00:01:56,316 - __main__ - INFO - sglang running req: 1 queue req: 0
  27727. 2025-08-25 00:01:57,154 - sglang - INFO - [2025-08-25 00:01:57 TP0] Decode batch. #running-req: 1, #token: 3614, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
  27728. 2025-08-25 00:01:57,154 - __main__ - INFO - sglang running req: 1 queue req: 0
  27729. 2025-08-25 00:01:57,992 - sglang - INFO - [2025-08-25 00:01:57 TP0] Decode batch. #running-req: 1, #token: 3654, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
  27730. 2025-08-25 00:01:57,993 - __main__ - INFO - sglang running req: 1 queue req: 0
  27731. 2025-08-25 00:01:58,832 - sglang - INFO - [2025-08-25 00:01:58 TP0] Decode batch. #running-req: 1, #token: 3694, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
  27732. 2025-08-25 00:01:58,832 - __main__ - INFO - sglang running req: 1 queue req: 0
  27733. 2025-08-25 00:01:59,671 - sglang - INFO - [2025-08-25 00:01:59 TP0] Decode batch. #running-req: 1, #token: 3734, token usage: 0.10, gen throughput (token/s): 47.66, #queue-req: 0
  27734. 2025-08-25 00:01:59,671 - __main__ - INFO - sglang running req: 1 queue req: 0
  27735. 2025-08-25 00:02:00,511 - sglang - INFO - [2025-08-25 00:02:00 TP0] Decode batch. #running-req: 1, #token: 3774, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
  27736. 2025-08-25 00:02:00,512 - __main__ - INFO - sglang running req: 1 queue req: 0
  27737. 2025-08-25 00:02:01,351 - sglang - INFO - [2025-08-25 00:02:01 TP0] Decode batch. #running-req: 1, #token: 3814, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
  27738. 2025-08-25 00:02:01,351 - __main__ - INFO - sglang running req: 1 queue req: 0
  27739. 2025-08-25 00:02:02,190 - sglang - INFO - [2025-08-25 00:02:02 TP0] Decode batch. #running-req: 1, #token: 3854, token usage: 0.10, gen throughput (token/s): 47.65, #queue-req: 0
  27740. 2025-08-25 00:02:02,191 - __main__ - INFO - sglang running req: 1 queue req: 0
  27741. 2025-08-25 00:02:03,030 - sglang - INFO - [2025-08-25 00:02:03 TP0] Decode batch. #running-req: 1, #token: 3894, token usage: 0.10, gen throughput (token/s): 47.67, #queue-req: 0
  27742. 2025-08-25 00:02:03,030 - __main__ - INFO - sglang running req: 1 queue req: 0
  27743. 2025-08-25 00:02:03,869 - sglang - INFO - [2025-08-25 00:02:03 TP0] Decode batch. #running-req: 1, #token: 3934, token usage: 0.10, gen throughput (token/s): 47.62, #queue-req: 0
  27744. 2025-08-25 00:02:03,870 - __main__ - INFO - sglang running req: 1 queue req: 0
  27745. 2025-08-25 00:02:04,167 - __main__ - INFO - Queue remaining: 0
  27746. 2025-08-25 00:02:04,168 - __main__ - INFO -
  27747. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27748. ----------------------------------------------------------------------------------
  27749. sglang_input_tokens 242.38 242.38
  27750. sglang_output_tokens 73.67 73.67
  27751. 2025-08-25 00:02:04,168 - __main__ - INFO -
  27752. Worker ID | finished | started
  27753. ----------+----------+--------
  27754. 0 | 10 | 11
  27755. 2025-08-25 00:02:04,709 - sglang - INFO - [2025-08-25 00:02:04 TP0] Decode batch. #running-req: 1, #token: 3974, token usage: 0.10, gen throughput (token/s): 47.66, #queue-req: 0
  27756. 2025-08-25 00:02:04,709 - __main__ - INFO - sglang running req: 1 queue req: 0
  27757. 2025-08-25 00:02:05,548 - sglang - INFO - [2025-08-25 00:02:05 TP0] Decode batch. #running-req: 1, #token: 4014, token usage: 0.11, gen throughput (token/s): 47.67, #queue-req: 0
  27758. 2025-08-25 00:02:05,548 - __main__ - INFO - sglang running req: 1 queue req: 0
  27759. 2025-08-25 00:02:06,387 - sglang - INFO - [2025-08-25 00:02:06 TP0] Decode batch. #running-req: 1, #token: 4054, token usage: 0.11, gen throughput (token/s): 47.67, #queue-req: 0
  27760. 2025-08-25 00:02:06,387 - __main__ - INFO - sglang running req: 1 queue req: 0
  27761. 2025-08-25 00:02:07,226 - sglang - INFO - [2025-08-25 00:02:07 TP0] Decode batch. #running-req: 1, #token: 4094, token usage: 0.11, gen throughput (token/s): 47.65, #queue-req: 0
  27762. 2025-08-25 00:02:07,227 - __main__ - INFO - sglang running req: 1 queue req: 0
  27763. 2025-08-25 00:02:08,068 - sglang - INFO - [2025-08-25 00:02:08 TP0] Decode batch. #running-req: 1, #token: 4134, token usage: 0.11, gen throughput (token/s): 47.53, #queue-req: 0
  27764. 2025-08-25 00:02:08,068 - __main__ - INFO - sglang running req: 1 queue req: 0
  27765. 2025-08-25 00:02:08,911 - sglang - INFO - [2025-08-25 00:02:08 TP0] Decode batch. #running-req: 1, #token: 4174, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
  27766. 2025-08-25 00:02:08,911 - __main__ - INFO - sglang running req: 1 queue req: 0
  27767. 2025-08-25 00:02:09,752 - sglang - INFO - [2025-08-25 00:02:09 TP0] Decode batch. #running-req: 1, #token: 4214, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
  27768. 2025-08-25 00:02:09,752 - __main__ - INFO - sglang running req: 1 queue req: 0
  27769. 2025-08-25 00:02:10,592 - sglang - INFO - [2025-08-25 00:02:10 TP0] Decode batch. #running-req: 1, #token: 4254, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
  27770. 2025-08-25 00:02:10,593 - __main__ - INFO - sglang running req: 1 queue req: 0
  27771. 2025-08-25 00:02:11,434 - sglang - INFO - [2025-08-25 00:02:11 TP0] Decode batch. #running-req: 1, #token: 4294, token usage: 0.11, gen throughput (token/s): 47.52, #queue-req: 0
  27772. 2025-08-25 00:02:11,434 - __main__ - INFO - sglang running req: 1 queue req: 0
  27773. 2025-08-25 00:02:12,276 - sglang - INFO - [2025-08-25 00:02:12 TP0] Decode batch. #running-req: 1, #token: 4334, token usage: 0.11, gen throughput (token/s): 47.49, #queue-req: 0
  27774. 2025-08-25 00:02:12,277 - __main__ - INFO - sglang running req: 1 queue req: 0
  27775. 2025-08-25 00:02:13,120 - sglang - INFO - [2025-08-25 00:02:13 TP0] Decode batch. #running-req: 1, #token: 4374, token usage: 0.12, gen throughput (token/s): 47.40, #queue-req: 0
  27776. 2025-08-25 00:02:13,121 - __main__ - INFO - sglang running req: 1 queue req: 0
  27777. 2025-08-25 00:02:13,963 - sglang - INFO - [2025-08-25 00:02:13 TP0] Decode batch. #running-req: 1, #token: 4414, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
  27778. 2025-08-25 00:02:13,963 - __main__ - INFO - sglang running req: 1 queue req: 0
  27779. 2025-08-25 00:02:14,170 - __main__ - INFO - Queue remaining: 0
  27780. 2025-08-25 00:02:14,170 - __main__ - INFO -
  27781. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27782. ----------------------------------------------------------------------------------
  27783. sglang_input_tokens 226.93 226.93
  27784. sglang_output_tokens 68.97 68.97
  27785. 2025-08-25 00:02:14,170 - __main__ - INFO -
  27786. Worker ID | finished | started
  27787. ----------+----------+--------
  27788. 0 | 10 | 11
  27789. 2025-08-25 00:02:14,804 - sglang - INFO - [2025-08-25 00:02:14 TP0] Decode batch. #running-req: 1, #token: 4454, token usage: 0.12, gen throughput (token/s): 47.53, #queue-req: 0
  27790. 2025-08-25 00:02:14,805 - __main__ - INFO - sglang running req: 1 queue req: 0
  27791. 2025-08-25 00:02:15,646 - sglang - INFO - [2025-08-25 00:02:15 TP0] Decode batch. #running-req: 1, #token: 4494, token usage: 0.12, gen throughput (token/s): 47.53, #queue-req: 0
  27792. 2025-08-25 00:02:15,646 - __main__ - INFO - sglang running req: 1 queue req: 0
  27793. 2025-08-25 00:02:16,489 - sglang - INFO - [2025-08-25 00:02:16 TP0] Decode batch. #running-req: 1, #token: 4534, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
  27794. 2025-08-25 00:02:16,489 - __main__ - INFO - sglang running req: 1 queue req: 0
  27795. 2025-08-25 00:02:17,331 - sglang - INFO - [2025-08-25 00:02:17 TP0] Decode batch. #running-req: 1, #token: 4574, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
  27796. 2025-08-25 00:02:17,331 - __main__ - INFO - sglang running req: 1 queue req: 0
  27797. 2025-08-25 00:02:18,173 - sglang - INFO - [2025-08-25 00:02:18 TP0] Decode batch. #running-req: 1, #token: 4614, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
  27798. 2025-08-25 00:02:18,173 - __main__ - INFO - sglang running req: 1 queue req: 0
  27799. 2025-08-25 00:02:19,016 - sglang - INFO - [2025-08-25 00:02:19 TP0] Decode batch. #running-req: 1, #token: 4654, token usage: 0.12, gen throughput (token/s): 47.45, #queue-req: 0
  27800. 2025-08-25 00:02:19,017 - __main__ - INFO - sglang running req: 1 queue req: 0
  27801. 2025-08-25 00:02:19,859 - sglang - INFO - [2025-08-25 00:02:19 TP0] Decode batch. #running-req: 1, #token: 4694, token usage: 0.12, gen throughput (token/s): 47.44, #queue-req: 0
  27802. 2025-08-25 00:02:19,860 - __main__ - INFO - sglang running req: 1 queue req: 0
  27803. 2025-08-25 00:02:20,704 - sglang - INFO - [2025-08-25 00:02:20 TP0] Decode batch. #running-req: 1, #token: 4734, token usage: 0.12, gen throughput (token/s): 47.37, #queue-req: 0
  27804. 2025-08-25 00:02:20,704 - __main__ - INFO - sglang running req: 1 queue req: 0
  27805. 2025-08-25 00:02:21,548 - sglang - INFO - [2025-08-25 00:02:21 TP0] Decode batch. #running-req: 1, #token: 4774, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
  27806. 2025-08-25 00:02:21,548 - __main__ - INFO - sglang running req: 1 queue req: 0
  27807. 2025-08-25 00:02:22,392 - sglang - INFO - [2025-08-25 00:02:22 TP0] Decode batch. #running-req: 1, #token: 4814, token usage: 0.13, gen throughput (token/s): 47.37, #queue-req: 0
  27808. 2025-08-25 00:02:22,392 - __main__ - INFO - sglang running req: 1 queue req: 0
  27809. 2025-08-25 00:02:23,236 - sglang - INFO - [2025-08-25 00:02:23 TP0] Decode batch. #running-req: 1, #token: 4854, token usage: 0.13, gen throughput (token/s): 47.40, #queue-req: 0
  27810. 2025-08-25 00:02:23,236 - __main__ - INFO - sglang running req: 1 queue req: 0
  27811. 2025-08-25 00:02:24,089 - sglang - INFO - [2025-08-25 00:02:24 TP0] Decode batch. #running-req: 1, #token: 4894, token usage: 0.13, gen throughput (token/s): 46.90, #queue-req: 0
  27812. 2025-08-25 00:02:24,089 - __main__ - INFO - sglang running req: 1 queue req: 0
  27813. 2025-08-25 00:02:24,171 - __main__ - INFO - Queue remaining: 0
  27814. 2025-08-25 00:02:24,172 - __main__ - INFO -
  27815. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27816. ----------------------------------------------------------------------------------
  27817. sglang_input_tokens 213.34 213.34
  27818. sglang_output_tokens 64.84 64.84
  27819. 2025-08-25 00:02:24,172 - __main__ - INFO -
  27820. Worker ID | finished | started
  27821. ----------+----------+--------
  27822. 0 | 10 | 11
  27823. 2025-08-25 00:02:24,939 - sglang - INFO - [2025-08-25 00:02:24 TP0] Decode batch. #running-req: 1, #token: 4934, token usage: 0.13, gen throughput (token/s): 47.04, #queue-req: 0
  27824. 2025-08-25 00:02:24,939 - __main__ - INFO - sglang running req: 1 queue req: 0
  27825. 2025-08-25 00:02:25,790 - sglang - INFO - [2025-08-25 00:02:25 TP0] Decode batch. #running-req: 1, #token: 4974, token usage: 0.13, gen throughput (token/s): 46.99, #queue-req: 0
  27826. 2025-08-25 00:02:25,791 - __main__ - INFO - sglang running req: 1 queue req: 0
  27827. 2025-08-25 00:02:26,640 - sglang - INFO - [2025-08-25 00:02:26 TP0] Decode batch. #running-req: 1, #token: 5014, token usage: 0.13, gen throughput (token/s): 47.09, #queue-req: 0
  27828. 2025-08-25 00:02:26,640 - __main__ - INFO - sglang running req: 1 queue req: 0
  27829. 2025-08-25 00:02:27,485 - sglang - INFO - [2025-08-25 00:02:27 TP0] Decode batch. #running-req: 1, #token: 5054, token usage: 0.13, gen throughput (token/s): 47.35, #queue-req: 0
  27830. 2025-08-25 00:02:27,485 - __main__ - INFO - sglang running req: 1 queue req: 0
  27831. 2025-08-25 00:02:28,330 - sglang - INFO - [2025-08-25 00:02:28 TP0] Decode batch. #running-req: 1, #token: 5094, token usage: 0.13, gen throughput (token/s): 47.30, #queue-req: 0
  27832. 2025-08-25 00:02:28,331 - __main__ - INFO - sglang running req: 1 queue req: 0
  27833. 2025-08-25 00:02:29,176 - sglang - INFO - [2025-08-25 00:02:29 TP0] Decode batch. #running-req: 1, #token: 5134, token usage: 0.14, gen throughput (token/s): 47.29, #queue-req: 0
  27834. 2025-08-25 00:02:29,177 - __main__ - INFO - sglang running req: 1 queue req: 0
  27835. 2025-08-25 00:02:30,023 - sglang - INFO - [2025-08-25 00:02:30 TP0] Decode batch. #running-req: 1, #token: 5174, token usage: 0.14, gen throughput (token/s): 47.23, #queue-req: 0
  27836. 2025-08-25 00:02:30,024 - __main__ - INFO - sglang running req: 1 queue req: 0
  27837. 2025-08-25 00:02:30,871 - sglang - INFO - [2025-08-25 00:02:30 TP0] Decode batch. #running-req: 1, #token: 5214, token usage: 0.14, gen throughput (token/s): 47.21, #queue-req: 0
  27838. 2025-08-25 00:02:30,871 - __main__ - INFO - sglang running req: 1 queue req: 0
  27839. 2025-08-25 00:02:31,716 - sglang - INFO - [2025-08-25 00:02:31 TP0] Decode batch. #running-req: 1, #token: 5254, token usage: 0.14, gen throughput (token/s): 47.30, #queue-req: 0
  27840. 2025-08-25 00:02:31,717 - __main__ - INFO - sglang running req: 1 queue req: 0
  27841. 2025-08-25 00:02:32,564 - sglang - INFO - [2025-08-25 00:02:32 TP0] Decode batch. #running-req: 1, #token: 5294, token usage: 0.14, gen throughput (token/s): 47.22, #queue-req: 0
  27842. 2025-08-25 00:02:32,564 - __main__ - INFO - sglang running req: 1 queue req: 0
  27843. 2025-08-25 00:02:33,412 - sglang - INFO - [2025-08-25 00:02:33 TP0] Decode batch. #running-req: 1, #token: 5334, token usage: 0.14, gen throughput (token/s): 47.16, #queue-req: 0
  27844. 2025-08-25 00:02:33,412 - __main__ - INFO - sglang running req: 1 queue req: 0
  27845. 2025-08-25 00:02:34,172 - __main__ - INFO - Queue remaining: 0
  27846. 2025-08-25 00:02:34,173 - __main__ - INFO -
  27847. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27848. ----------------------------------------------------------------------------------
  27849. sglang_input_tokens 201.28 201.28
  27850. sglang_output_tokens 61.18 61.18
  27851. 2025-08-25 00:02:34,173 - __main__ - INFO -
  27852. Worker ID | finished | started
  27853. ----------+----------+--------
  27854. 0 | 10 | 11
  27855. 2025-08-25 00:02:34,260 - sglang - INFO - [2025-08-25 00:02:34 TP0] Decode batch. #running-req: 1, #token: 5374, token usage: 0.14, gen throughput (token/s): 47.17, #queue-req: 0
  27856. 2025-08-25 00:02:34,260 - __main__ - INFO - sglang running req: 1 queue req: 0
  27857. 2025-08-25 00:02:35,108 - sglang - INFO - [2025-08-25 00:02:35 TP0] Decode batch. #running-req: 1, #token: 5414, token usage: 0.14, gen throughput (token/s): 47.15, #queue-req: 0
  27858. 2025-08-25 00:02:35,108 - __main__ - INFO - sglang running req: 1 queue req: 0
  27859. 2025-08-25 00:02:35,955 - sglang - INFO - [2025-08-25 00:02:35 TP0] Decode batch. #running-req: 1, #token: 5454, token usage: 0.14, gen throughput (token/s): 47.22, #queue-req: 0
  27860. 2025-08-25 00:02:35,955 - __main__ - INFO - sglang running req: 1 queue req: 0
  27861. 2025-08-25 00:02:36,802 - sglang - INFO - [2025-08-25 00:02:36 TP0] Decode batch. #running-req: 1, #token: 5494, token usage: 0.14, gen throughput (token/s): 47.21, #queue-req: 0
  27862. 2025-08-25 00:02:36,802 - __main__ - INFO - sglang running req: 1 queue req: 0
  27863. 2025-08-25 00:02:37,649 - sglang - INFO - [2025-08-25 00:02:37 TP0] Decode batch. #running-req: 1, #token: 5534, token usage: 0.15, gen throughput (token/s): 47.21, #queue-req: 0
  27864. 2025-08-25 00:02:37,650 - __main__ - INFO - sglang running req: 1 queue req: 0
  27865. 2025-08-25 00:02:38,497 - sglang - INFO - [2025-08-25 00:02:38 TP0] Decode batch. #running-req: 1, #token: 5574, token usage: 0.15, gen throughput (token/s): 47.17, #queue-req: 0
  27866. 2025-08-25 00:02:38,498 - __main__ - INFO - sglang running req: 1 queue req: 0
  27867. 2025-08-25 00:02:39,347 - sglang - INFO - [2025-08-25 00:02:39 TP0] Decode batch. #running-req: 1, #token: 5614, token usage: 0.15, gen throughput (token/s): 47.09, #queue-req: 0
  27868. 2025-08-25 00:02:39,347 - __main__ - INFO - sglang running req: 1 queue req: 0
  27869. 2025-08-25 00:02:40,195 - sglang - INFO - [2025-08-25 00:02:40 TP0] Decode batch. #running-req: 1, #token: 5654, token usage: 0.15, gen throughput (token/s): 47.16, #queue-req: 0
  27870. 2025-08-25 00:02:40,195 - __main__ - INFO - sglang running req: 1 queue req: 0
  27871. 2025-08-25 00:02:41,043 - sglang - INFO - [2025-08-25 00:02:41 TP0] Decode batch. #running-req: 1, #token: 5694, token usage: 0.15, gen throughput (token/s): 47.16, #queue-req: 0
  27872. 2025-08-25 00:02:41,044 - __main__ - INFO - sglang running req: 1 queue req: 0
  27873. 2025-08-25 00:02:41,891 - sglang - INFO - [2025-08-25 00:02:41 TP0] Decode batch. #running-req: 1, #token: 5734, token usage: 0.15, gen throughput (token/s): 47.18, #queue-req: 0
  27874. 2025-08-25 00:02:41,891 - __main__ - INFO - sglang running req: 1 queue req: 0
  27875. 2025-08-25 00:02:42,740 - sglang - INFO - [2025-08-25 00:02:42 TP0] Decode batch. #running-req: 1, #token: 5774, token usage: 0.15, gen throughput (token/s): 47.15, #queue-req: 0
  27876. 2025-08-25 00:02:42,740 - __main__ - INFO - sglang running req: 1 queue req: 0
  27877. 2025-08-25 00:02:43,590 - sglang - INFO - [2025-08-25 00:02:43 TP0] Decode batch. #running-req: 1, #token: 5814, token usage: 0.15, gen throughput (token/s): 47.06, #queue-req: 0
  27878. 2025-08-25 00:02:43,590 - __main__ - INFO - sglang running req: 1 queue req: 0
  27879. 2025-08-25 00:02:44,174 - __main__ - INFO - Queue remaining: 0
  27880. 2025-08-25 00:02:44,174 - __main__ - INFO -
  27881. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27882. ----------------------------------------------------------------------------------
  27883. sglang_input_tokens 190.51 190.51
  27884. sglang_output_tokens 57.91 57.91
  27885. 2025-08-25 00:02:44,174 - __main__ - INFO -
  27886. Worker ID | finished | started
  27887. ----------+----------+--------
  27888. 0 | 10 | 11
  27889. 2025-08-25 00:02:44,439 - sglang - INFO - [2025-08-25 00:02:44 TP0] Decode batch. #running-req: 1, #token: 5854, token usage: 0.15, gen throughput (token/s): 47.12, #queue-req: 0
  27890. 2025-08-25 00:02:44,439 - __main__ - INFO - sglang running req: 1 queue req: 0
  27891. 2025-08-25 00:02:45,287 - sglang - INFO - [2025-08-25 00:02:45 TP0] Decode batch. #running-req: 1, #token: 5894, token usage: 0.16, gen throughput (token/s): 47.17, #queue-req: 0
  27892. 2025-08-25 00:02:45,287 - __main__ - INFO - sglang running req: 1 queue req: 0
  27893. 2025-08-25 00:02:46,133 - sglang - INFO - [2025-08-25 00:02:46 TP0] Decode batch. #running-req: 1, #token: 5934, token usage: 0.16, gen throughput (token/s): 47.24, #queue-req: 0
  27894. 2025-08-25 00:02:46,133 - __main__ - INFO - sglang running req: 1 queue req: 0
  27895. 2025-08-25 00:02:46,982 - sglang - INFO - [2025-08-25 00:02:46 TP0] Decode batch. #running-req: 1, #token: 5974, token usage: 0.16, gen throughput (token/s): 47.13, #queue-req: 0
  27896. 2025-08-25 00:02:46,982 - __main__ - INFO - sglang running req: 1 queue req: 0
  27897. 2025-08-25 00:02:47,830 - sglang - INFO - [2025-08-25 00:02:47 TP0] Decode batch. #running-req: 1, #token: 6014, token usage: 0.16, gen throughput (token/s): 47.17, #queue-req: 0
  27898. 2025-08-25 00:02:47,830 - __main__ - INFO - sglang running req: 1 queue req: 0
  27899. 2025-08-25 00:02:48,676 - sglang - INFO - [2025-08-25 00:02:48 TP0] Decode batch. #running-req: 1, #token: 6054, token usage: 0.16, gen throughput (token/s): 47.25, #queue-req: 0
  27900. 2025-08-25 00:02:48,677 - __main__ - INFO - sglang running req: 1 queue req: 0
  27901. 2025-08-25 00:02:49,525 - sglang - INFO - [2025-08-25 00:02:49 TP0] Decode batch. #running-req: 1, #token: 6094, token usage: 0.16, gen throughput (token/s): 47.16, #queue-req: 0
  27902. 2025-08-25 00:02:49,525 - __main__ - INFO - sglang running req: 1 queue req: 0
  27903. 2025-08-25 00:02:50,373 - sglang - INFO - [2025-08-25 00:02:50 TP0] Decode batch. #running-req: 1, #token: 6134, token usage: 0.16, gen throughput (token/s): 47.18, #queue-req: 0
  27904. 2025-08-25 00:02:50,373 - __main__ - INFO - sglang running req: 1 queue req: 0
  27905. 2025-08-25 00:02:51,221 - sglang - INFO - [2025-08-25 00:02:51 TP0] Decode batch. #running-req: 1, #token: 6174, token usage: 0.16, gen throughput (token/s): 47.16, #queue-req: 0
  27906. 2025-08-25 00:02:51,221 - __main__ - INFO - sglang running req: 1 queue req: 0
  27907. 2025-08-25 00:02:51,885 - __main__ - WARNING - JSON decode error on attempt 1 for ./workspace/UNETR.pdf-5: Unterminated string starting at: line 1 column 126 (char 125)
  27908. 2025-08-25 00:02:52,151 - __main__ - INFO - Built page query for ./workspace/UNETR.pdf-5
  27909. 2025-08-25 00:02:52,318 - sglang - INFO - [2025-08-25 00:02:52 TP0] Prefill batch. #new-seq: 1, #new-token: 3125, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0
  27910. 2025-08-25 00:02:52,318 - __main__ - INFO - sglang running req: 0 queue req: 0
  27911. 2025-08-25 00:02:53,418 - sglang - INFO - [2025-08-25 00:02:53 TP0] Decode batch. #running-req: 1, #token: 3134, token usage: 0.08, gen throughput (token/s): 18.21, #queue-req: 0
  27912. 2025-08-25 00:02:53,418 - __main__ - INFO - sglang running req: 1 queue req: 0
  27913. 2025-08-25 00:02:54,175 - __main__ - INFO - Queue remaining: 0
  27914. 2025-08-25 00:02:54,175 - __main__ - INFO -
  27915. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27916. ----------------------------------------------------------------------------------
  27917. sglang_input_tokens 197.11 197.11
  27918. sglang_output_tokens 70.20 70.20
  27919. 2025-08-25 00:02:54,175 - __main__ - INFO -
  27920. Worker ID | finished | started
  27921. ----------+----------+--------
  27922. 0 | 10 | 11
  27923. 2025-08-25 00:02:54,256 - sglang - INFO - [2025-08-25 00:02:54 TP0] Decode batch. #running-req: 1, #token: 3174, token usage: 0.08, gen throughput (token/s): 47.70, #queue-req: 0
  27924. 2025-08-25 00:02:54,257 - __main__ - INFO - sglang running req: 1 queue req: 0
  27925. 2025-08-25 00:02:55,095 - sglang - INFO - [2025-08-25 00:02:55 TP0] Decode batch. #running-req: 1, #token: 3214, token usage: 0.08, gen throughput (token/s): 47.71, #queue-req: 0
  27926. 2025-08-25 00:02:55,095 - __main__ - INFO - sglang running req: 1 queue req: 0
  27927. 2025-08-25 00:02:55,934 - sglang - INFO - [2025-08-25 00:02:55 TP0] Decode batch. #running-req: 1, #token: 3254, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
  27928. 2025-08-25 00:02:55,935 - __main__ - INFO - sglang running req: 1 queue req: 0
  27929. 2025-08-25 00:02:56,774 - sglang - INFO - [2025-08-25 00:02:56 TP0] Decode batch. #running-req: 1, #token: 3294, token usage: 0.09, gen throughput (token/s): 47.66, #queue-req: 0
  27930. 2025-08-25 00:02:56,774 - __main__ - INFO - sglang running req: 1 queue req: 0
  27931. 2025-08-25 00:02:57,613 - sglang - INFO - [2025-08-25 00:02:57 TP0] Decode batch. #running-req: 1, #token: 3334, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
  27932. 2025-08-25 00:02:57,614 - __main__ - INFO - sglang running req: 1 queue req: 0
  27933. 2025-08-25 00:02:58,453 - sglang - INFO - [2025-08-25 00:02:58 TP0] Decode batch. #running-req: 1, #token: 3374, token usage: 0.09, gen throughput (token/s): 47.63, #queue-req: 0
  27934. 2025-08-25 00:02:58,453 - __main__ - INFO - sglang running req: 1 queue req: 0
  27935. 2025-08-25 00:02:59,292 - sglang - INFO - [2025-08-25 00:02:59 TP0] Decode batch. #running-req: 1, #token: 3414, token usage: 0.09, gen throughput (token/s): 47.71, #queue-req: 0
  27936. 2025-08-25 00:02:59,292 - __main__ - INFO - sglang running req: 1 queue req: 0
  27937. 2025-08-25 00:03:00,130 - sglang - INFO - [2025-08-25 00:03:00 TP0] Decode batch. #running-req: 1, #token: 3454, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
  27938. 2025-08-25 00:03:00,131 - __main__ - INFO - sglang running req: 1 queue req: 0
  27939. 2025-08-25 00:03:00,970 - sglang - INFO - [2025-08-25 00:03:00 TP0] Decode batch. #running-req: 1, #token: 3494, token usage: 0.09, gen throughput (token/s): 47.62, #queue-req: 0
  27940. 2025-08-25 00:03:00,971 - __main__ - INFO - sglang running req: 1 queue req: 0
  27941. 2025-08-25 00:03:01,810 - sglang - INFO - [2025-08-25 00:03:01 TP0] Decode batch. #running-req: 1, #token: 3534, token usage: 0.09, gen throughput (token/s): 47.65, #queue-req: 0
  27942. 2025-08-25 00:03:01,810 - __main__ - INFO - sglang running req: 1 queue req: 0
  27943. 2025-08-25 00:03:02,649 - sglang - INFO - [2025-08-25 00:03:02 TP0] Decode batch. #running-req: 1, #token: 3574, token usage: 0.09, gen throughput (token/s): 47.69, #queue-req: 0
  27944. 2025-08-25 00:03:02,649 - __main__ - INFO - sglang running req: 1 queue req: 0
  27945. 2025-08-25 00:03:03,488 - sglang - INFO - [2025-08-25 00:03:03 TP0] Decode batch. #running-req: 1, #token: 3614, token usage: 0.10, gen throughput (token/s): 47.67, #queue-req: 0
  27946. 2025-08-25 00:03:03,488 - __main__ - INFO - sglang running req: 1 queue req: 0
  27947. 2025-08-25 00:03:04,176 - __main__ - INFO - Queue remaining: 0
  27948. 2025-08-25 00:03:04,177 - __main__ - INFO -
  27949. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27950. ----------------------------------------------------------------------------------
  27951. sglang_input_tokens 187.59 187.59
  27952. sglang_output_tokens 66.80 66.80
  27953. 2025-08-25 00:03:04,177 - __main__ - INFO -
  27954. Worker ID | finished | started
  27955. ----------+----------+--------
  27956. 0 | 10 | 11
  27957. 2025-08-25 00:03:04,328 - sglang - INFO - [2025-08-25 00:03:04 TP0] Decode batch. #running-req: 1, #token: 3654, token usage: 0.10, gen throughput (token/s): 47.59, #queue-req: 0
  27958. 2025-08-25 00:03:04,329 - __main__ - INFO - sglang running req: 1 queue req: 0
  27959. 2025-08-25 00:03:05,168 - sglang - INFO - [2025-08-25 00:03:05 TP0] Decode batch. #running-req: 1, #token: 3694, token usage: 0.10, gen throughput (token/s): 47.64, #queue-req: 0
  27960. 2025-08-25 00:03:05,168 - __main__ - INFO - sglang running req: 1 queue req: 0
  27961. 2025-08-25 00:03:06,007 - sglang - INFO - [2025-08-25 00:03:06 TP0] Decode batch. #running-req: 1, #token: 3734, token usage: 0.10, gen throughput (token/s): 47.68, #queue-req: 0
  27962. 2025-08-25 00:03:06,007 - __main__ - INFO - sglang running req: 1 queue req: 0
  27963. 2025-08-25 00:03:06,845 - sglang - INFO - [2025-08-25 00:03:06 TP0] Decode batch. #running-req: 1, #token: 3774, token usage: 0.10, gen throughput (token/s): 47.71, #queue-req: 0
  27964. 2025-08-25 00:03:06,845 - __main__ - INFO - sglang running req: 1 queue req: 0
  27965. 2025-08-25 00:03:07,685 - sglang - INFO - [2025-08-25 00:03:07 TP0] Decode batch. #running-req: 1, #token: 3814, token usage: 0.10, gen throughput (token/s): 47.65, #queue-req: 0
  27966. 2025-08-25 00:03:07,685 - __main__ - INFO - sglang running req: 1 queue req: 0
  27967. 2025-08-25 00:03:08,524 - sglang - INFO - [2025-08-25 00:03:08 TP0] Decode batch. #running-req: 1, #token: 3854, token usage: 0.10, gen throughput (token/s): 47.65, #queue-req: 0
  27968. 2025-08-25 00:03:08,524 - __main__ - INFO - sglang running req: 1 queue req: 0
  27969. 2025-08-25 00:03:09,363 - sglang - INFO - [2025-08-25 00:03:09 TP0] Decode batch. #running-req: 1, #token: 3894, token usage: 0.10, gen throughput (token/s): 47.66, #queue-req: 0
  27970. 2025-08-25 00:03:09,364 - __main__ - INFO - sglang running req: 1 queue req: 0
  27971. 2025-08-25 00:03:10,204 - sglang - INFO - [2025-08-25 00:03:10 TP0] Decode batch. #running-req: 1, #token: 3934, token usage: 0.10, gen throughput (token/s): 47.61, #queue-req: 0
  27972. 2025-08-25 00:03:10,204 - __main__ - INFO - sglang running req: 1 queue req: 0
  27973. 2025-08-25 00:03:11,043 - sglang - INFO - [2025-08-25 00:03:11 TP0] Decode batch. #running-req: 1, #token: 3974, token usage: 0.10, gen throughput (token/s): 47.66, #queue-req: 0
  27974. 2025-08-25 00:03:11,043 - __main__ - INFO - sglang running req: 1 queue req: 0
  27975. 2025-08-25 00:03:11,884 - sglang - INFO - [2025-08-25 00:03:11 TP0] Decode batch. #running-req: 1, #token: 4014, token usage: 0.11, gen throughput (token/s): 47.56, #queue-req: 0
  27976. 2025-08-25 00:03:11,884 - __main__ - INFO - sglang running req: 1 queue req: 0
  27977. 2025-08-25 00:03:12,724 - sglang - INFO - [2025-08-25 00:03:12 TP0] Decode batch. #running-req: 1, #token: 4054, token usage: 0.11, gen throughput (token/s): 47.59, #queue-req: 0
  27978. 2025-08-25 00:03:12,724 - __main__ - INFO - sglang running req: 1 queue req: 0
  27979. 2025-08-25 00:03:13,565 - sglang - INFO - [2025-08-25 00:03:13 TP0] Decode batch. #running-req: 1, #token: 4094, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
  27980. 2025-08-25 00:03:13,565 - __main__ - INFO - sglang running req: 1 queue req: 0
  27981. 2025-08-25 00:03:14,178 - __main__ - INFO - Queue remaining: 0
  27982. 2025-08-25 00:03:14,178 - __main__ - INFO -
  27983. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  27984. ----------------------------------------------------------------------------------
  27985. sglang_input_tokens 178.94 178.94
  27986. sglang_output_tokens 63.72 63.72
  27987. 2025-08-25 00:03:14,178 - __main__ - INFO -
  27988. Worker ID | finished | started
  27989. ----------+----------+--------
  27990. 0 | 10 | 11
  27991. 2025-08-25 00:03:14,405 - sglang - INFO - [2025-08-25 00:03:14 TP0] Decode batch. #running-req: 1, #token: 4134, token usage: 0.11, gen throughput (token/s): 47.60, #queue-req: 0
  27992. 2025-08-25 00:03:14,406 - __main__ - INFO - sglang running req: 1 queue req: 0
  27993. 2025-08-25 00:03:15,246 - sglang - INFO - [2025-08-25 00:03:15 TP0] Decode batch. #running-req: 1, #token: 4174, token usage: 0.11, gen throughput (token/s): 47.59, #queue-req: 0
  27994. 2025-08-25 00:03:15,246 - __main__ - INFO - sglang running req: 1 queue req: 0
  27995. 2025-08-25 00:03:16,087 - sglang - INFO - [2025-08-25 00:03:16 TP0] Decode batch. #running-req: 1, #token: 4214, token usage: 0.11, gen throughput (token/s): 47.55, #queue-req: 0
  27996. 2025-08-25 00:03:16,087 - __main__ - INFO - sglang running req: 1 queue req: 0
  27997. 2025-08-25 00:03:16,927 - sglang - INFO - [2025-08-25 00:03:16 TP0] Decode batch. #running-req: 1, #token: 4254, token usage: 0.11, gen throughput (token/s): 47.60, #queue-req: 0
  27998. 2025-08-25 00:03:16,928 - __main__ - INFO - sglang running req: 1 queue req: 0
  27999. 2025-08-25 00:03:17,768 - sglang - INFO - [2025-08-25 00:03:17 TP0] Decode batch. #running-req: 1, #token: 4294, token usage: 0.11, gen throughput (token/s): 47.57, #queue-req: 0
  28000. 2025-08-25 00:03:17,769 - __main__ - INFO - sglang running req: 1 queue req: 0
  28001. 2025-08-25 00:03:18,610 - sglang - INFO - [2025-08-25 00:03:18 TP0] Decode batch. #running-req: 1, #token: 4334, token usage: 0.11, gen throughput (token/s): 47.52, #queue-req: 0
  28002. 2025-08-25 00:03:18,610 - __main__ - INFO - sglang running req: 1 queue req: 0
  28003. 2025-08-25 00:03:19,453 - sglang - INFO - [2025-08-25 00:03:19 TP0] Decode batch. #running-req: 1, #token: 4374, token usage: 0.12, gen throughput (token/s): 47.47, #queue-req: 0
  28004. 2025-08-25 00:03:19,453 - __main__ - INFO - sglang running req: 1 queue req: 0
  28005. 2025-08-25 00:03:20,294 - sglang - INFO - [2025-08-25 00:03:20 TP0] Decode batch. #running-req: 1, #token: 4414, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
  28006. 2025-08-25 00:03:20,295 - __main__ - INFO - sglang running req: 1 queue req: 0
  28007. 2025-08-25 00:03:21,136 - sglang - INFO - [2025-08-25 00:03:21 TP0] Decode batch. #running-req: 1, #token: 4454, token usage: 0.12, gen throughput (token/s): 47.52, #queue-req: 0
  28008. 2025-08-25 00:03:21,136 - __main__ - INFO - sglang running req: 1 queue req: 0
  28009. 2025-08-25 00:03:21,978 - sglang - INFO - [2025-08-25 00:03:21 TP0] Decode batch. #running-req: 1, #token: 4494, token usage: 0.12, gen throughput (token/s): 47.51, #queue-req: 0
  28010. 2025-08-25 00:03:21,978 - __main__ - INFO - sglang running req: 1 queue req: 0
  28011. 2025-08-25 00:03:22,821 - sglang - INFO - [2025-08-25 00:03:22 TP0] Decode batch. #running-req: 1, #token: 4534, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
  28012. 2025-08-25 00:03:22,821 - __main__ - INFO - sglang running req: 1 queue req: 0
  28013. 2025-08-25 00:03:23,664 - sglang - INFO - [2025-08-25 00:03:23 TP0] Decode batch. #running-req: 1, #token: 4574, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
  28014. 2025-08-25 00:03:23,664 - __main__ - INFO - sglang running req: 1 queue req: 0
  28015. 2025-08-25 00:03:24,179 - __main__ - INFO - Queue remaining: 0
  28016. 2025-08-25 00:03:24,180 - __main__ - INFO -
  28017. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  28018. ----------------------------------------------------------------------------------
  28019. sglang_input_tokens 171.06 171.06
  28020. sglang_output_tokens 60.92 60.92
  28021. 2025-08-25 00:03:24,180 - __main__ - INFO -
  28022. Worker ID | finished | started
  28023. ----------+----------+--------
  28024. 0 | 10 | 11
  28025. 2025-08-25 00:03:24,507 - sglang - INFO - [2025-08-25 00:03:24 TP0] Decode batch. #running-req: 1, #token: 4614, token usage: 0.12, gen throughput (token/s): 47.42, #queue-req: 0
  28026. 2025-08-25 00:03:24,508 - __main__ - INFO - sglang running req: 1 queue req: 0
  28027. 2025-08-25 00:03:25,350 - sglang - INFO - [2025-08-25 00:03:25 TP0] Decode batch. #running-req: 1, #token: 4654, token usage: 0.12, gen throughput (token/s): 47.44, #queue-req: 0
  28028. 2025-08-25 00:03:25,351 - __main__ - INFO - sglang running req: 1 queue req: 0
  28029. 2025-08-25 00:03:26,193 - sglang - INFO - [2025-08-25 00:03:26 TP0] Decode batch. #running-req: 1, #token: 4694, token usage: 0.12, gen throughput (token/s): 47.46, #queue-req: 0
  28030. 2025-08-25 00:03:26,194 - __main__ - INFO - sglang running req: 1 queue req: 0
  28031. 2025-08-25 00:03:27,037 - sglang - INFO - [2025-08-25 00:03:27 TP0] Decode batch. #running-req: 1, #token: 4734, token usage: 0.12, gen throughput (token/s): 47.40, #queue-req: 0
  28032. 2025-08-25 00:03:27,037 - __main__ - INFO - sglang running req: 1 queue req: 0
  28033. 2025-08-25 00:03:27,881 - sglang - INFO - [2025-08-25 00:03:27 TP0] Decode batch. #running-req: 1, #token: 4774, token usage: 0.13, gen throughput (token/s): 47.43, #queue-req: 0
  28034. 2025-08-25 00:03:27,881 - __main__ - INFO - sglang running req: 1 queue req: 0
  28035. 2025-08-25 00:03:28,723 - sglang - INFO - [2025-08-25 00:03:28 TP0] Decode batch. #running-req: 1, #token: 4814, token usage: 0.13, gen throughput (token/s): 47.48, #queue-req: 0
  28036. 2025-08-25 00:03:28,723 - __main__ - INFO - sglang running req: 1 queue req: 0
  28037. 2025-08-25 00:03:29,565 - sglang - INFO - [2025-08-25 00:03:29 TP0] Decode batch. #running-req: 1, #token: 4854, token usage: 0.13, gen throughput (token/s): 47.50, #queue-req: 0
  28038. 2025-08-25 00:03:29,565 - __main__ - INFO - sglang running req: 1 queue req: 0
  28039. 2025-08-25 00:03:30,408 - sglang - INFO - [2025-08-25 00:03:30 TP0] Decode batch. #running-req: 1, #token: 4894, token usage: 0.13, gen throughput (token/s): 47.45, #queue-req: 0
  28040. 2025-08-25 00:03:30,408 - __main__ - INFO - sglang running req: 1 queue req: 0
  28041. 2025-08-25 00:03:31,254 - sglang - INFO - [2025-08-25 00:03:31 TP0] Decode batch. #running-req: 1, #token: 4934, token usage: 0.13, gen throughput (token/s): 47.31, #queue-req: 0
  28042. 2025-08-25 00:03:31,254 - __main__ - INFO - sglang running req: 1 queue req: 0
  28043. 2025-08-25 00:03:32,098 - sglang - INFO - [2025-08-25 00:03:32 TP0] Decode batch. #running-req: 1, #token: 4974, token usage: 0.13, gen throughput (token/s): 47.35, #queue-req: 0
  28044. 2025-08-25 00:03:32,098 - __main__ - INFO - sglang running req: 1 queue req: 0
  28045. 2025-08-25 00:03:32,943 - sglang - INFO - [2025-08-25 00:03:32 TP0] Decode batch. #running-req: 1, #token: 5014, token usage: 0.13, gen throughput (token/s): 47.33, #queue-req: 0
  28046. 2025-08-25 00:03:32,944 - __main__ - INFO - sglang running req: 1 queue req: 0
  28047. 2025-08-25 00:03:33,787 - sglang - INFO - [2025-08-25 00:03:33 TP0] Decode batch. #running-req: 1, #token: 5054, token usage: 0.13, gen throughput (token/s): 47.39, #queue-req: 0
  28048. 2025-08-25 00:03:33,788 - __main__ - INFO - sglang running req: 1 queue req: 0
  28049. 2025-08-25 00:03:34,181 - __main__ - INFO - Queue remaining: 0
  28050. 2025-08-25 00:03:34,181 - __main__ - INFO -
  28051. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  28052. ----------------------------------------------------------------------------------
  28053. sglang_input_tokens 163.84 163.84
  28054. sglang_output_tokens 58.35 58.35
  28055. 2025-08-25 00:03:34,181 - __main__ - INFO -
  28056. Worker ID | finished | started
  28057. ----------+----------+--------
  28058. 0 | 10 | 11
  28059. 2025-08-25 00:03:34,632 - sglang - INFO - [2025-08-25 00:03:34 TP0] Decode batch. #running-req: 1, #token: 5094, token usage: 0.13, gen throughput (token/s): 47.38, #queue-req: 0
  28060. 2025-08-25 00:03:34,632 - __main__ - INFO - sglang running req: 1 queue req: 0
  28061. 2025-08-25 00:03:35,477 - sglang - INFO - [2025-08-25 00:03:35 TP0] Decode batch. #running-req: 1, #token: 5134, token usage: 0.14, gen throughput (token/s): 47.33, #queue-req: 0
  28062. 2025-08-25 00:03:35,477 - __main__ - INFO - sglang running req: 1 queue req: 0
  28063. 2025-08-25 00:03:36,321 - sglang - INFO - [2025-08-25 00:03:36 TP0] Decode batch. #running-req: 1, #token: 5174, token usage: 0.14, gen throughput (token/s): 47.38, #queue-req: 0
  28064. 2025-08-25 00:03:36,321 - __main__ - INFO - sglang running req: 1 queue req: 0
  28065. 2025-08-25 00:03:37,165 - sglang - INFO - [2025-08-25 00:03:37 TP0] Decode batch. #running-req: 1, #token: 5214, token usage: 0.14, gen throughput (token/s): 47.42, #queue-req: 0
  28066. 2025-08-25 00:03:37,165 - __main__ - INFO - sglang running req: 1 queue req: 0
  28067. 2025-08-25 00:03:38,009 - sglang - INFO - [2025-08-25 00:03:38 TP0] Decode batch. #running-req: 1, #token: 5254, token usage: 0.14, gen throughput (token/s): 47.35, #queue-req: 0
  28068. 2025-08-25 00:03:38,010 - __main__ - INFO - sglang running req: 1 queue req: 0
  28069. 2025-08-25 00:03:38,856 - sglang - INFO - [2025-08-25 00:03:38 TP0] Decode batch. #running-req: 1, #token: 5294, token usage: 0.14, gen throughput (token/s): 47.23, #queue-req: 0
  28070. 2025-08-25 00:03:38,857 - __main__ - INFO - sglang running req: 1 queue req: 0
  28071. 2025-08-25 00:03:39,704 - sglang - INFO - [2025-08-25 00:03:39 TP0] Decode batch. #running-req: 1, #token: 5334, token usage: 0.14, gen throughput (token/s): 47.20, #queue-req: 0
  28072. 2025-08-25 00:03:39,704 - __main__ - INFO - sglang running req: 1 queue req: 0
  28073. 2025-08-25 00:03:40,550 - sglang - INFO - [2025-08-25 00:03:40 TP0] Decode batch. #running-req: 1, #token: 5374, token usage: 0.14, gen throughput (token/s): 47.25, #queue-req: 0
  28074. 2025-08-25 00:03:40,551 - __main__ - INFO - sglang running req: 1 queue req: 0
  28075. 2025-08-25 00:03:41,397 - sglang - INFO - [2025-08-25 00:03:41 TP0] Decode batch. #running-req: 1, #token: 5414, token usage: 0.14, gen throughput (token/s): 47.27, #queue-req: 0
  28076. 2025-08-25 00:03:41,397 - __main__ - INFO - sglang running req: 1 queue req: 0
  28077. 2025-08-25 00:03:42,243 - sglang - INFO - [2025-08-25 00:03:42 TP0] Decode batch. #running-req: 1, #token: 5454, token usage: 0.14, gen throughput (token/s): 47.27, #queue-req: 0
  28078. 2025-08-25 00:03:42,243 - __main__ - INFO - sglang running req: 1 queue req: 0
  28079. 2025-08-25 00:03:43,091 - sglang - INFO - [2025-08-25 00:03:43 TP0] Decode batch. #running-req: 1, #token: 5494, token usage: 0.14, gen throughput (token/s): 47.19, #queue-req: 0
  28080. 2025-08-25 00:03:43,091 - __main__ - INFO - sglang running req: 1 queue req: 0
  28081. 2025-08-25 00:03:43,939 - sglang - INFO - [2025-08-25 00:03:43 TP0] Decode batch. #running-req: 1, #token: 5534, token usage: 0.15, gen throughput (token/s): 47.17, #queue-req: 0
  28082. 2025-08-25 00:03:43,939 - __main__ - INFO - sglang running req: 1 queue req: 0
  28083. 2025-08-25 00:03:44,182 - __main__ - INFO - Queue remaining: 0
  28084. 2025-08-25 00:03:44,183 - __main__ - INFO -
  28085. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  28086. ----------------------------------------------------------------------------------
  28087. sglang_input_tokens 157.20 157.20
  28088. sglang_output_tokens 55.98 55.98
  28089. 2025-08-25 00:03:44,183 - __main__ - INFO -
  28090. Worker ID | finished | started
  28091. ----------+----------+--------
  28092. 0 | 10 | 11
  28093. 2025-08-25 00:03:44,786 - sglang - INFO - [2025-08-25 00:03:44 TP0] Decode batch. #running-req: 1, #token: 5574, token usage: 0.15, gen throughput (token/s): 47.22, #queue-req: 0
  28094. 2025-08-25 00:03:44,786 - __main__ - INFO - sglang running req: 1 queue req: 0
  28095. 2025-08-25 00:03:45,632 - sglang - INFO - [2025-08-25 00:03:45 TP0] Decode batch. #running-req: 1, #token: 5614, token usage: 0.15, gen throughput (token/s): 47.29, #queue-req: 0
  28096. 2025-08-25 00:03:45,632 - __main__ - INFO - sglang running req: 1 queue req: 0
  28097. 2025-08-25 00:03:46,478 - sglang - INFO - [2025-08-25 00:03:46 TP0] Decode batch. #running-req: 1, #token: 5654, token usage: 0.15, gen throughput (token/s): 47.26, #queue-req: 0
  28098. 2025-08-25 00:03:46,478 - __main__ - INFO - sglang running req: 1 queue req: 0
  28099. 2025-08-25 00:03:47,326 - sglang - INFO - [2025-08-25 00:03:47 TP0] Decode batch. #running-req: 1, #token: 5694, token usage: 0.15, gen throughput (token/s): 47.17, #queue-req: 0
  28100. 2025-08-25 00:03:47,326 - __main__ - INFO - sglang running req: 1 queue req: 0
  28101. 2025-08-25 00:03:48,173 - sglang - INFO - [2025-08-25 00:03:48 TP0] Decode batch. #running-req: 1, #token: 5734, token usage: 0.15, gen throughput (token/s): 47.19, #queue-req: 0
  28102. 2025-08-25 00:03:48,174 - __main__ - INFO - sglang running req: 1 queue req: 0
  28103. 2025-08-25 00:03:49,021 - sglang - INFO - [2025-08-25 00:03:49 TP0] Decode batch. #running-req: 1, #token: 5774, token usage: 0.15, gen throughput (token/s): 47.17, #queue-req: 0
  28104. 2025-08-25 00:03:49,022 - __main__ - INFO - sglang running req: 1 queue req: 0
  28105. 2025-08-25 00:03:49,867 - sglang - INFO - [2025-08-25 00:03:49 TP0] Decode batch. #running-req: 1, #token: 5814, token usage: 0.15, gen throughput (token/s): 47.28, #queue-req: 0
  28106. 2025-08-25 00:03:49,868 - __main__ - INFO - sglang running req: 1 queue req: 0
  28107. 2025-08-25 00:03:50,714 - sglang - INFO - [2025-08-25 00:03:50 TP0] Decode batch. #running-req: 1, #token: 5854, token usage: 0.15, gen throughput (token/s): 47.25, #queue-req: 0
  28108. 2025-08-25 00:03:50,714 - __main__ - INFO - sglang running req: 1 queue req: 0
  28109. 2025-08-25 00:03:51,562 - sglang - INFO - [2025-08-25 00:03:51 TP0] Decode batch. #running-req: 1, #token: 5894, token usage: 0.16, gen throughput (token/s): 47.18, #queue-req: 0
  28110. 2025-08-25 00:03:51,562 - __main__ - INFO - sglang running req: 1 queue req: 0
  28111. 2025-08-25 00:03:52,410 - sglang - INFO - [2025-08-25 00:03:52 TP0] Decode batch. #running-req: 1, #token: 5934, token usage: 0.16, gen throughput (token/s): 47.16, #queue-req: 0
  28112. 2025-08-25 00:03:52,410 - __main__ - INFO - sglang running req: 1 queue req: 0
  28113. 2025-08-25 00:03:53,257 - sglang - INFO - [2025-08-25 00:03:53 TP0] Decode batch. #running-req: 1, #token: 5974, token usage: 0.16, gen throughput (token/s): 47.20, #queue-req: 0
  28114. 2025-08-25 00:03:53,258 - __main__ - INFO - sglang running req: 1 queue req: 0
  28115. 2025-08-25 00:03:54,104 - sglang - INFO - [2025-08-25 00:03:54 TP0] Decode batch. #running-req: 1, #token: 6014, token usage: 0.16, gen throughput (token/s): 47.24, #queue-req: 0
  28116. 2025-08-25 00:03:54,104 - __main__ - INFO - sglang running req: 1 queue req: 0
  28117. 2025-08-25 00:03:54,184 - __main__ - INFO - Queue remaining: 0
  28118. 2025-08-25 00:03:54,184 - __main__ - INFO -
  28119. Metric Name Lifetime (tokens/sec) Recently (tokens/sec)
  28120. ----------------------------------------------------------------------------------
  28121. sglang_input_tokens 151.08 151.08
  28122. sglang_output_tokens 53.80 53.80
  28123. 2025-08-25 00:03:54,184 - __main__ - INFO -
  28124. Worker ID | finished | started
  28125. ----------+----------+--------
  28126. 0 | 10 | 11
  28127. 2025-08-25 00:03:54,952 - sglang - INFO - [2025-08-25 00:03:54 TP0] Decode batch. #running-req: 1, #token: 6054, token usage: 0.16, gen throughput (token/s): 47.19, #queue-req: 0
  28128. 2025-08-25 00:03:54,952 - __main__ - INFO - sglang running req: 1 queue req: 0
  28129. 2025-08-25 00:03:55,277 - __main__ - INFO - Finished TaskGroup for worker on 73c9399482ed5cf37e1888c000e49ef82a30c10d
  28130. 2025-08-25 00:03:55,278 - __main__ - INFO - Got 1 docs for 73c9399482ed5cf37e1888c000e49ef82a30c10d
  28131. 2025-08-25 00:03:55,280 - __main__ - INFO - Worker 0 exiting due to empty queue
  28132. 2025-08-25 00:03:55,280 - __main__ - INFO - Work done
  28133. 2025-08-25 00:03:55,281 - __main__ - INFO - Got cancellation request for SGLang server